author    Chunseok Lee <chunseok.lee@samsung.com>  2020-12-14 14:43:43 +0900
committer Chunseok Lee <chunseok.lee@samsung.com>  2020-12-14 14:43:43 +0900
commit    62529acabbafce7730601ed01d5709d7bc0d378a (patch)
tree      bf6912cfa8fac4a2997292bfcb3c82055734c97e /runtime/onert/backend
parent    6ea13af5257155ff993c205cf997b870cc627f73 (diff)
download  nnfw-62529acabbafce7730601ed01d5709d7bc0d378a.tar.gz
          nnfw-62529acabbafce7730601ed01d5709d7bc0d378a.tar.bz2
          nnfw-62529acabbafce7730601ed01d5709d7bc0d378a.zip
Imported Upstream version 1.12.0 (tag: upstream/1.12.0)
Diffstat (limited to 'runtime/onert/backend')
-rw-r--r--  runtime/onert/backend/CMakeLists.txt | 2
-rw-r--r--  runtime/onert/backend/acl_cl/Backend.h | 12
-rw-r--r--  runtime/onert/backend/acl_cl/BackendContext.cc | 302
-rw-r--r--  runtime/onert/backend/acl_cl/BackendContext.h | 70
-rw-r--r--  runtime/onert/backend/acl_cl/ConstantInitializer.cc | 2
-rw-r--r--  runtime/onert/backend/acl_cl/ConstantInitializer.h | 6
-rw-r--r--  runtime/onert/backend/acl_cl/KernelGenerator.cc | 82
-rw-r--r--  runtime/onert/backend/acl_cl/KernelGenerator.h | 59
-rw-r--r--  runtime/onert/backend/acl_cl/Optimizer.h | 7
-rw-r--r--  runtime/onert/backend/acl_cl/acl_cl.cc | 15
-rw-r--r--  runtime/onert/backend/acl_common/AclConstantInitializer.cc | 2
-rw-r--r--  runtime/onert/backend/acl_common/AclConstantInitializer.h | 4
-rw-r--r--  runtime/onert/backend/acl_common/AclTensorBuilder.h | 29
-rw-r--r--  runtime/onert/backend/acl_common/Convert.cc | 18
-rw-r--r--  runtime/onert/backend/acl_neon/Backend.h | 12
-rw-r--r--  runtime/onert/backend/acl_neon/BackendContext.cc | 302
-rw-r--r--  runtime/onert/backend/acl_neon/BackendContext.h | 71
-rw-r--r--  runtime/onert/backend/acl_neon/ConstantInitializer.h | 6
-rw-r--r--  runtime/onert/backend/acl_neon/KernelGenerator.cc | 79
-rw-r--r--  runtime/onert/backend/acl_neon/KernelGenerator.h | 25
-rw-r--r--  runtime/onert/backend/acl_neon/Optimizer.h | 7
-rw-r--r--  runtime/onert/backend/acl_neon/acl_neon.cc | 15
-rw-r--r--  runtime/onert/backend/cpu/Backend.h | 2
-rw-r--r--  runtime/onert/backend/cpu/BackendContext.cc | 147
-rw-r--r--  runtime/onert/backend/cpu/BackendContext.h | 35
-rw-r--r--  runtime/onert/backend/cpu/ConstantInitializer.cc | 94
-rw-r--r--  runtime/onert/backend/cpu/ConstantInitializer.h | 38
-rw-r--r--  runtime/onert/backend/cpu/ExternalContext.h | 3
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.cc | 72
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.h | 76
-rw-r--r--  runtime/onert/backend/cpu/StaticTensorManager.cc | 107
-rw-r--r--  runtime/onert/backend/cpu/StaticTensorManager.h | 33
-rw-r--r--  runtime/onert/backend/cpu/Tensor.h | 87
-rw-r--r--  runtime/onert/backend/cpu/TensorBuilder.h | 19
-rw-r--r--  runtime/onert/backend/cpu/cpu.cc | 15
-rw-r--r--  runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc | 6
-rw-r--r--  runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc | 2
-rw-r--r--  runtime/onert/backend/cpu/ops/ConcatLayer.cc | 34
-rw-r--r--  runtime/onert/backend/cpu/ops/ConvolutionLayer.cc | 2
-rw-r--r--  runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc | 78
-rw-r--r--  runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h | 54
-rw-r--r--  runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc | 14
-rw-r--r--  runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h | 5
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc | 30
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h | 4
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc | 30
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc | 32
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h | 2
-rw-r--r--  runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc | 7
-rw-r--r--  runtime/onert/backend/cpu/ops/ExpandDimsLayer.h | 4
-rw-r--r--  runtime/onert/backend/cpu/ops/FillLayer.cc | 22
-rw-r--r--  runtime/onert/backend/cpu/ops/FillLayer.h | 4
-rw-r--r--  runtime/onert/backend/cpu/ops/MeanLayer.cc | 25
-rw-r--r--  runtime/onert/backend/ruy/Backend.h | 68
-rw-r--r--  runtime/onert/backend/ruy/BackendContext.cc | 147
-rw-r--r--  runtime/onert/backend/ruy/BackendContext.h | 78
-rw-r--r--  runtime/onert/backend/ruy/CMakeLists.txt | 22
-rw-r--r--  runtime/onert/backend/ruy/Config.cc (renamed from runtime/onert/backend/cpu/Tensor.cc) | 10
-rw-r--r--  runtime/onert/backend/ruy/Config.h | 48
-rw-r--r--  runtime/onert/backend/ruy/ConstantInitializer.h | 35
-rw-r--r--  runtime/onert/backend/ruy/ExternalContext.h | 60
-rw-r--r--  runtime/onert/backend/ruy/KernelGenerator.cc | 171
-rw-r--r--  runtime/onert/backend/ruy/KernelGenerator.h | 64
-rw-r--r--  runtime/onert/backend/ruy/StaticTensorManager.h | 35
-rw-r--r--  runtime/onert/backend/ruy/Tensor.h | 37
-rw-r--r--  runtime/onert/backend/ruy/TensorBuilder.cc | 90
-rw-r--r--  runtime/onert/backend/ruy/TensorBuilder.h | 73
-rw-r--r--  runtime/onert/backend/ruy/ops/ConvolutionLayer.cc | 153
-rw-r--r--  runtime/onert/backend/ruy/ops/ConvolutionLayer.h | 90
-rw-r--r--  runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc | 103
-rw-r--r--  runtime/onert/backend/ruy/ops/FullyConnectedLayer.h | 69
-rw-r--r--  runtime/onert/backend/ruy/ops/OperationUtils.cc | 47
-rw-r--r--  runtime/onert/backend/ruy/ops/OperationUtils.h | 123
-rw-r--r--  runtime/onert/backend/ruy/ruy.cc | 24
-rw-r--r--  runtime/onert/backend/xnnpack/Backend.h | 68
-rw-r--r--  runtime/onert/backend/xnnpack/BackendContext.cc | 147
-rw-r--r--  runtime/onert/backend/xnnpack/BackendContext.h | 85
-rw-r--r--  runtime/onert/backend/xnnpack/CMakeLists.txt | 26
-rw-r--r--  runtime/onert/backend/xnnpack/Config.cc | 44
-rw-r--r--  runtime/onert/backend/xnnpack/Config.h | 51
-rw-r--r--  runtime/onert/backend/xnnpack/ConstantInitializer.h | 35
-rw-r--r--  runtime/onert/backend/xnnpack/ExternalContext.cc | 36
-rw-r--r--  runtime/onert/backend/xnnpack/ExternalContext.h | 46
-rw-r--r--  runtime/onert/backend/xnnpack/KernelGenerator.cc | 197
-rw-r--r--  runtime/onert/backend/xnnpack/KernelGenerator.h | 65
-rw-r--r--  runtime/onert/backend/xnnpack/StaticTensorManager.h | 35
-rw-r--r--  runtime/onert/backend/xnnpack/Tensor.h | 37
-rw-r--r--  runtime/onert/backend/xnnpack/TensorBuilder.cc | 90
-rw-r--r--  runtime/onert/backend/xnnpack/TensorBuilder.h | 73
-rw-r--r--  runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc | 149
-rw-r--r--  runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h | 77
-rw-r--r--  runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc | 150
-rw-r--r--  runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h | 77
-rw-r--r--  runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc | 138
-rw-r--r--  runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h | 61
-rw-r--r--  runtime/onert/backend/xnnpack/ops/Layer.h | 81
-rw-r--r--  runtime/onert/backend/xnnpack/ops/OperationUtils.h | 75
-rw-r--r--  runtime/onert/backend/xnnpack/xnnpack.cc | 33
98 files changed, 4947 insertions(+), 686 deletions(-)
diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt
index 42d622aa8..dc038c975 100644
--- a/runtime/onert/backend/CMakeLists.txt
+++ b/runtime/onert/backend/CMakeLists.txt
@@ -4,3 +4,5 @@ add_subdirectory(cpu)
add_subdirectory(acl_cl)
add_subdirectory(acl_neon)
add_subdirectory(acl_common)
+add_subdirectory(ruy)
+add_subdirectory(xnnpack)
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 5c5041378..4f48314c1 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -20,6 +20,7 @@
#include <memory>
#include <backend/Backend.h>
+#include "BackendContext.h"
#include "Config.h"
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
@@ -41,21 +42,20 @@ public:
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
+ bool is_linear_executor) const override
{
const auto &operands = graph.operands();
const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
+ auto context = std::make_unique<acl_cl::BackendContext>(this, &graph);
auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
- auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
- context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
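
Note: the refactored newContext() above wires a backend-specific BackendContext out of shared components (tensor registry, tensor builder, constant initializer, kernel generator, optimizer), with the TensorBuilder no longer taking the registry. The following is a minimal, self-contained sketch of that wiring pattern; the types here are simplified stand-ins, not the real onert classes.

// Simplified sketch of the construction pattern in newContext(); stand-in types only.
#include <iostream>
#include <memory>

struct TensorManager {};
struct TensorRegistry { std::shared_ptr<TensorManager> tm; };
struct TensorBuilder  { std::shared_ptr<TensorManager> tm; };
struct ConstantInitializer { std::shared_ptr<TensorRegistry> tr; };
struct KernelGenerator { std::shared_ptr<TensorBuilder> tb; std::shared_ptr<TensorRegistry> tr; };

struct BackendContext
{
  std::shared_ptr<TensorRegistry> tensor_registry;
  std::shared_ptr<TensorBuilder> tensor_builder;
  std::shared_ptr<ConstantInitializer> constant_initializer;
  std::shared_ptr<KernelGenerator> kernel_gen;
};

std::unique_ptr<BackendContext> newContext()
{
  auto context = std::make_unique<BackendContext>();
  auto tm = std::make_shared<TensorManager>();
  auto tr = std::make_shared<TensorRegistry>(TensorRegistry{tm});
  auto tb = std::make_shared<TensorBuilder>(TensorBuilder{tm}); // builder no longer holds the registry
  context->tensor_registry = tr;
  context->tensor_builder = tb;
  context->constant_initializer = std::make_shared<ConstantInitializer>(ConstantInitializer{tr});
  context->kernel_gen = std::make_shared<KernelGenerator>(KernelGenerator{tb, tr});
  return context;
}

int main()
{
  auto ctx = newContext();
  std::cout << "context wired: " << (ctx->kernel_gen != nullptr) << "\n";
}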
diff --git a/runtime/onert/backend/acl_cl/BackendContext.cc b/runtime/onert/backend/acl_cl/BackendContext.cc
new file mode 100644
index 000000000..a6f228a4f
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/BackendContext.cc
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "Optimizer.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ // Prepare scanning
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ const auto &li = lower_info.operand.at(ind);
+ if (li->def_factors().getOnlyElement().backend() != backend())
+ continue;
+
+ // Ignore unused tensor
+ if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+ {
+ VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
+ << std::endl;
+ return;
+ }
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ auto factor = li->def_factors().getOnlyElement();
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any op_seq (No use and def)
+ const auto info = obj.info();
+ const auto backend_layout = factor.layout();
+ // TODO Change tensor info to have permuted shape
+ tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ }
+ }
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+ // Increasing use count here makes the tensor never be deallocated, i.e it they will be
+ // deallocated last.
+ VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ // At each operation,
+ // 1. Scan DEF of outputs. If the DEF, allocate it
+ // 2. Scan DEF of inputs. If variable tensor, allocate it
+ // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ for (const auto &op_idx : op_seq.operations())
+ {
+ auto &op = graph()->operations().at(op_idx);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+ // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
+ // non-constant because of less memory usage by memory planning in here
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+ lower_info.operand.at(ind)->use_factors().size() == 1);
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+ // plan for deallocation of static tensornode
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+ }
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ optimizer->optimize();
+
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (const auto op_ind : op_seq)
+ {
+ bool op_assigned = [&]() {
+ for (auto &op_info : operation_list())
+ if (op_info.index == op_ind)
+ return true;
+ return false;
+ }();
+ if (!op_assigned)
+ continue;
+
+ const auto &op = graph()->operations().at(op_ind);
+ for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
+ find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
+ {
+ const auto &operand_lower_info =
+ lower_info.operand.at(index)->def_factors().getOnlyElement();
+
+ // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
+ // op.getOutputs() of permute (CPU) returns tensor A
+ // but tensor A belongs to the backend of acl_cl.
+ // So, we have to make this tensor NOT registered for CPU.
+ if (operand_lower_info.backend() != backend())
+ continue;
+
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = op_seq.getLayout();
+ const auto backend_layout = operand_lower_info.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+ }
+ }
+ }
+
+ // TODO Get compiler options from compiler, and use it rather than getting it from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ planTensors(order, op_seqs, lower_info);
+ }
+ else
+ {
+ // For the executors that does not have fixed linear execution order:
+ // To make tensors never be deallocated, this is a workaround to use static memory planner
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ tensor_builder->allocate();
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ ifunc.prepare();
+ tensor_builder->postFunctionPrepare();
+ });
+ }
+
+ return ret;
+}
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
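
Note: planTensors() above drives static memory planning with per-operand counters (remaining uses, pending def); constants receive one extra use so they are released last, and a tensor is freed via notifyLastUse() once its use count hits zero. Below is a minimal sketch of that counting scheme, assuming plain ints for operand indices and printouts in place of the TensorBuilder notifications.

// Minimal sketch of the use-count-driven first/last-use planning in planTensors().
#include <iostream>
#include <map>
#include <vector>

struct Op { std::vector<int> inputs, outputs; };

void planTensors(const std::vector<Op> &order, std::map<int, int> uses,
                 const std::vector<int> &constants)
{
  // Constants get one extra use and are "defined" first, so they are freed last.
  for (int c : constants) { ++uses[c]; std::cout << "firstUse(" << c << ")\n"; }

  for (const Op &op : order)
  {
    for (int out : op.outputs) std::cout << "firstUse(" << out << ")\n"; // define outputs
    for (int in : op.inputs)
      if (--uses[in] == 0) std::cout << "lastUse(" << in << ")\n";       // free dead inputs
  }
  for (int c : constants)
    if (--uses[c] == 0) std::cout << "lastUse(" << c << ")\n";           // release constants last
}

int main()
{
  // operand 0: constant weight, operand 1: graph input, operand 2: op output
  std::vector<Op> order = {{{0, 1}, {2}}};
  planTensors(order, {{0, 1}, {1, 1}, {2, 0}}, {0});
}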
diff --git a/runtime/onert/backend/acl_cl/BackendContext.h b/runtime/onert/backend/acl_cl/BackendContext.h
new file mode 100644
index 000000000..662d767d0
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/BackendContext.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+class Optimizer;
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}
+ {
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+ FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+ std::shared_ptr<Optimizer> optimizer;
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index b45b91058..413a7ccc3 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -112,7 +112,7 @@ void ConstantInitializer::visit(const ir::operation::Reverse &node)
const auto &axis_obj = _operands.at(axis_index);
const auto ifm_rank = input_obj.shape().rank();
- const auto frontend_layout = this->_current_op_seq_layout;
+ const auto frontend_layout = this->_current_layout;
auto output_tensor = this->_tensor_reg->getITensor(output_index);
const auto backend_layout = output_tensor->layout();
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
index 9f3acb461..fc0eca84f 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
#include "AclConstantInitializer.h"
@@ -45,4 +45,4 @@ public:
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index e7690af2e..3a5ea5a0f 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -49,7 +49,7 @@ KernelGenerator::KernelGenerator(
const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
@@ -62,7 +62,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
_return_fn_seq->enableDynamicShapeInferer(false);
- _current_op_seq_layout = op_seq.getLayout();
+ _current_layout = op_seq.getLayout();
for (const auto &operation_idx : op_seq.operations())
{
const auto &node = _operations_ctx.at(operation_idx);
@@ -78,6 +78,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ const auto NNApiInputs = 2;
+ if (node.getInputs().size() != NNApiInputs)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ if (!_ctx.at(crops_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND");
+ }
+
+ auto crops = _ctx.at(crops_index).asVector<int32_t>();
+ for (auto crop : crops)
+ {
+ if (crop != 0)
+ {
+ throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND");
+ }
+ }
+ }
+
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
@@ -152,8 +171,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -189,8 +208,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -255,7 +274,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
else
{
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
@@ -277,7 +296,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
::arm_compute::CLFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
@@ -296,7 +315,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = input_tensor->layout();
std::unique_ptr<arm_compute::IFunction> fn;
@@ -329,7 +348,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
// NOTE This operation must not be changed the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
@@ -388,7 +407,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
@@ -455,7 +474,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
@@ -557,7 +576,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
const auto &perms = _ctx.at(perm_idx);
@@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
const size_t output_rank = _ctx.at(output_idx).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
@@ -887,7 +906,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : input_indexes)
inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
if (axis < 0)
@@ -923,8 +942,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_reg, _current_op_seq_layout,
- acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
@@ -1169,9 +1187,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout);
const auto stride = node.param().stride;
@@ -1270,7 +1288,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
UNUSED_RELEASE(backend_layout);
assert(backend_layout == ifm_tensor->layout());
assert(backend_layout == indices_tensor->layout());
- assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+ assert(ifm_rank < 4 || _current_layout == backend_layout);
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
@@ -1306,11 +1324,11 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
auto ifm_shape = _ctx.at(ifm_index).shape();
auto ofm_shape = _ctx.at(ofm_index).shape();
@@ -1320,7 +1338,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto frontend_layout = _current_op_seq_layout;
+ auto frontend_layout = _current_layout;
auto backend_layout = ifm_tensor->layout();
int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
@@ -1331,10 +1349,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto acl_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
-
+ auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
+ : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayerEx>(
- ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
- ::arm_compute::ReductionOperation::ARG_IDX_MAX);
+ ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1400,7 +1418,7 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &ofm_ind : output_indexes)
output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
auto axis = _ctx.at(axis_index).asScalar<int32_t>();
if (axis < 0)
@@ -1439,7 +1457,7 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
{
int32_t split_dim = split_dim_op.asScalar<int32_t>();
uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim;
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions())
@@ -1483,7 +1501,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : output_indexes)
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
if (axis < 0)
axis += input_rank;
@@ -1526,7 +1544,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto input = _tensor_reg->getAclTensor(input_index)->handle();
auto output = _tensor_reg->getAclTensor(output_index)->handle();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
::arm_compute::PaddingList padding_list;
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h
index e8a922677..22a7c18a3 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.h
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
#include "ir/Operands.h"
#include "TensorBuilder.h"
@@ -31,7 +31,7 @@ namespace backend
namespace acl_cl
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
@@ -39,60 +39,61 @@ public:
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
+
+ void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::ConvertFp16ToFp32 &) override;
+ void visit(const ir::operation::ConvertFp32ToFp16 &) override;
+ void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
+ void visit(const ir::operation::EmbeddingLookup &) override;
void visit(const ir::operation::ExpandDims &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::LSTM &) override;
void visit(const ir::operation::OneHot &) override;
void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Pool2D &) override;
+ void visit(const ir::operation::Pad &) override;
void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::Pool2D &) override;
+ void visit(const ir::operation::PReLU &) override;
+ void visit(const ir::operation::Reduce &) override;
+ void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::ResizeNearestNeighbor &) override;
+ void visit(const ir::operation::Reverse &) override;
void visit(const ir::operation::RNN &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::PReLU &) override;
- void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::TopKV2 &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SplitV &) override;
+ void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::StridedSlice &) override;
+ void visit(const ir::operation::TopKV2 &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::ConvertFp32ToFp16 &) override;
- void visit(const ir::operation::ConvertFp16ToFp32 &) override;
- void visit(const ir::operation::Reverse &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
};
} // namespace acl_cl
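
Note: the header change above moves KernelGenerator onto cpu_common::KernelGeneratorBase, keeping one visit() override per supported operation. The sketch below illustrates the underlying double-dispatch visitor pattern with two toy operation types; it is not the real ir::operation hierarchy.

// Sketch of visitor-based kernel dispatch with toy operation types.
#include <iostream>
#include <memory>
#include <vector>

struct Conv2D;
struct Softmax;

struct OperationVisitor
{
  virtual ~OperationVisitor() = default;
  virtual void visit(const Conv2D &) {}
  virtual void visit(const Softmax &) {}
};

struct Operation
{
  virtual ~Operation() = default;
  virtual void accept(OperationVisitor &v) const = 0;
};
struct Conv2D : Operation { void accept(OperationVisitor &v) const override { v.visit(*this); } };
struct Softmax : Operation { void accept(OperationVisitor &v) const override { v.visit(*this); } };

// A "kernel generator" overrides only the operations the backend supports.
struct ToyKernelGenerator : OperationVisitor
{
  void visit(const Conv2D &) override { std::cout << "generate Conv2D kernel\n"; }
  void visit(const Softmax &) override { std::cout << "generate Softmax kernel\n"; }
};

int main()
{
  std::vector<std::unique_ptr<Operation>> ops;
  ops.emplace_back(new Conv2D);
  ops.emplace_back(new Softmax);
  ToyKernelGenerator gen;
  for (auto &op : ops)
    op->accept(gen); // double dispatch selects the matching visit()
}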
diff --git a/runtime/onert/backend/acl_cl/Optimizer.h b/runtime/onert/backend/acl_cl/Optimizer.h
index 18d38ec1b..ad5154860 100644
--- a/runtime/onert/backend/acl_cl/Optimizer.h
+++ b/runtime/onert/backend/acl_cl/Optimizer.h
@@ -17,8 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
#define __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
-#include <backend/IOptimizer.h>
-#include <backend/BackendContext.h>
+#include "BackendContext.h"
#include "TensorBuilder.h"
namespace onert
@@ -28,12 +27,12 @@ namespace backend
namespace acl_cl
{
-class Optimizer : public IOptimizer
+class Optimizer
{
public:
Optimizer(BackendContext *context);
- void optimize() override;
+ void optimize();
private:
BackendContext *_context;
diff --git a/runtime/onert/backend/acl_cl/acl_cl.cc b/runtime/onert/backend/acl_cl/acl_cl.cc
index 88378b13a..82cbde02f 100644
--- a/runtime/onert/backend/acl_cl/acl_cl.cc
+++ b/runtime/onert/backend/acl_cl/acl_cl.cc
@@ -14,20 +14,11 @@
* limitations under the License.
*/
-#include <util/logging.h>
-
#include "Backend.h"
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'acl_cl' loaded\n";
- return new onert::backend::acl_cl::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'acl_cl' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_cl::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
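
Note: the extern "C" pair above (onert_backend_create / onert_backend_destroy) is the plugin entry point of a backend shared library. The sketch below shows how such a library could be resolved with dlopen/dlsym; the library name and error handling are assumptions, and onert's actual backend loader may differ.

// Illustrative loader for a backend plugin exposing the C entry points above.
#include <dlfcn.h>
#include <iostream>

int main()
{
  void *handle = dlopen("libbackend_acl_cl.so", RTLD_LAZY | RTLD_LOCAL); // assumed library name
  if (!handle)
  {
    std::cerr << "dlopen failed: " << dlerror() << "\n";
    return 1;
  }

  using create_fn = void *(*)();       // returns onert::backend::Backend*
  using destroy_fn = void (*)(void *); // takes onert::backend::Backend*

  auto create = reinterpret_cast<create_fn>(dlsym(handle, "onert_backend_create"));
  auto destroy = reinterpret_cast<destroy_fn>(dlsym(handle, "onert_backend_destroy"));
  if (create && destroy)
  {
    void *backend = create();
    // ... hand the backend to the runtime through its C++ interface ...
    destroy(backend);
  }
  dlclose(handle);
  return 0;
}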
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
index 21f41a3e6..921d107d9 100644
--- a/runtime/onert/backend/acl_common/AclConstantInitializer.cc
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
@@ -25,7 +25,7 @@ namespace acl_common
AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+ : cpu_common::ConstantInitializerBase{operands}, _tensor_reg{tensor_reg}
{
// DO NOTHING
}
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h
index 52f4c54cf..894e2e7d1 100644
--- a/runtime/onert/backend/acl_common/AclConstantInitializer.h
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
+#include <backend/cpu_common/ConstantInitializerBase.h>
#include <ir/Operands.h>
#include "AclTensorRegistry.h"
@@ -28,7 +28,7 @@ namespace backend
namespace acl_common
{
-class AclConstantInitializer : public IConstantInitializer
+class AclConstantInitializer : public cpu_common::ConstantInitializerBase
{
public:
AclConstantInitializer(const ir::Operands &operands,
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
index bb7abc95d..12e9ab894 100644
--- a/runtime/onert/backend/acl_common/AclTensorBuilder.h
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -21,7 +21,6 @@
#include <queue>
#include <arm_compute/core/Types.h>
-#include <backend/ITensorBuilder.h>
#include "ir/OperandIndexMap.h"
#include <ir/Operands.h>
#include "AclTensorManager.h"
@@ -43,14 +42,12 @@ enum class UsesType
LAST
};
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclTensorBuilder : public ITensorBuilder
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder
{
public:
using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
- AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
- const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg);
+ AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
/**
* @brief Register tensor information to allocate on ACL-CL backend
@@ -59,16 +56,16 @@ public:
* @param[in] layout Tensor data layout
*/
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
+ ir::Layout backend_layout);
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
- bool isRegistered(const ir::OperandIndex &) const override;
+ bool isRegistered(const ir::OperandIndex &) const;
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override;
+ void prepare(void);
+ void allocate();
+ void postFunctionPrepare();
T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
@@ -105,7 +102,6 @@ private:
ir::OperandIndexMap<size_t> _uses_count_map;
std::unique_ptr<T_AclTensorManager> _tensor_mgr;
- std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg;
// for linear executor
std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
@@ -133,10 +129,9 @@ namespace acl_common
{
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(
- const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
- const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg)
- : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg}
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
+ T_AclTensorManager *tensor_mgr)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}
{
assert(_tensor_mgr);
}
diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc
index 67d9d7176..7d3a69032 100644
--- a/runtime/onert/backend/acl_common/Convert.cc
+++ b/runtime/onert/backend/acl_common/Convert.cc
@@ -109,13 +109,19 @@ namespace acl_common
case ir::DataType::UINT8:
return ::arm_compute::DataType::U8;
case ir::DataType::QUANT_INT8_SYMM:
- return ::arm_compute::DataType::S8;
+ return ::arm_compute::DataType::QSYMM8;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ return ::arm_compute::DataType::QASYMM8_SIGNED;
case ir::DataType::FLOAT16:
return ::arm_compute::DataType::F16;
case ir::DataType::INT64:
return ::arm_compute::DataType::S64;
+ case ir::DataType::QUANT_INT16_ASYMM:
+ return ::arm_compute::DataType::QASYMM16;
+ case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL:
+ return ::arm_compute::DataType::QSYMM8_PER_CHANNEL;
default:
- throw std::runtime_error("Not supported, yet");
+ throw std::runtime_error("Not supported internal data type, yet");
break;
}
}
@@ -175,7 +181,7 @@ namespace acl_common
return ::arm_compute::ActivationLayerInfo{
::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported internal activation, yet"};
break;
}
}
@@ -219,7 +225,7 @@ asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type,
return ::arm_compute::ActivationLayerInfo{
::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported internal elementwise activation, yet"};
break;
}
}
@@ -295,6 +301,8 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
return ir::DataType::UINT32;
case ::arm_compute::DataType::QASYMM8:
return ir::DataType::QUANT_UINT8_ASYMM;
+ case ::arm_compute::DataType::QASYMM8_SIGNED:
+ return ir::DataType::QUANT_INT8_ASYMM;
case ::arm_compute::DataType::U8:
return ir::DataType::UINT8;
case ::arm_compute::DataType::QSYMM8:
@@ -304,7 +312,7 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
case ::arm_compute::DataType::S64:
return ir::DataType::INT64;
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported acl data type, yet"};
break;
}
}
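
Note: Convert.cc above extends the ir::DataType <-> arm_compute::DataType mapping (QSYMM8, QASYMM8_SIGNED, QASYMM16, QSYMM8_PER_CHANNEL) and makes the unsupported-type errors name the failing domain. The sketch below mirrors that switch-with-throw mapping pattern using toy enums rather than the real types.

// Sketch of the enum-to-enum mapping pattern used in Convert.cc (toy enums only).
#include <iostream>
#include <stdexcept>

enum class IrType { FLOAT32, UINT8, QUANT_INT8_ASYMM };
enum class AclType { F32, U8, QASYMM8_SIGNED };

AclType asAclType(IrType t)
{
  switch (t)
  {
    case IrType::FLOAT32:
      return AclType::F32;
    case IrType::UINT8:
      return AclType::U8;
    case IrType::QUANT_INT8_ASYMM:
      return AclType::QASYMM8_SIGNED;
    default:
      // Mirror the diff's style: name the failing domain in the message.
      throw std::runtime_error("Not supported internal data type, yet");
  }
}

int main()
{
  std::cout << static_cast<int>(asAclType(IrType::QUANT_INT8_ASYMM)) << "\n";
}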
diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h
index 35d6e4e8e..b11c19733 100644
--- a/runtime/onert/backend/acl_neon/Backend.h
+++ b/runtime/onert/backend/acl_neon/Backend.h
@@ -21,6 +21,7 @@
#include <backend/Backend.h>
#include <ir/Operands.h>
+#include "BackendContext.h"
#include "Config.h"
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
@@ -41,21 +42,20 @@ public:
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
+ bool is_linear_executor) const override
{
const auto &operands = graph.operands();
const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
+ auto context = std::make_unique<acl_neon::BackendContext>(this, &graph);
auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
- auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
- context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
diff --git a/runtime/onert/backend/acl_neon/BackendContext.cc b/runtime/onert/backend/acl_neon/BackendContext.cc
new file mode 100644
index 000000000..8b53171f7
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/BackendContext.cc
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "Optimizer.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ // Prepare scanning
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ const auto &li = lower_info.operand.at(ind);
+ if (li->def_factors().getOnlyElement().backend() != backend())
+ continue;
+
+ // Ignore unused tensor
+ if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+ {
+ VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
+ << std::endl;
+ return;
+ }
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ auto factor = li->def_factors().getOnlyElement();
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any op_seq (No use and def)
+ const auto info = obj.info();
+ const auto backend_layout = factor.layout();
+ // TODO Change tensor info to have permuted shape
+ tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ }
+ }
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+ // Increasing use count here makes the tensor never be deallocated, i.e it they will be
+ // deallocated last.
+ VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ // At each operation,
+ // 1. Scan DEF of outputs. If the DEF, allocate it
+ // 2. Scan DEF of inputs. If variable tensor, allocate it
+ // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ for (const auto &op_idx : op_seq.operations())
+ {
+ auto &op = graph()->operations().at(op_idx);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+ // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
+ // non-constant because of less memory usage by memory planning in here
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+ lower_info.operand.at(ind)->use_factors().size() == 1);
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+ // plan for deallocation of static tensornode
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+ }
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ optimizer->optimize();
+
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (const auto op_ind : op_seq)
+ {
+ bool op_assigned = [&]() {
+ for (auto &op_info : operation_list())
+ if (op_info.index == op_ind)
+ return true;
+ return false;
+ }();
+ if (!op_assigned)
+ continue;
+
+ const auto &op = graph()->operations().at(op_ind);
+ for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
+ find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
+ {
+ const auto &operand_lower_info =
+ lower_info.operand.at(index)->def_factors().getOnlyElement();
+
+ // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
+ // op.getOutputs() of permute (CPU) returns tensor A
+ // but tensor A belongs to the backend of acl_cl.
+ // So, we have to make this tensor NOT registered for CPU.
+ if (operand_lower_info.backend() != backend())
+ continue;
+
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = op_seq.getLayout();
+ const auto backend_layout = operand_lower_info.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+ }
+ }
+ }
+
+  // TODO Get compiler options from the compiler and use them rather than reading them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ planTensors(order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
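+    // Generate a function sequence only for op sequences that contain an operation assigned to this backend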
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
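+  // All kernel functions are generated; allocate tensor memory and initialize constant tensors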
+ tensor_builder->allocate();
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ ifunc.prepare();
+ tensor_builder->postFunctionPrepare();
+ });
+ }
+
+ return ret;
+}
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/BackendContext.h b/runtime/onert/backend/acl_neon/BackendContext.h
new file mode 100644
index 000000000..dd764c091
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/BackendContext.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+class Optimizer;
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}
+ {
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+ FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+ std::shared_ptr<Optimizer> optimizer;
+};
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h
index c7d71cdcf..9723ba012 100644
--- a/runtime/onert/backend/acl_neon/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
#include "AclConstantInitializer.h"
@@ -41,4 +41,4 @@ public:
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc
index ffaee3b3e..e712dfa81 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc
@@ -48,7 +48,7 @@ KernelGenerator::KernelGenerator(
const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
@@ -61,7 +61,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
_return_fn_seq->enableDynamicShapeInferer(false);
- _current_op_seq_layout = op_seq.getLayout();
+ _current_layout = op_seq.getLayout();
for (const auto &operation_idx : op_seq.operations())
{
const auto &node = _operations_ctx.at(operation_idx);
@@ -70,17 +70,17 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
}
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
- auto frontend_layout = _current_op_seq_layout;
+ auto frontend_layout = _current_layout;
auto backend_layout = ifm_tensor->layout();
int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
@@ -91,10 +91,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
assert(axis_value >= 0 && axis_value < ifm_rank);
const auto fixed_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+ auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
+ : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
- ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
- arm_compute::ReductionOperation::ARG_IDX_MAX);
+ ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
@@ -106,6 +107,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ const auto NNApiInputs = 2;
+ if (node.getInputs().size() != NNApiInputs)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ if (!_ctx.at(crops_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant crops NYI for acl_neon backend BatchToSpaceND");
+ }
+
+ auto crops = _ctx.at(crops_index).asVector<int32_t>();
+ for (auto crop : crops)
+ {
+ if (crop != 0)
+ {
+ throw std::runtime_error("Non-zero crops NYI for acl_neon backend BatchToSpaceND");
+ }
+ }
+ }
+
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
@@ -178,8 +198,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -232,8 +252,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -297,7 +317,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
else
{
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
@@ -495,7 +515,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
::arm_compute::NEFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
@@ -552,7 +572,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
assert(backend_layout == ifm_tensor->layout());
assert(backend_layout == indices_tensor->layout());
- assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+ assert(ifm_rank < 4 || _current_layout == backend_layout);
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
@@ -686,7 +706,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : input_indexes)
inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
if (axis < 0)
@@ -738,7 +758,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
{
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
const auto axis =
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
@@ -762,8 +782,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_reg, _current_op_seq_layout,
- acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
@@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = input_tensor->layout();
const auto reduce_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
@@ -873,7 +892,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
// NOTE This operation must not be changed the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
@@ -1047,7 +1066,7 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &ofm_ind : output_indexes)
output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
auto axis = _ctx.at(axis_index).asScalar<int32_t>();
if (axis < 0)
@@ -1085,7 +1104,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
@@ -1150,7 +1169,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
@@ -1244,9 +1263,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout);
const auto stride = node.param().stride;
@@ -1285,7 +1304,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
const auto rank = _ctx.at(ifm_idx).shape().rank();
@@ -1340,7 +1359,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : output_indexes)
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
if (axis < 0)
axis += input_rank;
@@ -1413,7 +1432,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
const size_t output_rank = _ctx.at(out_idx).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h
index 4d269cde5..2a4b307b8 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.h
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
#include "ir/Operands.h"
#include "TensorBuilder.h"
@@ -31,7 +31,7 @@ namespace backend
namespace acl_neon
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
@@ -39,17 +39,20 @@ public:
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::ArgMax &) override;
+
+ void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
+ void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::FullyConnected &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::HashtableLookup &) override;
@@ -57,36 +60,34 @@ public:
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::OneHot &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::Reduce &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Squeeze &) override;
void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::ExpandDims &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::OneHot &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
};
} // namespace acl_neon
diff --git a/runtime/onert/backend/acl_neon/Optimizer.h b/runtime/onert/backend/acl_neon/Optimizer.h
index 5fe0d519c..b8fb343e9 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.h
+++ b/runtime/onert/backend/acl_neon/Optimizer.h
@@ -17,8 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__
#define __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__
-#include <backend/IOptimizer.h>
-#include <backend/BackendContext.h>
+#include "BackendContext.h"
#include "TensorBuilder.h"
namespace onert
@@ -28,12 +27,12 @@ namespace backend
namespace acl_neon
{
-class Optimizer : public IOptimizer
+class Optimizer
{
public:
Optimizer(BackendContext *context);
- void optimize() override;
+ void optimize();
private:
BackendContext *_context;
diff --git a/runtime/onert/backend/acl_neon/acl_neon.cc b/runtime/onert/backend/acl_neon/acl_neon.cc
index f490d132d..6535fb291 100644
--- a/runtime/onert/backend/acl_neon/acl_neon.cc
+++ b/runtime/onert/backend/acl_neon/acl_neon.cc
@@ -14,20 +14,11 @@
* limitations under the License.
*/
-#include <util/logging.h>
-
#include "Backend.h"
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'acl_neon' loaded\n";
- return new onert::backend::acl_neon::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'acl_neon' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_neon::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index fc8574b26..0b416a7e9 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -54,8 +54,6 @@ public:
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
context->external_context());
- context->tensor_register = nullptr;
- context->optimizer = nullptr;
return context;
}
diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc
new file mode 100644
index 000000000..6b958c1b7
--- /dev/null
+++ b/runtime/onert/backend/cpu/BackendContext.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+void BackendContext::initConsts()
+{
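+  // Register operation-specific constant initializers first, then default initializers for the rest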
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
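+    // Infer the frontend layout from the first operation that uses this operand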
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
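+    // Register the operand only if it is defined by this backend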
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+  // TODO Get compiler options from the compiler and use them rather than reading them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
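+  // Initialize constant tensors before their IR data is released below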
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
index e90b21054..0a4106d33 100644
--- a/runtime/onert/backend/cpu/BackendContext.h
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -18,6 +18,9 @@
#define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
#include "ExternalContext.h"
namespace onert
@@ -32,21 +35,35 @@ class BackendContext : public onert::backend::BackendContext
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
- std::shared_ptr<ITensorRegister> tensor_register = nullptr,
- std::shared_ptr<IOptimizer> optimizer = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
- constant_initializer, kernel_gen, tensor_register,
- optimizer),
- _external_context(new ExternalContext)
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(new ExternalContext)
{
}
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+ FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
// NOTE ruy context has a thread pool, and when multiple ruy contexts are created,
// the thread pool is also created in duplicate
// TODO Create one ruy context for session
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
deleted file mode 100644
index 6f6eb77bc..000000000
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ConstantInitializer.h"
-#include "Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
-{
- // DO NOTHING
-}
-
-void ConstantInitializer::registerDefaultInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- registerExternalInitializer(index, obj);
-}
-
-void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- _init_map[index] = [](const onert::ir::Operand &model_obj, onert::backend::ITensor &itensor) {
- auto data = model_obj.shareData();
- assert(data && data->base());
- ExternalTensor &tensor = dynamic_cast<ExternalTensor &>(itensor);
- tensor.setData(data);
- };
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerExternalInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerExternalInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto &weight_obj = _operands.at(weight_index);
- registerExternalInitializer(weight_index, weight_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- if (!bias_index.undefined())
- {
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
- }
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
index c016c83bc..d7858c0f6 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -14,13 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
-#include "backend/cpu_common/TensorRegistry.h"
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
+#include <backend/cpu_common/ConstantInitializer.h>
namespace onert
{
@@ -29,35 +26,10 @@ namespace backend
namespace cpu
{
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg);
-
-public:
- void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
-
- // TODO: For now the only cpu backend supports constant tensor to use data from external
- // If the other backend supports (to do this,
- // ExternalTensor should be abstract such as IExternal, maybe),
- // this can be an interface of IConstantInitializer
- void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
-
-public:
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::FullyConnected &) override;
-
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
-
-private:
- std::shared_ptr<ITensorRegistry> _tensor_reg;
-};
+using ConstantInitializer = cpu_common::ConstantInitializer;
} // namespace cpu
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h
index 32e249f5a..f5d11f4f1 100644
--- a/runtime/onert/backend/cpu/ExternalContext.h
+++ b/runtime/onert/backend/cpu/ExternalContext.h
@@ -17,7 +17,6 @@
#ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
#define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
-#include <backend/IExternalContext.h>
#include <util/ConfigSource.h>
#include <ruy/context.h>
@@ -33,7 +32,7 @@ namespace backend
namespace cpu
{
-class ExternalContext : public IExternalContext
+class ExternalContext
{
public:
ExternalContext() : _ruy_context(new ruy::Context)
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 451815b65..25756eced 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -23,6 +23,7 @@
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
+#include "ops/DepthToSpaceLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/EinsumLayer.h"
#include "ops/ElementwiseActivationLayer.h"
@@ -108,12 +109,16 @@ convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type
{
switch (type_ir)
{
+ case ir::operation::ElementwiseActivation::Type::ELU:
+ return ops::ElementwiseActivationType::kElu;
case ir::operation::ElementwiseActivation::Type::LOGISTIC:
return ops::ElementwiseActivationType::kLogistic;
case ir::operation::ElementwiseActivation::Type::RELU:
return ops::ElementwiseActivationType::kReLU;
case ir::operation::ElementwiseActivation::Type::TANH:
return ops::ElementwiseActivationType::kTanh;
+ case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+ return ops::ElementwiseActivationType::kLeakyReLU;
default:
throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
}
@@ -124,6 +129,8 @@ convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinary
{
switch (type_ir)
{
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ return ops::ElementwiseBinaryType::kLogicalAnd;
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
return ops::ElementwiseBinaryType::kLogicalOr;
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
@@ -167,6 +174,10 @@ ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::Elementwise
return ops::ElementwiseUnaryType::kRSqrt;
case ir::operation::ElementwiseUnary::Type::SIN:
return ops::ElementwiseUnaryType::kSin;
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ return ops::ElementwiseUnaryType::kSqrt;
+ case ir::operation::ElementwiseUnary::Type::SQUARE:
+ return ops::ElementwiseUnaryType::kSquare;
case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
return ops::ElementwiseUnaryType::kZerosLike;
default:
@@ -217,7 +228,7 @@ KernelGenerator::KernelGenerator(
const std::shared_ptr<ExternalContext> &external_context)
: _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
_tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+ _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
// DO NOTHING
}
@@ -260,7 +271,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
}
- _current_op_seq_layout = op_seq.getLayout();
+ _current_layout = op_seq.getLayout();
for (const auto &operation_idx : op_seq.operations())
{
const auto &node = _operations_ctx.at(operation_idx);
@@ -314,8 +325,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
_return_fn = std::move(fn);
return;
}
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -342,8 +353,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -364,7 +375,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
- dilation_height, activation, ofm_tensor);
+ dilation_height, activation, ofm_tensor, _external_context);
_return_fn = std::move(fn);
}
@@ -374,7 +385,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
@@ -418,16 +429,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
void KernelGenerator::visit(const ir::operation::Fill &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
+ // SHAPE input is used for shape inference
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
- auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto value_tensor = _tensor_reg->getPortableTensor(value_index);
auto fn = std::make_unique<ops::FillLayer>();
- fn->configure(input_tensor, value_tensor, output_tensor);
+ fn->configure(value_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -576,7 +586,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(backend_layout == indices_tensor->layout());
const auto &input_shape = _ctx.at(input_index).shape();
UNUSED_RELEASE(input_shape);
- assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
+ assert(input_shape.rank() < 4 || _current_layout == backend_layout);
const auto axis_raw = node.param().axis;
const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
@@ -640,7 +650,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
for (auto &idx : opSeq)
{
const auto &operand = _ctx.at(idx);
- // TODO make sure using `_current_op_seq_layout` is correct for custom operations
+ // TODO make sure using `_current_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
auto in_tensor = _tensor_reg->getPortableTensor(idx);
tensors.emplace_back(in_tensor);
@@ -713,15 +723,14 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+ // AXIS input is used for output shape inference
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
- auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_tensor, axis_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -731,7 +740,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
assert(-rank <= axis && axis < rank);
@@ -753,7 +762,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
const auto input_index{node.getInputs().at(0)};
const auto rank = _ctx.at(input_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
assert(rank == 0 || (-rank <= axis && axis < rank));
@@ -1004,11 +1013,11 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)};
+ const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
@@ -1016,7 +1025,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
- fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true);
+ fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);
_return_fn = std::move(fn);
}
@@ -1029,8 +1038,8 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
const auto kh = node.param().kh;
const auto kw = node.param().kw;
const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
const auto padding =
ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
@@ -1255,6 +1264,21 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
_return_fn = std::move(fn);
}
+void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ auto block_size = node.param().block_size;
+
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+
+ auto fn = std::make_unique<ops::DepthToSpaceLayer>();
+
+ fn->configure(input_tensor, block_size, output_tensor);
+ _return_fn = std::move(fn);
+}
+
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h
index 5df77607f..3a4cfbffa 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.h
+++ b/runtime/onert/backend/cpu/KernelGenerator.h
@@ -23,7 +23,7 @@
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
@@ -34,7 +34,7 @@ namespace backend
namespace cpu
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
@@ -43,59 +43,59 @@ public:
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
- using IKernelGenerator::visit;
+ void visit(const ir::OpSequence &) override;
void visit(const ir::operation::AddN &) override;
- void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::ArgMinMax &) override;
+ void visit(const ir::operation::BatchMatMul &) override;
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::BroadcastTo &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::Custom &node) override;
+ void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::Fill &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Einsum &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Custom &node) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
+ void visit(const ir::operation::Fill &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::FusedBatchNorm &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Unpack &) override;
+ void visit(const ir::operation::MatrixBandPart &) override;
void visit(const ir::operation::OneHot &) override;
- void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::Select &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::Shape &) override;
- void visit(const ir::operation::ResizeBilinear &node) override;
- void visit(const ir::operation::Reverse &) override;
- void visit(const ir::operation::ArgMax &) override;
+ void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Pad &) override;
void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Pow &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Tile &) override;
- void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
void visit(const ir::operation::Rank &) override;
- void visit(const ir::operation::MatrixBandPart &) override;
- void visit(const ir::operation::BatchMatMul &) override;
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::BroadcastTo &) override;
- void visit(const ir::operation::FusedBatchNorm &) override;
- void visit(const ir::operation::LogSoftmax &) override;
+ void visit(const ir::operation::Reduce &) override;
+ void visit(const ir::operation::Reshape &) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
+ void visit(const ir::operation::Reverse &) override;
+ void visit(const ir::operation::Select &) override;
+ void visit(const ir::operation::Shape &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SplitV &) override;
+ void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::StridedSlice &) override;
+ void visit(const ir::operation::Tile &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::Unpack &) override;
private:
const ir::Operands &_ctx;
@@ -103,7 +103,7 @@ private:
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc
deleted file mode 100644
index 3edac897c..000000000
--- a/runtime/onert/backend/cpu/StaticTensorManager.cc
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <util/logging.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- cpu_common::DynamicTensorManager *dynamic_tensor_manager)
- : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg},
- _dynamic_tensor_manager{dynamic_tensor_manager}
-{
- // DO NOTHING
-}
-
-void StaticTensorManager::allocateNonconsts(void)
-{
- _nonconst_mgr->allocate();
-
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second.get();
- if (!_as_constants[ind] && !tensor->is_dynamic())
- {
- auto *buffer = _nonconst_mgr->getBuffer(ind);
- tensor->setBuffer(buffer);
-
- VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
- }
- }
-}
-
-void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
-
-void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
- bool as_const)
-{
- assert(!_tensors->getITensor(ind));
- if (as_const)
- {
- auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- else
- {
- auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
- _dynamic_tensor_manager->dynamic_mem_mgr().get());
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- _as_constants[ind] = as_const;
-}
-
-void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- assert(_tensors->getITensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getITensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->claimPlan(ind, size);
-}
-
-void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
-{
- assert(_tensors->getITensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getITensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->releasePlan(ind);
-}
-
-void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (const auto &it : _tensors->native_tensors())
- fn(it.first);
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/StaticTensorManager.h b/runtime/onert/backend/cpu/StaticTensorManager.h
index 2af61e4e7..d07f0c814 100644
--- a/runtime/onert/backend/cpu/StaticTensorManager.h
+++ b/runtime/onert/backend/cpu/StaticTensorManager.h
@@ -17,13 +17,7 @@
#ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
-#include "backend/IStaticTensorManager.h"
-#include "backend/cpu_common/DynamicTensorManager.h"
-#include "backend/cpu_common/MemoryManager.h"
-#include "backend/cpu_common/TensorRegistry.h"
-#include "backend/ITensorManager.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandInfo.h"
+#include "backend/cpu_common/StaticTensorManager.h"
namespace onert
{
@@ -32,30 +26,7 @@ namespace backend
namespace cpu
{
-class StaticTensorManager : public backend::IStaticTensorManager
-{
-public:
- StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- cpu_common::DynamicTensorManager *dynamic_tensor_manager);
- virtual ~StaticTensorManager() = default;
-
- void allocateNonconsts(void);
- void deallocateNonconsts(void);
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout, bool as_const);
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
-private:
- std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr;
- const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
- ir::OperandIndexMap<bool> _as_constants;
- cpu_common::DynamicTensorManager *_dynamic_tensor_manager;
-};
+using StaticTensorManager = cpu_common::StaticTensorManager;
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h
index 2ad2ad0fb..d663c3f50 100644
--- a/runtime/onert/backend/cpu/Tensor.h
+++ b/runtime/onert/backend/cpu/Tensor.h
@@ -28,92 +28,7 @@ namespace cpu
{
using Tensor = cpu_common::Tensor;
-
-/**
- * @brief Class that uses data from external memory that is not managed by a backend
- * instead of allocating and copying the data. ExternalTensor's data pointer points to
- * an address of memory such as where memory is already allocated, or mmapped area.
- * This is meaning that ExternalTensor can take all of types' ir::Data.
- * To support this, assume below things no padding, always NHWC layout,
- * constant tensor and not dynamic.
- */
-class ExternalTensor : public Tensor
-{
-public:
- ExternalTensor() = delete;
- virtual ~ExternalTensor();
-
-public:
- ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
- : Tensor(info, layout, nullptr)
- {
- assert(_layout == ir::Layout::NHWC);
- assert(_info.isConstant());
- assert(_info.isDynamic() == false);
- }
-
-public:
- /**
- * @brief set Data to be shared from external so that this ExternalTensor will not be
- * allocated on CPU backend
- * @param[in] data data of Operand to be set
- */
- void setData(const std::shared_ptr<ir::Data> data)
- {
- assert(data != nullptr);
- _data = data;
- // Note. Some op such as cker::Conv could take buffer as nullptr.
- // That's why _buffer also would be used
- _buffer = const_cast<uint8_t *>(_data->base());
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
-
- bool is_constant() const override { return true; }
- bool is_dynamic() const override { return false; }
- void set_dynamic() override
- {
- throw std::runtime_error("This tensor does not support changing dynamic");
- }
-
- void setShape(const ir::Shape &) override
- {
- throw std::runtime_error("This tensor does not support changing shape");
- }
-
- void increase_ref() override { ++_num_references; }
-
- void decrease_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- --_num_references;
- if (_num_references == 0)
- {
- _data.reset();
- _buffer = nullptr;
- }
- }
-
- /**
- * @brief Reset reference count to zero and release data
- */
- void reset_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- _num_references = 0;
-
- _data.reset();
- _buffer = nullptr;
- }
-
- int32_t num_references() override { return _num_references; }
-
-private:
- std::shared_ptr<const ir::Data> _data;
-};
+using ExternalTensor = cpu_common::ExternalTensor;
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h
index 448abc229..9d8a5deb5 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.h
+++ b/runtime/onert/backend/cpu/TensorBuilder.h
@@ -20,7 +20,6 @@
#include <backend/cpu_common/DynamicTensorManager.h>
#include <backend/cpu_common/TensorRegistry.h>
-#include <backend/ITensorBuilder.h>
#include <ir/OperandIndexMap.h>
#include "StaticTensorManager.h"
@@ -35,7 +34,7 @@ namespace backend
namespace cpu
{
-class TensorBuilder : public ITensorBuilder
+class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
@@ -47,18 +46,18 @@ public:
* @param[in] layout Operand data layout
*/
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
+ ir::Layout backend_layout);
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
- bool isRegistered(const ir::OperandIndex &) const override;
+ bool isRegistered(const ir::OperandIndex &) const;
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override { /* DO NOTHING */}
+ void prepare(void);
+ void allocate();
+ void postFunctionPrepare() { /* DO NOTHING */}
- IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
+ IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
diff --git a/runtime/onert/backend/cpu/cpu.cc b/runtime/onert/backend/cpu/cpu.cc
index 5385bb2a3..55538e2a6 100644
--- a/runtime/onert/backend/cpu/cpu.cc
+++ b/runtime/onert/backend/cpu/cpu.cc
@@ -16,18 +16,9 @@
#include "Backend.h"
-#include <util/logging.h>
-
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'cpu' loaded\n";
- return new onert::backend::cpu::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'cpu' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::cpu::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
index 2fd284c91..d5ffdef0b 100644
--- a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
@@ -79,6 +79,9 @@ void ArgMinMaxLayer::run()
case ir::DataType::UINT8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
break;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
+ break;
case ir::DataType::INT32:
TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t);
break;
@@ -97,6 +100,9 @@ void ArgMinMaxLayer::run()
case ir::DataType::UINT8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t);
break;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t);
+ break;
case ir::DataType::INT32:
TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t);
break;
diff --git a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
index 7ef023788..ba9655924 100644
--- a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
+++ b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
@@ -67,7 +67,7 @@ void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTens
void BatchMatMulLayer::run()
{
- if (_lhs->data_type() == OperandType::FLOAT32)
+ if ((_lhs->data_type() == OperandType::FLOAT32) && (_rhs->data_type() == OperandType::FLOAT32))
{
batchMatMulFloat32();
}
diff --git a/runtime/onert/backend/cpu/ops/ConcatLayer.cc b/runtime/onert/backend/cpu/ops/ConcatLayer.cc
index d26ed7378..edfdfc1a6 100644
--- a/runtime/onert/backend/cpu/ops/ConcatLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConcatLayer.cc
@@ -117,24 +117,26 @@ void ConcatLayer::configure(const std::vector<const IPortableTensor *> &inputs,
void ConcatLayer::run()
{
- if (_output->data_type() == OperandType::FLOAT32)
+ switch (_output->data_type())
{
- concatenationGeneral<float>();
+ case OperandType::FLOAT32:
+ concatenationGeneral<float>();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ concatenationQuant8();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ concatenationGeneral<int8_t>();
+ break;
+ case OperandType::INT32:
+ concatenationGeneral<int32_t>();
+ break;
+ case OperandType::INT64:
+ concatenationGeneral<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Concat: unsupported data type");
}
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- concatenationQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- concatenationGeneral<int32_t>();
- }
- else if (_output->data_type() == OperandType::INT64)
- {
- concatenationGeneral<int64_t>();
- }
- else
- throw std::runtime_error("Concat: unsupported data type");
}
} // namespace ops
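The run() rewrite above dispatches concatenationGeneral<T> on the output element type. As a rough standalone sketch (not the cker implementation; names below are illustrative only), generic concatenation along an axis reduces to copying one contiguous slice per input inside an outer/inner loop decomposition:

#include <algorithm>
#include <cstddef>
#include <vector>

// Concatenate two tensors whose shapes match on every dimension except `axis`.
// outer = product of dims before axis, inner = product of dims after axis.
template <typename T>
std::vector<T> concat_along_axis(const std::vector<T> &a, const std::vector<T> &b,
                                 const std::vector<int> &shape_a, const std::vector<int> &shape_b,
                                 int axis)
{
  std::size_t outer = 1, inner = 1;
  for (int i = 0; i < axis; ++i)
    outer *= shape_a[i];
  for (std::size_t i = axis + 1; i < shape_a.size(); ++i)
    inner *= shape_a[i];

  const std::size_t a_chunk = shape_a[axis] * inner; // contiguous slice per outer step
  const std::size_t b_chunk = shape_b[axis] * inner;

  std::vector<T> out(outer * (a_chunk + b_chunk));
  for (std::size_t o = 0; o < outer; ++o)
  {
    std::copy_n(a.begin() + o * a_chunk, a_chunk, out.begin() + o * (a_chunk + b_chunk));
    std::copy_n(b.begin() + o * b_chunk, b_chunk,
                out.begin() + o * (a_chunk + b_chunk) + a_chunk);
  }
  return out;
}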
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index 799e9e2d0..c964e38f9 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -203,8 +203,6 @@ void ConvolutionLayer::prepare()
_prepare = true;
}
-#undef ANDROID_NN_CONV_PARAMETERS
-
} // namespace ops
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc
new file mode 100644
index 000000000..d265d0ac2
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpaceLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/DepthToSpace.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+DepthToSpaceLayer::DepthToSpaceLayer() : _input(nullptr), _block_size(0), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T> void DepthToSpaceLayer::depthToSpace()
+{
+ nnfw::cker::DepthToSpace(getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()),
+ _block_size);
+}
+
+void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size,
+ IPortableTensor *output)
+{
+ _input = input;
+ _block_size = block_size;
+ _output = output;
+}
+
+void DepthToSpaceLayer::run()
+{
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ depthToSpace<float>();
+ break;
+ case OperandType::INT32:
+ depthToSpace<int32_t>();
+ break;
+ case OperandType::INT64:
+ depthToSpace<int64_t>();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ depthToSpace<uint8_t>();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ depthToSpace<int8_t>();
+ break;
+ default:
+ throw std::runtime_error{"DepthToSpace: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
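For reference, depth-to-space with block size b maps an NHWC input of shape [N, H, W, C] to [N, H*b, W*b, C/(b*b)]. The cker kernel called above does the actual work; the loop below is only a standalone sketch, assuming the TensorFlow channel ordering, with illustrative names:

#include <cassert>
#include <cstddef>
#include <vector>

template <typename T>
std::vector<T> depth_to_space_nhwc(const std::vector<T> &in, int N, int H, int W, int C, int b)
{
  assert(C % (b * b) == 0);
  const int Cout = C / (b * b);
  std::vector<T> out(static_cast<std::size_t>(N) * H * b * W * b * Cout);
  for (int n = 0; n < N; ++n)
    for (int h = 0; h < H; ++h)
      for (int w = 0; w < W; ++w)
        for (int c = 0; c < C; ++c)
        {
          const int bh = c / (b * Cout); // row offset inside the b x b block
          const int bw = (c / Cout) % b; // column offset inside the block
          const int co = c % Cout;       // output channel
          const std::size_t src = ((static_cast<std::size_t>(n) * H + h) * W + w) * C + c;
          const std::size_t dst =
            ((static_cast<std::size_t>(n) * H * b + h * b + bh) * (W * b) + w * b + bw) * Cout + co;
          out[dst] = in[src];
        }
  return out;
}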
diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h
new file mode 100644
index 000000000..32e0171ce
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class DepthToSpaceLayer : public ::onert::exec::IFunction
+{
+public:
+ DepthToSpaceLayer();
+
+ void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ template <typename T> void depthToSpace();
+
+ const IPortableTensor *_input;
+ int32_t _block_size;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
index f1dc1103a..85553d14d 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
@@ -43,11 +43,12 @@ void DepthwiseConvolutionLayer::convFloat32()
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
- nnfw::cker::DepthwiseConv(
+ nnfw::cker::DepthwiseConv<float, float>(
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
}
void DepthwiseConvolutionLayer::convQuant8()
@@ -79,11 +80,12 @@ void DepthwiseConvolutionLayer::convQuant8()
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;
- nnfw::cker::DepthwiseConv(
+ nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()),
getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()),
+ _external_context->ruy_context());
}
void DepthwiseConvolutionLayer::configure(
@@ -91,7 +93,8 @@ void DepthwiseConvolutionLayer::configure(
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
- const ir::Activation activation, IPortableTensor *output)
+ const ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
{
_input = input;
_kernel = kernel;
@@ -107,6 +110,7 @@ void DepthwiseConvolutionLayer::configure(
_dilationHeight = dilationHeight;
_activation = activation;
_output = output;
+ _external_context = external_context;
}
void DepthwiseConvolutionLayer::run()
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
index fb032ecbf..fe1fcc182 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
@@ -19,6 +19,7 @@
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
+#include "../ExternalContext.h"
#include <exec/IFunction.h>
@@ -47,7 +48,7 @@ public:
const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
const uint32_t multiplier, const uint32_t dilationWidth,
const uint32_t dilationHeight, const ir::Activation activation,
- IPortableTensor *output);
+ IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context);
void run() override;
@@ -71,6 +72,8 @@ private:
uint32_t _dilationHeight{1};
ir::Activation _activation{ir::Activation::NONE};
+
+ std::shared_ptr<ExternalContext> _external_context;
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
index c1d63172b..3e1da5ec0 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
@@ -18,6 +18,8 @@
#include "OperationUtils.h"
+#include <cker/operation/ELU.h>
+#include <cker/operation/LeakyReLU.h>
#include <cker/operation/Logistic.h>
#include <cker/operation/ReLU.h>
#include <cker/operation/ReLU6.h>
@@ -91,6 +93,19 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
switch (op_type)
{
+ case ElementwiseActivationType::kElu:
+ if (input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ELU(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Elu): unsupported data type"};
+ }
+ break;
case ElementwiseActivationType::kLogistic:
if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
@@ -160,6 +175,21 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
}
break;
+ case ElementwiseActivationType::kLeakyReLU:
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output),
+ reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(LeakyReLU): unsupported data type"};
+ }
+ break;
default:
throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
}
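The two new float-only activations wired up above have simple reference semantics. The snippet below is a standalone sketch, not the cker kernels that actually run; since the ELU path above passes no alpha, the standard alpha = 1 form is assumed:

#include <cmath>

// ELU(x)       = x for x >= 0, exp(x) - 1 otherwise (alpha fixed to 1 here)
// LeakyReLU(x) = x for x >= 0, alpha * x  otherwise
float elu_ref(float x) { return x >= 0.0f ? x : std::expm1(x); }
float leaky_relu_ref(float x, float alpha) { return x >= 0.0f ? x : alpha * x; }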
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
index 3ef580041..948ab3b57 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
@@ -32,9 +32,11 @@ namespace ops
enum class ElementwiseActivationType
{
+ kElu,
kLogistic,
kReLU,
- kTanh
+ kTanh,
+ kLeakyReLU
};
class ElementwiseActivationLayer : public ::onert::exec::IFunction
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
index ea3c1e7cd..1e17a0828 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
@@ -18,6 +18,7 @@
#include "OperationUtils.h"
+#include <cker/operation/LogicalAnd.h>
#include <cker/operation/LogicalOr.h>
#include <cker/operation/MaxMin.h>
@@ -33,6 +34,25 @@ namespace ops
namespace
{
template <typename T>
+void logicalAndGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output)
+{
+ if (!HaveSameShapes(lhs, rhs))
+ {
+ nnfw::cker::LogicalAndBroadcast<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
+ reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::LogicalAndElementwise<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ }
+}
+
+template <typename T>
void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
IPortableTensor *output)
{
@@ -88,6 +108,16 @@ void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortab
switch (op_type)
{
+ case ElementwiseBinaryType::kLogicalAnd:
+ if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalAndGeneric<bool>;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalOr: Unsupported data type"};
+ }
+ break;
case ElementwiseBinaryType::kLogicalOr:
if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
{
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
index 066455e72..15d7f3049 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
@@ -195,6 +195,18 @@ void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
}
+void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Sqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void squareFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Square(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output)
{
if (!HaveSameShapes(input, output))
@@ -363,6 +375,26 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen
throw std::runtime_error{"Sin: Unsupported data type"};
}
break;
+ case ElementwiseUnaryType::kSqrt:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = sqrtFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Sqrt: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kSquare:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = squareFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Square: Unsupported data type"};
+ }
+ break;
case ElementwiseUnaryType::kZerosLike:
if (input->data_type() == OperandType::FLOAT32)
{
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
index c1765b5b7..54a6fc02a 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
@@ -46,6 +46,8 @@ enum class ElementwiseUnaryType
kRound,
kRSqrt,
kSin,
+ kSqrt,
+ kSquare,
kZerosLike
};
diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
index b545e6743..5ea0ea893 100644
--- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
@@ -25,22 +25,19 @@ namespace cpu
namespace ops
{
-ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _axis(nullptr), _output(nullptr)
+ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _output(nullptr)
{
// DO NOTHING
}
-void ExpandDimsLayer::configure(const IPortableTensor *input, const IPortableTensor *axis,
- IPortableTensor *output)
+void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *output)
{
_input = input;
- _axis = axis;
_output = output;
}
void ExpandDimsLayer::run()
{
- // TODO use _axis to calculate shape of output when _axis is not constant
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
index b5d4938b5..1b7ead0c3 100644
--- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
+++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
@@ -36,14 +36,12 @@ public:
ExpandDimsLayer();
public:
- void configure(const IPortableTensor *input, const IPortableTensor *axis,
- IPortableTensor *output);
+ void configure(const IPortableTensor *input, IPortableTensor *output);
void run() override;
private:
const IPortableTensor *_input;
- const IPortableTensor *_axis;
IPortableTensor *_output;
};
diff --git a/runtime/onert/backend/cpu/ops/FillLayer.cc b/runtime/onert/backend/cpu/ops/FillLayer.cc
index df3f8b7cd..5b7c17907 100644
--- a/runtime/onert/backend/cpu/ops/FillLayer.cc
+++ b/runtime/onert/backend/cpu/ops/FillLayer.cc
@@ -29,15 +29,13 @@ namespace cpu
namespace ops
{
-FillLayer::FillLayer() : _input(nullptr), _value(nullptr), _output(nullptr)
+FillLayer::FillLayer() : _value(nullptr), _output(nullptr)
{
// DO NOTHING
}
-void FillLayer::configure(const IPortableTensor *input, const IPortableTensor *value,
- IPortableTensor *output)
+void FillLayer::configure(const IPortableTensor *value, IPortableTensor *output)
{
- _input = input;
_value = value;
_output = output;
}
@@ -47,28 +45,24 @@ void FillLayer::run()
switch (_output->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Fill<float *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<float *>(_value->buffer()),
+ nnfw::cker::Fill<float *>(reinterpret_cast<float *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<float *>(_output->buffer()));
break;
case OperandType::INT32:
- nnfw::cker::Fill<int32_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<int32_t *>(_value->buffer()),
+ nnfw::cker::Fill<int32_t *>(reinterpret_cast<int32_t *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<int32_t *>(_output->buffer()));
break;
case OperandType::INT64:
- nnfw::cker::Fill<int64_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<int64_t *>(_value->buffer()),
+ nnfw::cker::Fill<int64_t *>(reinterpret_cast<int64_t *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<int64_t *>(_output->buffer()));
break;
case OperandType::UINT32:
- nnfw::cker::Fill<uint32_t *>(
- getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<uint32_t *>(_value->buffer()), getTensorShape(_output),
- reinterpret_cast<uint32_t *>(_output->buffer()));
+ nnfw::cker::Fill<uint32_t *>(reinterpret_cast<uint32_t *>(_value->buffer()),
+ getTensorShape(_output),
+ reinterpret_cast<uint32_t *>(_output->buffer()));
break;
default:
throw std::runtime_error{"Fill: unsupported data type"};
diff --git a/runtime/onert/backend/cpu/ops/FillLayer.h b/runtime/onert/backend/cpu/ops/FillLayer.h
index 1f17d6b68..ce843654a 100644
--- a/runtime/onert/backend/cpu/ops/FillLayer.h
+++ b/runtime/onert/backend/cpu/ops/FillLayer.h
@@ -35,13 +35,11 @@ class FillLayer : public ::onert::exec::IFunction
public:
FillLayer();
- void configure(const IPortableTensor *input, const IPortableTensor *value,
- IPortableTensor *output);
+ void configure(const IPortableTensor *value, IPortableTensor *output);
void run() override;
private:
- const IPortableTensor *_input;
const IPortableTensor *_value;
IPortableTensor *_output;
};
diff --git a/runtime/onert/backend/cpu/ops/MeanLayer.cc b/runtime/onert/backend/cpu/ops/MeanLayer.cc
index 4921ac748..f130692ee 100644
--- a/runtime/onert/backend/cpu/ops/MeanLayer.cc
+++ b/runtime/onert/backend/cpu/ops/MeanLayer.cc
@@ -36,9 +36,24 @@ MeanLayer::MeanLayer() : _input(nullptr), _axes(nullptr), _output(nullptr), _kee
void MeanLayer::MeanFloat32()
{
- nnfw::cker::Mean(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
- getReducerAxes(_axes));
+ const auto inputShape = getTensorShape(_input);
+ const auto axisVec = getReducerAxes(_axes);
+ bool axis_is_1_and_2 =
+ _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 &&
+ ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1));
+
+ if (axis_is_1_and_2)
+ {
+ nnfw::cker::MeanAxis1And2(inputShape, reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::Mean(inputShape, reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ axisVec);
+ }
}
void MeanLayer::MeanQuant8()
@@ -57,6 +72,10 @@ void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *a
_axes = axes;
_output = output;
_keep_dims = keep_dims;
+
+ if (_input->data_type() != OperandType::FLOAT32 &&
+ _input->data_type() != OperandType::QUANT_UINT8_ASYMM)
+ throw std::runtime_error{"Mean: unsupported data type"};
}
void MeanLayer::run()
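The new fast path above only triggers for a 4D input with keep_dims and reduction axes {1, 2}, i.e. averaging over the spatial H and W positions of an NHWC float tensor. A minimal standalone sketch of that computation (illustrative names, not the cker API; the quantized path is unchanged):

#include <cstddef>
#include <vector>

// Mean over H and W of an NHWC float tensor; output is [N, 1, 1, C] flattened to N*C.
std::vector<float> mean_hw(const std::vector<float> &in, int N, int H, int W, int C)
{
  std::vector<float> out(static_cast<std::size_t>(N) * C, 0.0f);
  for (int n = 0; n < N; ++n)
    for (int h = 0; h < H; ++h)
      for (int w = 0; w < W; ++w)
        for (int c = 0; c < C; ++c)
          out[static_cast<std::size_t>(n) * C + c] +=
            in[((static_cast<std::size_t>(n) * H + h) * W + w) * C + c];
  for (auto &v : out)
    v /= static_cast<float>(H * W);
  return out;
}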
diff --git a/runtime/onert/backend/ruy/Backend.h b/runtime/onert/backend/ruy/Backend.h
new file mode 100644
index 000000000..bc8a024d8
--- /dev/null
+++ b/runtime/onert/backend/ruy/Backend.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_BACKEND_H__
+#define __ONERT_BACKEND_RUY_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
+ bool) const override
+ {
+ const auto &operands = graph.operands();
+ const auto &operations = graph.operations();
+ auto context = std::make_unique<BackendContext>(this, &graph);
+ auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->external_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_BACKEND_H__
diff --git a/runtime/onert/backend/ruy/BackendContext.cc b/runtime/onert/backend/ruy/BackendContext.cc
new file mode 100644
index 000000000..ef686f480
--- /dev/null
+++ b/runtime/onert/backend/ruy/BackendContext.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+ // TODO Get compiler options from the compiler and use them rather than getting them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+ // For executors that do not have a fixed linear execution order:
+ // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
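genTensors above only runs cpu_common::planTensors for the Linear executor; conceptually, linear planning claims each tensor when it is defined and releases it after its last use so lifetimes can overlap in one memory pool, while the fallback branch claims every registered tensor up front and never releases it. A rough standalone sketch of that idea (not the onert API; the Op struct and the claim/release prints are illustrative):

#include <iostream>
#include <map>
#include <vector>

struct Op
{
  std::vector<int> inputs;  // tensor ids read by this operation
  std::vector<int> outputs; // tensor ids defined by this operation
};

void plan_linear(const std::vector<Op> &execution_order)
{
  std::map<int, int> remaining_uses;
  for (const auto &op : execution_order)
    for (int in : op.inputs)
      ++remaining_uses[in];

  for (const auto &op : execution_order)
  {
    for (int out : op.outputs)
      std::cout << "claim tensor #" << out << "\n"; // lifetime starts at definition
    for (int in : op.inputs)
      if (--remaining_uses[in] == 0)
        std::cout << "release tensor #" << in << "\n"; // lifetime ends at last use
  }
}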
diff --git a/runtime/onert/backend/ruy/BackendContext.h b/runtime/onert/backend/ruy/BackendContext.h
new file mode 100644
index 000000000..b965c9a9d
--- /dev/null
+++ b/runtime/onert/backend/ruy/BackendContext.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(new ExternalContext)
+ {
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+
+ FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ // NOTE A ruy context owns a thread pool, so creating multiple ruy contexts
+ // duplicates the thread pool as well
+ // TODO Create one ruy context for session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/ruy/CMakeLists.txt b/runtime/onert/backend/ruy/CMakeLists.txt
new file mode 100644
index 000000000..206acbfbf
--- /dev/null
+++ b/runtime/onert/backend/ruy/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(LIB_ONERT_BACKEND_RUY onert_backend_ruy)
+
+nnfw_find_package(Ruy REQUIRED)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_RUY} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_lib_ruy)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE ruy)
+
+set_target_properties(${LIB_ONERT_BACKEND_RUY} PROPERTIES OUTPUT_NAME backend_ruy)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_RUY} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_RUY}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_RUY} DESTINATION lib)
diff --git a/runtime/onert/backend/cpu/Tensor.cc b/runtime/onert/backend/ruy/Config.cc
index dac8f898b..179caa9a6 100644
--- a/runtime/onert/backend/cpu/Tensor.cc
+++ b/runtime/onert/backend/ruy/Config.cc
@@ -14,18 +14,18 @@
* limitations under the License.
*/
-#include "Tensor.h"
+#include "Config.h"
namespace onert
{
namespace backend
{
-namespace cpu
+namespace ruy
{
-// `dynamic_cast` not working across library boundaries on NDK
-// With this as a key function, `dynamic_cast` works across dl
-ExternalTensor::~ExternalTensor() {}
+bool Config::initialize() { return true; }
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/ruy/Config.h b/runtime/onert/backend/ruy/Config.h
new file mode 100644
index 000000000..9160dd5b1
--- /dev/null
+++ b/runtime/onert/backend/ruy/Config.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_CONFIG_H__
+#define __ONERT_BACKEND_RUY_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class Config : public IConfig
+{
+public:
+ std::string id() override { return "ruy"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return true; }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_CONFIG_H__
diff --git a/runtime/onert/backend/ruy/ConstantInitializer.h b/runtime/onert/backend/ruy/ConstantInitializer.h
new file mode 100644
index 000000000..24b4d924d
--- /dev/null
+++ b/runtime/onert/backend/ruy/ConstantInitializer.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
+
+#include <backend/cpu_common/ConstantInitializer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using ConstantInitializer = cpu_common::ConstantInitializer;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h
new file mode 100644
index 000000000..f51faccb8
--- /dev/null
+++ b/runtime/onert/backend/ruy/ExternalContext.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
+
+#include <util/ConfigSource.h>
+#include <ruy/context.h>
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 4;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class ExternalContext
+{
+public:
+ ExternalContext() : _ruy_context(new ::ruy::Context)
+ {
+ setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+ }
+
+ void setMaxNumThreads(int max_num_threads)
+ {
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->set_max_num_threads(target_num_threads);
+ }
+
+ ::ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ const std::unique_ptr<::ruy::Context> _ruy_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc
new file mode 100644
index 000000000..cd2825068
--- /dev/null
+++ b/runtime/onert/backend/ruy/KernelGenerator.cc
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/ConvolutionLayer.h"
+#include "ops/FullyConnectedLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
+ _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+ assert(!_return_fn_seq);
+ assert(_tensor_builder->dynamicTensorManager());
+ assert(_tensor_reg);
+
+ auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
+
+ _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->operations = &_operations_ctx;
+ dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
+ dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+
+ _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ }
+
+ _current_layout = op_seq.getLayout();
+ for (const auto &operation_idx : op_seq.operations())
+ {
+ const auto &node = _operations_ctx.at(operation_idx);
+ node.accept(*this);
+ _return_fn_seq->append(releaseFunction());
+
+ for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ {
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
+
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
+ {
+ tensor->increase_ref();
+ }
+ }
+ }
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+ using ir::operation::Conv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ const auto stride = node.param().stride;
+ const auto activation = node.param().activation;
+ const auto param_padding = node.param().padding;
+ const auto dilation = node.param().dilation;
+ auto fn = std::make_unique<ops::ConvolutionLayer>();
+
+ if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
+ {
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
+ param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
+ stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+ activation, ofm_tensor, _external_context);
+
+ _return_fn = std::move(fn);
+ return;
+ }
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor,
+ _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+ const auto activation = node.param().activation;
+ const auto weights_format = node.param().weights_format;
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
+ auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::FullyConnectedLayer>();
+
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
+ _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
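The static-shape branch above resolves explicit padding values before the configure() call. For SAME-style padding, the per-dimension arithmetic is the usual TensorFlow formula, sketched standalone below; ir::calculatePadding is assumed to follow this convention, and the helper name here is illustrative:

#include <algorithm>

struct Padding1D
{
  int front; // top or left
  int back;  // bottom or right
};

// SAME padding for one spatial dimension; dilation enlarges the effective kernel.
Padding1D same_padding(int in_size, int stride, int kernel, int dilation)
{
  const int effective_kernel = (kernel - 1) * dilation + 1;
  const int out_size = (in_size + stride - 1) / stride; // ceil(in_size / stride)
  const int total = std::max(0, (out_size - 1) * stride + effective_kernel - in_size);
  return {total / 2, total - total / 2}; // the extra pixel, if any, goes to the back
}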
diff --git a/runtime/onert/backend/ruy/KernelGenerator.h b/runtime/onert/backend/ruy/KernelGenerator.h
new file mode 100644
index 000000000..0f6bd590a
--- /dev/null
+++ b/runtime/onert/backend/ruy/KernelGenerator.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
+
+#include "ExternalContext.h"
+#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class KernelGenerator : public cpu_common::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+
+private:
+ const ir::Operands &_ctx;
+ const ir::Operations &_operations_ctx;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+ ir::Layout _current_layout;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/ruy/StaticTensorManager.h b/runtime/onert/backend/ruy/StaticTensorManager.h
new file mode 100644
index 000000000..af2d25241
--- /dev/null
+++ b/runtime/onert/backend/ruy/StaticTensorManager.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
+
+#include "backend/cpu_common/StaticTensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using StaticTensorManager = cpu_common::StaticTensorManager;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/ruy/Tensor.h b/runtime/onert/backend/ruy/Tensor.h
new file mode 100644
index 000000000..60d0fbf77
--- /dev/null
+++ b/runtime/onert/backend/ruy/Tensor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_H__
+#define __ONERT_BACKEND_RUY_TENSOR_H__
+
+#include <backend/cpu_common/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using Tensor = cpu_common::Tensor;
+using ExternalTensor = cpu_common::ExternalTensor;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_H__
diff --git a/runtime/onert/backend/ruy/TensorBuilder.cc b/runtime/onert/backend/ruy/TensorBuilder.cc
new file mode 100644
index 000000000..c77defc30
--- /dev/null
+++ b/runtime/onert/backend/ruy/TensorBuilder.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg},
+ _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+  // The ruy backend supports only the NHWC layout
+ assert(layout == ir::Layout::NHWC);
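+  // Dynamic tensors go to the dynamic tensor manager; static ones are planned by the static tensor manager.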
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+ }
+}
+
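+// notifyFirstUse/notifyLastUse bracket each static tensor's lifetime for the memory planner's claim/release plan.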
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ const auto tensor_info = _tensor_info_map.at(ind);
+
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+void TensorBuilder::allocate()
+{
+  // NOTE Nothing to do here for now. Allocation already happens in the prepare stage, which is
+  // not ideal, because the kernels require `ITensor`s to be allocated before kernel generation.
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/TensorBuilder.h b/runtime/onert/backend/ruy/TensorBuilder.h
new file mode 100644
index 000000000..91c07bd82
--- /dev/null
+++ b/runtime/onert/backend/ruy/TensorBuilder.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+
+#include <backend/cpu_common/DynamicTensorManager.h>
+#include <backend/cpu_common/TensorRegistry.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
+
+ /**
+   * @brief Register tensor information to allocate on the ruy backend
+ * @param[in] ind Operand index
+ * @param[in] info Operand information
+ * @param[in] layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void prepare(void);
+ void allocate();
+  void postFunctionPrepare() { /* DO NOTHING */ }
+
+ IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
+
+private:
+ const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc
new file mode 100644
index 000000000..d249b2ce3
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "../Tensor.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer()
+ : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
+ _conv_kernel(new nnfw::ruy::Conv()), _prepare(false)
+{
+ // DO NOTHING
+}
+
+ConvolutionLayer::~ConvolutionLayer() = default;
+
+void ConvolutionLayer::convFloat32()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::ruy::ConvParams op_params;
+ op_params.padding_type = getPaddingType(_paddingType);
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::ruy::Conv &kernel = *_conv_kernel;
+ kernel(op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, const ir::PaddingType paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor,
+ const ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _paddingType = paddingType;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _dilationWidthFactor = dilationWidthFactor;
+ _dilationHeightFactor = dilationHeightFactor;
+ _activation = activation;
+ _output = output;
+ _external_context = external_context;
+}
+
+void ConvolutionLayer::run()
+{
+ prepare();
+
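+  // When the input or kernel shape is only known at run time, the padding values computed at
+  // compile time may be stale, so recompute the explicit padding from the current shapes.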
+ if (_input->is_dynamic() || _kernel->is_dynamic())
+ {
+ const auto ifm_shape = _input->getShape().asFeature(_input->layout());
+ const auto ofm_shape = _output->getShape().asFeature(_input->layout());
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto ker_shape = _kernel->getShape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ ir::Stride stride;
+    stride.vertical = _strideHeight;
+    stride.horizontal = _strideWidth;
+
+ ir::Padding param_padding;
+ param_padding.type = _paddingType;
+ param_padding.param.left = _paddingLeft;
+ param_padding.param.right = _paddingRight;
+ param_padding.param.top = _paddingTop;
+ param_padding.param.bottom = _paddingBottom;
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
+
+ _paddingLeft = padding.left;
+ _paddingRight = padding.right;
+ _paddingTop = padding.top;
+ _paddingBottom = padding.bottom;
+ }
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ convFloat32();
+ }
+ else
+ {
+ throw std::runtime_error{"Conv: unsupported data type"};
+ }
+}
+
+void ConvolutionLayer::prepare()
+{
+ if (_prepare)
+ return;
+
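+  // One-time kernel preparation is possible only when the filter is a compile-time constant.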
+ nnfw::ruy::Conv &kernel = *_conv_kernel;
+ if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
+ {
+ kernel.prepare(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output),
+ _strideWidth, _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+ }
+ _prepare = true;
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.h b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h
new file mode 100644
index 000000000..a55387b93
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
+#define __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../ExternalContext.h"
+#include "OperationUtils.h"
+
+#include <ruy/operation/Conv.h>
+#include <exec/IFunction.h>
+#include <functional>
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+class ConvolutionLayer : public ::onert::exec::IFunction
+{
+public:
+ ConvolutionLayer();
+ ~ConvolutionLayer();
+
+public:
+ void convFloat32();
+
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+                 const IPortableTensor *bias, const ir::PaddingType paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor, const ir::Activation activation,
+ IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context);
+
+ void run() override;
+
+ void prepare() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _paddingType;
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _dilationWidthFactor;
+ uint32_t _dilationHeightFactor;
+
+ ir::Activation _activation;
+
+ std::unique_ptr<nnfw::ruy::Conv> _conv_kernel;
+
+ bool _prepare;
+
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc
new file mode 100644
index 000000000..af693e3b4
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "../Tensor.h"
+#include <ruy/operation/FullyConnected.h>
+#include <ruy/TensorUtils.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer()
+ : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE), _external_context(nullptr)
+{
+ // DO NOTHING
+}
+
+FullyConnectedLayer::~FullyConnectedLayer() = default;
+
+void FullyConnectedLayer::fullyConnectedFloat32()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+ nnfw::ruy::FullyConnectedParams op_params;
+
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+ op_params.activation = convertActivationType(_activation);
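+  // ruy may pre-pack and cache a matrix across runs only if its data stays constant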
+ op_params.lhs_cacheable = _weights->is_constant();
+ op_params.rhs_cacheable = _input->is_constant();
+
+ nnfw::ruy::FullyConnected(
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format,
+ IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
+{
+ UNUSED_RELEASE(weights_format);
+ _input = input;
+ _weights = weights;
+ _bias = bias;
+ _activation = activation;
+ _output = output;
+ _external_context = external_context;
+}
+
+void FullyConnectedLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ fullyConnectedFloat32();
+ }
+ else
+ {
+ throw std::runtime_error{"FullyConnected: unsupported data type"};
+ }
+}
+
+void FullyConnectedLayer::prepare()
+{
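+  // If the constant bias is all zeros, drop it so the kernel skips the bias addition entirely.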
+ if (_bias && _bias->is_constant())
+ {
+ const int bias_size = getTensorShape(_bias).FlatSize();
+ if (nnfw::ruy::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ {
+ _bias = nullptr;
+ }
+ }
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h
new file mode 100644
index 000000000..33d560f0b
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
+#define __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../ExternalContext.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public ::onert::exec::IFunction
+{
+public:
+ FullyConnectedLayer();
+ ~FullyConnectedLayer();
+
+public:
+ void fullyConnectedFloat32();
+
+ void configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void run() override;
+
+ void prepare() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_weights;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::Activation _activation;
+
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.cc b/runtime/onert/backend/ruy/ops/OperationUtils.cc
new file mode 100644
index 000000000..929107b1a
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/OperationUtils.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationUtils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type)
+{
+ switch (ir_padding_type)
+ {
+ case ir::PaddingType::EXPLICIT:
+ return nnfw::ruy::PaddingType::kNone;
+ case ir::PaddingType::SAME:
+ return nnfw::ruy::PaddingType::kSame;
+ case ir::PaddingType::VALID:
+ return nnfw::ruy::PaddingType::kValid;
+ default:
+ throw std::runtime_error("Wrong padding type.");
+ }
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h
new file mode 100644
index 000000000..5dfdc7ec5
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/OperationUtils.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
+#define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <ruy/Shape.h>
+#include <ruy/Types.h>
+#include <iostream>
+#include <ir/DataType.h>
+#include <ir/InternalType.h>
+#include <ir/Padding.h>
+
+#include <limits>
+
+using OperandType = onert::ir::DataType;
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+inline nnfw::ruy::Shape getTensorShape(const IPortableTensor *tensor)
+{
+ if (tensor == nullptr)
+ return nnfw::ruy::Shape();
+
+ const ir::Shape &shape = tensor->get_info().shape();
+
+ assert(tensor->layout() == ir::Layout::NHWC);
+
+ auto rank = shape.rank();
+ nnfw::ruy::Shape ret(rank);
+ auto data = ret.DimsData();
+ for (int i = 0; i < rank; ++i)
+ {
+ data[i] = shape.dim(i);
+ }
+ return ret;
+}
+
+inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Activation activation)
+{
+ switch (activation)
+ {
+ case ir::Activation::NONE:
+ return nnfw::ruy::FusedActivationFunctionType::kNone;
+ case ir::Activation::RELU:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu;
+ case ir::Activation::RELU1:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu1;
+ case ir::Activation::RELU6:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu6;
+ case ir::Activation::TANH:
+ return nnfw::ruy::FusedActivationFunctionType::kTanh;
+ case ir::Activation::SIGMOID:
+ return nnfw::ruy::FusedActivationFunctionType::kSigmoid;
+ default:
+ throw std::runtime_error{"RUY backend: Cannot convert activation type"};
+ }
+}
+
+template <typename T>
+void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+ if (activation == ir::Activation::RELU)
+ {
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else if (activation == ir::Activation::RELU6)
+ {
+ *activation_min = 0;
+ *activation_max = 6;
+ }
+ else if (activation == ir::Activation::RELU1)
+ {
+ *activation_min = -1;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::SIGMOID)
+ {
+ *activation_min = 0;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::NONE)
+ {
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ std::cout << "Unsupported fused activation function." << std::endl;
+ }
+}
+
+nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type);
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
diff --git a/runtime/onert/backend/ruy/ruy.cc b/runtime/onert/backend/ruy/ruy.cc
new file mode 100644
index 000000000..4f33590e9
--- /dev/null
+++ b/runtime/onert/backend/ruy/ruy.cc
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+extern "C" {
+
+onert::backend::Backend *onert_backend_create() { return new onert::backend::ruy::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
+}
diff --git a/runtime/onert/backend/xnnpack/Backend.h b/runtime/onert/backend/xnnpack/Backend.h
new file mode 100644
index 000000000..b7aef1625
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Backend.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_H__
+#define __ONERT_BACKEND_XNNPACK_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
+ bool) const override
+ {
+ const auto &operands = graph.operands();
+ const auto &operations = graph.operations();
+ auto context = std::make_unique<BackendContext>(this, &graph);
+ auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->external_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_BACKEND_H__
diff --git a/runtime/onert/backend/xnnpack/BackendContext.cc b/runtime/onert/backend/xnnpack/BackendContext.cc
new file mode 100644
index 000000000..503d088aa
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/BackendContext.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+void BackendContext::initConsts()
+{
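+  // Run operation-specific constant initializers first, then register a default (copy) initializer
+  // for any remaining constant operands, and finally execute them all.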
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
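+  // Model inputs/outputs are managed outside this backend, so they are skipped below.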
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+ // TODO Get compiler options from compiler, and use it rather than getting it from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
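+    // Generate a kernel sequence only if this op sequence contains at least one operation assigned to this backend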
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ initConsts();
+
+  // NOTE To save memory, release IR operand data; constants have already been copied into backend tensors by initConsts()
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/BackendContext.h b/runtime/onert/backend/xnnpack/BackendContext.h
new file mode 100644
index 000000000..f81175b9e
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/BackendContext.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <util/ConfigSource.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 1;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(nullptr)
+ {
+ int num_threads = util::getConfigInt(util::config::XNNPACK_THREADS);
+ if (num_threads < 1)
+ num_threads = kDefaultNumThreadpoolThreads; // default num of threads
+ _external_context.reset(new ExternalContext(static_cast<size_t>(num_threads)));
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+
+ FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/xnnpack/CMakeLists.txt b/runtime/onert/backend/xnnpack/CMakeLists.txt
new file mode 100644
index 000000000..e3de31e6f
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(LIB_ONERT_BACKEND_XNNPACK onert_backend_xnnpack)
+
+# XNNPACK is not supported on some architectures; skip this backend if the package is not found
+nnfw_find_package(Xnnpack QUIET)
+if(NOT Xnnpack_FOUND)
+ return()
+endif(NOT Xnnpack_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_XNNPACK} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE pthreadpool)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE XNNPACK)
+
+set_target_properties(${LIB_ONERT_BACKEND_XNNPACK} PROPERTIES OUTPUT_NAME backend_xnnpack)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_XNNPACK} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_XNNPACK}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_XNNPACK} DESTINATION lib)
diff --git a/runtime/onert/backend/xnnpack/Config.cc b/runtime/onert/backend/xnnpack/Config.cc
new file mode 100644
index 000000000..4d42a3f18
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Config.cc
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+Config::~Config() { xnn_deinitialize(); }
+
+bool Config::initialize()
+{
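+  // XNNPACK requires a one-time global initialization; nullptr selects its default allocator.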
+ xnn_status status = xnn_initialize(nullptr /* allocator */);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to initialize XNNPACK"};
+ }
+ return true;
+}
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/Config.h b/runtime/onert/backend/xnnpack/Config.h
new file mode 100644
index 000000000..2cf7406e5
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Config.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_CONFIG_H__
+#define __ONERT_BACKEND_XNNPACK_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class Config : public IConfig
+{
+public:
+ virtual ~Config();
+
+public:
+ std::string id() override { return "xnnpack"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return true; }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_CONFIG_H__
diff --git a/runtime/onert/backend/xnnpack/ConstantInitializer.h b/runtime/onert/backend/xnnpack/ConstantInitializer.h
new file mode 100644
index 000000000..45cdd8cd9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ConstantInitializer.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
+
+#include <backend/cpu_common/ConstantInitializer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using ConstantInitializer = cpu_common::ConstantInitializer;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/xnnpack/ExternalContext.cc b/runtime/onert/backend/xnnpack/ExternalContext.cc
new file mode 100644
index 000000000..3a9fe1b55
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ExternalContext.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExternalContext.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
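+// Creates the pthreadpool shared by all XNNPACK operator kernels built from this backend context.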
+ExternalContext::ExternalContext(size_t num_threads)
+ : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy)
+{
+ assert(_threadpool);
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.h
new file mode 100644
index 000000000..682fd2e4e
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ExternalContext.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+
+#include <memory>
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class ExternalContext
+{
+public:
+ ExternalContext(size_t num_threads);
+
+public:
+ pthreadpool *getThreadPool() { return _threadpool.get(); }
+
+private:
+ std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> _threadpool;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc
new file mode 100644
index 000000000..b7d3f60fb
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/ConvolutionLayer.h"
+#include "ops/DepthwiseConvolutionLayer.h"
+#include "ops/FullyConnectedLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
+ _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+ assert(!_return_fn_seq);
+ assert(_tensor_builder->dynamicTensorManager());
+ assert(_tensor_reg);
+
+ auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
+
+ _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->operations = &_operations_ctx;
+ dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
+ dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+
+ _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ }
+
+ _current_layout = op_seq.getLayout();
+ for (const auto &operation_idx : op_seq.operations())
+ {
+ const auto &node = _operations_ctx.at(operation_idx);
+ node.accept(*this);
+ _return_fn_seq->append(releaseFunction());
+
+ for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ {
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
+
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
+ {
+ tensor->increase_ref();
+ }
+ }
+ }
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+ using ir::operation::Conv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ const auto stride = node.param().stride;
+ const auto activation = node.param().activation;
+ const auto param_padding = node.param().padding;
+ const auto dilation = node.param().dilation;
+ auto fn = std::make_unique<ops::ConvolutionLayer>(_external_context);
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+ using ir::operation::DepthwiseConv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto dilation_width = node.param().dilation.width_factor;
+ const auto dilation_height = node.param().dilation.height_factor;
+ const auto param_padding = node.param().padding;
+ const auto padding = ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width,
+ ker_height, dilation_width, dilation_height);
+ const auto multiplier = node.param().multiplier;
+ const auto activation = node.param().activation;
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(_external_context);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ multiplier, dilation_width, dilation_height, activation, ofm_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+ const auto activation = node.param().activation;
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
+ auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::FullyConnectedLayer>(_external_context);
+
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.h b/runtime/onert/backend/xnnpack/KernelGenerator.h
new file mode 100644
index 000000000..265824204
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+
+#include "ExternalContext.h"
+#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class KernelGenerator : public cpu_common::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+
+private:
+ const ir::Operands &_ctx;
+ const ir::Operations &_operations_ctx;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+ ir::Layout _current_layout;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/xnnpack/StaticTensorManager.h b/runtime/onert/backend/xnnpack/StaticTensorManager.h
new file mode 100644
index 000000000..f7344e8d8
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/StaticTensorManager.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
+
+#include "backend/cpu_common/StaticTensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using StaticTensorManager = cpu_common::StaticTensorManager;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/xnnpack/Tensor.h b/runtime/onert/backend/xnnpack/Tensor.h
new file mode 100644
index 000000000..b39cbd266
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Tensor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_H__
+#define __ONERT_BACKEND_XNNPACK_TENSOR_H__
+
+#include <backend/cpu_common/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using Tensor = cpu_common::Tensor;
+using ExternalTensor = cpu_common::ExternalTensor;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_TENSOR_H__
diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.cc b/runtime/onert/backend/xnnpack/TensorBuilder.cc
new file mode 100644
index 000000000..b570144ce
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/TensorBuilder.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg},
+ _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+  // The XNNPACK backend supports only the NHWC layout
+ assert(layout == ir::Layout::NHWC);
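+  // Dynamic tensors are handed to the dynamic tensor manager (allocated at execution time),
+  // while static tensors, including constants, are planned by the static tensor manager via
+  // notifyFirstUse()/notifyLastUse() below.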
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ const auto tensor_info = _tensor_info_map.at(ind);
+
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+void TensorBuilder::allocate()
+{
+  // NOTE Nothing to do here for now. Allocation is done in the prepare stage, which is not
+  //      ideal, because CPU kernels require `ITensor`s to be allocated before kernel generation.
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.h b/runtime/onert/backend/xnnpack/TensorBuilder.h
new file mode 100644
index 000000000..dddfedbf9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/TensorBuilder.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
+
+#include <backend/cpu_common/DynamicTensorManager.h>
+#include <backend/cpu_common/TensorRegistry.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
+
+ /**
+ * @brief Register tensor information to allocate on XNNPACK backend
+ * @param[in] ind Operand index
+ * @param[in] info Operand information
+   * @param[in] backend_layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void prepare(void);
+ void allocate();
+  void postFunctionPrepare() { /* DO NOTHING */ }
+
+ IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
+
+private:
+ const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc
new file mode 100644
index 000000000..0612995c2
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+ _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0),
+ _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor,
+ const ir::Activation activation, IPortableTensor *output)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _padding_type = padding_type;
+ _padding_left = padding_left;
+ _padding_right = padding_right;
+ _padding_top = padding_top;
+ _padding_bottom = padding_bottom;
+ _stride_width = stride_width;
+ _stride_height = stride_height;
+ _dilation_width_factor = dilation_width_factor;
+ _dilation_height_factor = dilation_height_factor;
+ _activation = activation;
+ _output = output;
+
+  // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void ConvolutionLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 Convolution operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK Conv: unsupported data type"};
+ }
+}
+
+bool ConvolutionLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ // NHWC
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &kernel_shape = _kernel->getShape();
+ uint32_t kernel_height = kernel_shape.dim(1);
+ uint32_t kernel_width = kernel_shape.dim(2);
+ uint32_t output_channels = kernel_shape.dim(0);
+ uint32_t input_channels = kernel_shape.dim(3);
+ assert(static_cast<uint32_t>(_input->getShape().dim(3)) == input_channels);
+ assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+
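+  // A regular convolution maps to XNNPACK's grouped convolution with a single group:
+  // groups = 1, group_input_channels = input_channels, group_output_channels = output_channels.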
+ enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+ 1 /* groups */, input_channels /* group_input_channels */,
+ output_channels /* group_output_channels */, input_channels /* input_channel_stride */,
+ output_channels /* output_channel_stride */,
+ reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+ output_activation_max, 0, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 Convolution operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool ConvolutionLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+    // It could be the model's input or output, whose buffer is not allocated yet
+ return false;
+ }
+
+ uint32_t input_width = _input->getShape().dim(2);
+ uint32_t input_height = _input->getShape().dim(1);
+ uint32_t batch_size = _input->getShape().dim(0);
+ enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+    throw std::runtime_error{"failed to setup FP32 Convolution operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h
new file mode 100644
index 000000000..6cbaa9f3a
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class ConvolutionLayer : public Layer
+{
+public:
+ ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t dilation_width_factor, const uint32_t dilation_height_factor,
+ const ir::Activation activation, IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _padding_type;
+ uint32_t _padding_left;
+ uint32_t _padding_top;
+ uint32_t _padding_right;
+ uint32_t _padding_bottom;
+
+ uint32_t _stride_width;
+ uint32_t _stride_height;
+ uint32_t _dilation_width_factor;
+ uint32_t _dilation_height_factor;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc
new file mode 100644
index 000000000..947f04194
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+DepthwiseConvolutionLayer::DepthwiseConvolutionLayer(
+ const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+ _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
+ _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void DepthwiseConvolutionLayer::configure(
+ const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+ ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
+ const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _padding_type = padding_type;
+ _padding_left = padding_left;
+ _padding_right = padding_right;
+ _padding_top = padding_top;
+ _padding_bottom = padding_bottom;
+ _stride_width = stride_width;
+ _stride_height = stride_height;
+ _multiplier = multiplier;
+ _dilation_width_factor = dilation_width_factor;
+ _dilation_height_factor = dilation_height_factor;
+ _activation = activation;
+ _output = output;
+
+  // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void DepthwiseConvolutionLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 DepthwiseConvolution operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK DepthwiseConv: unsupported data type"};
+ }
+}
+
+bool DepthwiseConvolutionLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ // NHWC
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &kernel_shape = _kernel->getShape();
+ uint32_t kernel_height = kernel_shape.dim(1);
+ uint32_t kernel_width = kernel_shape.dim(2);
+ uint32_t output_channels = kernel_shape.dim(3);
+ uint32_t input_channels = _input->getShape().dim(3);
+ assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+ assert(output_channels == input_channels * _multiplier);
+
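+  // Depthwise convolution maps to XNNPACK's grouped convolution with one group per input
+  // channel and `_multiplier` outputs per group; XNN_FLAG_DEPTHWISE_CONVOLUTION marks the
+  // kernel as being in the depthwise [1, H, W, depth_out] layout.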
+ enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+ input_channels /* groups */, 1 /* group_input_channels */,
+ _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
+ output_channels /* output_channel_stride */,
+ reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+ output_activation_max, XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool DepthwiseConvolutionLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+    // It could be the model's input or output, whose buffer is not allocated yet
+ return false;
+ }
+
+ uint32_t input_width = _input->getShape().dim(2);
+ uint32_t input_height = _input->getShape().dim(1);
+ uint32_t batch_size = _input->getShape().dim(0);
+ enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+    throw std::runtime_error{"failed to setup FP32 DepthwiseConvolution operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
new file mode 100644
index 000000000..10f840ae7
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
+
+#include "Layer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class DepthwiseConvolutionLayer : public Layer
+{
+public:
+ DepthwiseConvolutionLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t multiplier, const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor, const ir::Activation activation,
+ IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _padding_type;
+ uint32_t _padding_left;
+ uint32_t _padding_top;
+ uint32_t _padding_right;
+ uint32_t _padding_bottom;
+
+ uint32_t _stride_width;
+ uint32_t _stride_height;
+ uint32_t _multiplier;
+ uint32_t _dilation_width_factor;
+ uint32_t _dilation_height_factor;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc
new file mode 100644
index 000000000..d595fda36
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ IPortableTensor *output)
+{
+ _input = input;
+ _kernel = weights;
+ _bias = bias;
+ _activation = activation;
+ _output = output;
+
+  // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void FullyConnectedLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 FullyConnected operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK FC: unsupported data type"};
+ }
+}
+
+bool FullyConnectedLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ const auto &kernel_shape = _kernel->getShape();
+ assert(kernel_shape.rank() == 2);
+ uint32_t output_channels = kernel_shape.dim(0);
+ uint32_t input_channels = kernel_shape.dim(1);
+
+ const auto &input_shape = _input->getShape();
+ const auto &output_shape = _output->getShape();
+ uint32_t flag = 0;
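+  // When the input rank differs from the output rank, ask XNNPACK to flatten the input to 2D
+  // (TensorFlow-style reshape); otherwise the innermost input dimension must already equal the
+  // number of input channels.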
+ if (input_shape.rank() != output_shape.rank())
+ {
+ flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D;
+ assert(input_shape.num_elements() % input_channels == 0);
+ }
+ else
+ {
+ assert(static_cast<uint32_t>(input_shape.dim(input_shape.rank() - 1)) == input_channels);
+ }
+
+ assert(_kernel && _kernel->buffer());
+ const float *kernel_buffer = reinterpret_cast<const float *>(_kernel->buffer());
+ const float *bias_buffer = (_bias) ? reinterpret_cast<const float *>(_bias->buffer()) : nullptr;
+
+ enum xnn_status status = xnn_create_fully_connected_nc_f32(
+ input_channels, output_channels, input_channels /* input stride */,
+ output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
+ output_activation_max, flag, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool FullyConnectedLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+    // It could be the model's input or output, whose buffer is not allocated yet
+ return false;
+ }
+
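+  // The effective batch size is the total number of input elements divided by the number of
+  // input channels (kernel dim 1); this also covers the flatten-to-2D case handled in create().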
+ uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
+ enum xnn_status status = xnn_setup_fully_connected_nc_f32(
+ _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+    throw std::runtime_error{"failed to setup FP32 FullyConnected operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
new file mode 100644
index 000000000..883607ef9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public Layer
+{
+public:
+ FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+  void configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/Layer.h b/runtime/onert/backend/xnnpack/ops/Layer.h
new file mode 100644
index 000000000..68b610f33
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/Layer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
+
+#include <exec/IFunction.h>
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+#include "../ExternalContext.h"
+#include "../Tensor.h"
+
+#include <cassert>
+#include <memory>
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class Layer : public ::onert::exec::IFunction
+{
+public:
+ Layer(const std::shared_ptr<ExternalContext> external_context)
+ : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context}
+ {
+ // DO NOTHING
+ }
+
+ ~Layer()
+ {
+ if (_kernel_op)
+ xnn_delete_operator(_kernel_op);
+ }
+
+public:
+ void prepare() override
+ {
+ if (_create)
+ return;
+
+ _create = create();
+ assert(_create);
+
+ _setup = setup();
+ }
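+  // create() builds the XNNPACK operator from constant data (weights/bias); setup() binds the
+  // input/output buffers. setup() may fail in prepare() when those buffers are not allocated
+  // yet (e.g. model inputs/outputs), in which case it is retried by the derived run().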
+ virtual bool create() = 0;
+ virtual bool setup() = 0;
+
+protected:
+ xnn_operator_t _kernel_op;
+ bool _create;
+ bool _setup;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/OperationUtils.h b/runtime/onert/backend/xnnpack/ops/OperationUtils.h
new file mode 100644
index 000000000..5102e32dd
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/OperationUtils.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+
+// duplicated from cpu/ops/OperationUtils.h
+#include <ir/InternalType.h>
+#include <ir/Padding.h>
+#include <ir/DataType.h>
+
+#include <limits>
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+using OperandType = ir::DataType;
+
+template <typename T>
+void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+ if (activation == ir::Activation::RELU)
+ {
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else if (activation == ir::Activation::RELU6)
+ {
+ *activation_min = 0;
+ *activation_max = 6;
+ }
+ else if (activation == ir::Activation::RELU1)
+ {
+ *activation_min = -1;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::SIGMOID)
+ {
+ *activation_min = 0;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::NONE)
+ {
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ throw std::runtime_error{"Unsupported fused activation function"};
+ }
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
diff --git a/runtime/onert/backend/xnnpack/xnnpack.cc b/runtime/onert/backend/xnnpack/xnnpack.cc
new file mode 100644
index 000000000..38a6c5572
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/xnnpack.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+#include <util/logging.h>
+
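+// Entry points resolved by onert's backend loader when this plugin is dynamically loaded;
+// the same pattern is used by the other backend plugins (e.g. cpu, acl_cl, acl_neon).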
+extern "C" {
+onert::backend::Backend *onert_backend_create()
+{
+ VERBOSE(onert_backend_create) << "'xnnpack' loaded\n";
+ return new onert::backend::xnnpack::Backend;
+}
+
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+  VERBOSE(onert_backend_destroy) << "'xnnpack' unloaded\n";
+ delete backend;
+}
+}