path: root/runtime/onert/backend/cpu
author     Chunseok Lee <chunseok.lee@samsung.com>  2020-12-14 14:43:43 +0900
committer  Chunseok Lee <chunseok.lee@samsung.com>  2020-12-14 14:43:43 +0900
commit     62529acabbafce7730601ed01d5709d7bc0d378a (patch)
tree       bf6912cfa8fac4a2997292bfcb3c82055734c97e /runtime/onert/backend/cpu
parent     6ea13af5257155ff993c205cf997b870cc627f73 (diff)
Imported Upstream version 1.12.0 (tag: upstream/1.12.0)
Diffstat (limited to 'runtime/onert/backend/cpu')
-rw-r--r--  runtime/onert/backend/cpu/Backend.h | 2
-rw-r--r--  runtime/onert/backend/cpu/BackendContext.cc | 147
-rw-r--r--  runtime/onert/backend/cpu/BackendContext.h | 35
-rw-r--r--  runtime/onert/backend/cpu/ConstantInitializer.cc | 94
-rw-r--r--  runtime/onert/backend/cpu/ConstantInitializer.h | 38
-rw-r--r--  runtime/onert/backend/cpu/ExternalContext.h | 3
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.cc | 72
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.h | 76
-rw-r--r--  runtime/onert/backend/cpu/StaticTensorManager.cc | 107
-rw-r--r--  runtime/onert/backend/cpu/StaticTensorManager.h | 33
-rw-r--r--  runtime/onert/backend/cpu/Tensor.cc | 32
-rw-r--r--  runtime/onert/backend/cpu/Tensor.h | 87
-rw-r--r--  runtime/onert/backend/cpu/TensorBuilder.h | 19
-rw-r--r--  runtime/onert/backend/cpu/cpu.cc | 15
-rw-r--r--  runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc | 6
-rw-r--r--  runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc | 2
-rw-r--r--  runtime/onert/backend/cpu/ops/ConcatLayer.cc | 34
-rw-r--r--  runtime/onert/backend/cpu/ops/ConvolutionLayer.cc | 2
-rw-r--r--  runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc | 78
-rw-r--r--  runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h | 54
-rw-r--r--  runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc | 14
-rw-r--r--  runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h | 5
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc | 30
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h | 4
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc | 30
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc | 32
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h | 2
-rw-r--r--  runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc | 7
-rw-r--r--  runtime/onert/backend/cpu/ops/ExpandDimsLayer.h | 4
-rw-r--r--  runtime/onert/backend/cpu/ops/FillLayer.cc | 22
-rw-r--r--  runtime/onert/backend/cpu/ops/FillLayer.h | 4
-rw-r--r--  runtime/onert/backend/cpu/ops/MeanLayer.cc | 25
32 files changed, 581 insertions, 534 deletions
diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index fc8574b26..0b416a7e9 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -54,8 +54,6 @@ public:
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
context->external_context());
- context->tensor_register = nullptr;
- context->optimizer = nullptr;
return context;
}
diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc
new file mode 100644
index 000000000..6b958c1b7
--- /dev/null
+++ b/runtime/onert/backend/cpu/BackendContext.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+ // TODO Get compiler options from the compiler and use them rather than reading them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+ // For executors that do not have a fixed linear execution order:
+ // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
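The new genTensors/genKernels entry points above are what a compiler-side driver would call when lowering a graph onto this backend: genTensors registers operand infos and runs the static memory planner, genKernels turns each op sequence assigned to this backend into an executable function sequence. A minimal driver-side sketch follows; the function name and call site are hypothetical, only the BackendContext API comes from this diff.

#include "BackendContext.h" // onert::backend::cpu::BackendContext from this diff
#include <vector>

// Hypothetical driver-side usage; `order`, `op_seqs`, and `lower_info`
// would come from the lowered graph, not from this backend.
void lowerOntoCpuBackend(onert::backend::cpu::BackendContext &ctx,
                         const std::vector<onert::ir::OpSequenceIndex> &order,
                         const onert::ir::OpSequences &op_seqs,
                         const onert::ir::LowerInfoMap &lower_info)
{
  // 1. Register tensor infos and run the static memory planner.
  auto *tensor_registry = ctx.genTensors(order, op_seqs, lower_info);
  (void)tensor_registry;

  // 2. Generate kernels; constants are initialized and their IR data released inside.
  auto fn_map = ctx.genKernels(order, op_seqs);
  // fn_map pairs each OpSequenceIndex with its function sequence, ready for an executor.
  (void)fn_map;
}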
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
index e90b21054..0a4106d33 100644
--- a/runtime/onert/backend/cpu/BackendContext.h
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -18,6 +18,9 @@
#define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
#include "ExternalContext.h"
namespace onert
@@ -32,21 +35,35 @@ class BackendContext : public onert::backend::BackendContext
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
- std::shared_ptr<ITensorRegister> tensor_register = nullptr,
- std::shared_ptr<IOptimizer> optimizer = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
- constant_initializer, kernel_gen, tensor_register,
- optimizer),
- _external_context(new ExternalContext)
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(new ExternalContext)
{
}
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+ FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
// NOTE ruy context has a thread pool, and when multiple ruy contexts are created,
// the thread pool is also created in duplicate
// TODO Create one ruy context for session
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
deleted file mode 100644
index 6f6eb77bc..000000000
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ConstantInitializer.h"
-#include "Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
-{
- // DO NOTHING
-}
-
-void ConstantInitializer::registerDefaultInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- registerExternalInitializer(index, obj);
-}
-
-void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- _init_map[index] = [](const onert::ir::Operand &model_obj, onert::backend::ITensor &itensor) {
- auto data = model_obj.shareData();
- assert(data && data->base());
- ExternalTensor &tensor = dynamic_cast<ExternalTensor &>(itensor);
- tensor.setData(data);
- };
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerExternalInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerExternalInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto &weight_obj = _operands.at(weight_index);
- registerExternalInitializer(weight_index, weight_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- if (!bias_index.undefined())
- {
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
- }
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
index c016c83bc..d7858c0f6 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -14,13 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
-#include "backend/cpu_common/TensorRegistry.h"
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
+#include <backend/cpu_common/ConstantInitializer.h>
namespace onert
{
@@ -29,35 +26,10 @@ namespace backend
namespace cpu
{
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg);
-
-public:
- void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
-
- // TODO: For now the only cpu backend supports constant tensor to use data from external
- // If the other backend supports (to do this,
- // ExternalTensor should be abstract such as IExternal, maybe),
- // this can be an interface of IConstantInitializer
- void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
-
-public:
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::FullyConnected &) override;
-
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
-
-private:
- std::shared_ptr<ITensorRegistry> _tensor_reg;
-};
+using ConstantInitializer = cpu_common::ConstantInitializer;
} // namespace cpu
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h
index 32e249f5a..f5d11f4f1 100644
--- a/runtime/onert/backend/cpu/ExternalContext.h
+++ b/runtime/onert/backend/cpu/ExternalContext.h
@@ -17,7 +17,6 @@
#ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
#define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
-#include <backend/IExternalContext.h>
#include <util/ConfigSource.h>
#include <ruy/context.h>
@@ -33,7 +32,7 @@ namespace backend
namespace cpu
{
-class ExternalContext : public IExternalContext
+class ExternalContext
{
public:
ExternalContext() : _ruy_context(new ruy::Context)
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 451815b65..25756eced 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -23,6 +23,7 @@
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
+#include "ops/DepthToSpaceLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/EinsumLayer.h"
#include "ops/ElementwiseActivationLayer.h"
@@ -108,12 +109,16 @@ convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type
{
switch (type_ir)
{
+ case ir::operation::ElementwiseActivation::Type::ELU:
+ return ops::ElementwiseActivationType::kElu;
case ir::operation::ElementwiseActivation::Type::LOGISTIC:
return ops::ElementwiseActivationType::kLogistic;
case ir::operation::ElementwiseActivation::Type::RELU:
return ops::ElementwiseActivationType::kReLU;
case ir::operation::ElementwiseActivation::Type::TANH:
return ops::ElementwiseActivationType::kTanh;
+ case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+ return ops::ElementwiseActivationType::kLeakyReLU;
default:
throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
}
@@ -124,6 +129,8 @@ convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinary
{
switch (type_ir)
{
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ return ops::ElementwiseBinaryType::kLogicalAnd;
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
return ops::ElementwiseBinaryType::kLogicalOr;
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
@@ -167,6 +174,10 @@ ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::Elementwise
return ops::ElementwiseUnaryType::kRSqrt;
case ir::operation::ElementwiseUnary::Type::SIN:
return ops::ElementwiseUnaryType::kSin;
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ return ops::ElementwiseUnaryType::kSqrt;
+ case ir::operation::ElementwiseUnary::Type::SQUARE:
+ return ops::ElementwiseUnaryType::kSquare;
case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
return ops::ElementwiseUnaryType::kZerosLike;
default:
@@ -217,7 +228,7 @@ KernelGenerator::KernelGenerator(
const std::shared_ptr<ExternalContext> &external_context)
: _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
_tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+ _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
// DO NOTHING
}
@@ -260,7 +271,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
}
- _current_op_seq_layout = op_seq.getLayout();
+ _current_layout = op_seq.getLayout();
for (const auto &operation_idx : op_seq.operations())
{
const auto &node = _operations_ctx.at(operation_idx);
@@ -314,8 +325,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
_return_fn = std::move(fn);
return;
}
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -342,8 +353,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -364,7 +375,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
- dilation_height, activation, ofm_tensor);
+ dilation_height, activation, ofm_tensor, _external_context);
_return_fn = std::move(fn);
}
@@ -374,7 +385,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
@@ -418,16 +429,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
void KernelGenerator::visit(const ir::operation::Fill &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
+ // SHAPE input is used for shape inference
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
- auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto value_tensor = _tensor_reg->getPortableTensor(value_index);
auto fn = std::make_unique<ops::FillLayer>();
- fn->configure(input_tensor, value_tensor, output_tensor);
+ fn->configure(value_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -576,7 +586,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(backend_layout == indices_tensor->layout());
const auto &input_shape = _ctx.at(input_index).shape();
UNUSED_RELEASE(input_shape);
- assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
+ assert(input_shape.rank() < 4 || _current_layout == backend_layout);
const auto axis_raw = node.param().axis;
const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
@@ -640,7 +650,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
for (auto &idx : opSeq)
{
const auto &operand = _ctx.at(idx);
- // TODO make sure using `_current_op_seq_layout` is correct for custom operations
+ // TODO make sure using `_current_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
auto in_tensor = _tensor_reg->getPortableTensor(idx);
tensors.emplace_back(in_tensor);
@@ -713,15 +723,14 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+ // AXIS input is used for output shape inference
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
- auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_tensor, axis_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -731,7 +740,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
assert(-rank <= axis && axis < rank);
@@ -753,7 +762,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
const auto input_index{node.getInputs().at(0)};
const auto rank = _ctx.at(input_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
assert(rank == 0 || (-rank <= axis && axis < rank));
@@ -1004,11 +1013,11 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)};
+ const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
@@ -1016,7 +1025,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
- fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true);
+ fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);
_return_fn = std::move(fn);
}
@@ -1029,8 +1038,8 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
const auto kh = node.param().kh;
const auto kw = node.param().kw;
const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
const auto padding =
ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
@@ -1255,6 +1264,21 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
_return_fn = std::move(fn);
}
+void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ auto block_size = node.param().block_size;
+
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+
+ auto fn = std::make_unique<ops::DepthToSpaceLayer>();
+
+ fn->configure(input_tensor, block_size, output_tensor);
+ _return_fn = std::move(fn);
+}
+
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h
index 5df77607f..3a4cfbffa 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.h
+++ b/runtime/onert/backend/cpu/KernelGenerator.h
@@ -23,7 +23,7 @@
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
@@ -34,7 +34,7 @@ namespace backend
namespace cpu
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
@@ -43,59 +43,59 @@ public:
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
- using IKernelGenerator::visit;
+ void visit(const ir::OpSequence &) override;
void visit(const ir::operation::AddN &) override;
- void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::ArgMinMax &) override;
+ void visit(const ir::operation::BatchMatMul &) override;
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::BroadcastTo &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::Custom &node) override;
+ void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::Fill &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Einsum &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Custom &node) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
+ void visit(const ir::operation::Fill &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::FusedBatchNorm &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Unpack &) override;
+ void visit(const ir::operation::MatrixBandPart &) override;
void visit(const ir::operation::OneHot &) override;
- void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::Select &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::Shape &) override;
- void visit(const ir::operation::ResizeBilinear &node) override;
- void visit(const ir::operation::Reverse &) override;
- void visit(const ir::operation::ArgMax &) override;
+ void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Pad &) override;
void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Pow &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Tile &) override;
- void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
void visit(const ir::operation::Rank &) override;
- void visit(const ir::operation::MatrixBandPart &) override;
- void visit(const ir::operation::BatchMatMul &) override;
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::BroadcastTo &) override;
- void visit(const ir::operation::FusedBatchNorm &) override;
- void visit(const ir::operation::LogSoftmax &) override;
+ void visit(const ir::operation::Reduce &) override;
+ void visit(const ir::operation::Reshape &) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
+ void visit(const ir::operation::Reverse &) override;
+ void visit(const ir::operation::Select &) override;
+ void visit(const ir::operation::Shape &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SplitV &) override;
+ void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::StridedSlice &) override;
+ void visit(const ir::operation::Tile &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::Unpack &) override;
private:
const ir::Operands &_ctx;
@@ -103,7 +103,7 @@ private:
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc
deleted file mode 100644
index 3edac897c..000000000
--- a/runtime/onert/backend/cpu/StaticTensorManager.cc
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <util/logging.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- cpu_common::DynamicTensorManager *dynamic_tensor_manager)
- : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg},
- _dynamic_tensor_manager{dynamic_tensor_manager}
-{
- // DO NOTHING
-}
-
-void StaticTensorManager::allocateNonconsts(void)
-{
- _nonconst_mgr->allocate();
-
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second.get();
- if (!_as_constants[ind] && !tensor->is_dynamic())
- {
- auto *buffer = _nonconst_mgr->getBuffer(ind);
- tensor->setBuffer(buffer);
-
- VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
- }
- }
-}
-
-void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
-
-void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
- bool as_const)
-{
- assert(!_tensors->getITensor(ind));
- if (as_const)
- {
- auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- else
- {
- auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
- _dynamic_tensor_manager->dynamic_mem_mgr().get());
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- _as_constants[ind] = as_const;
-}
-
-void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- assert(_tensors->getITensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getITensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->claimPlan(ind, size);
-}
-
-void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
-{
- assert(_tensors->getITensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getITensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->releasePlan(ind);
-}
-
-void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (const auto &it : _tensors->native_tensors())
- fn(it.first);
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/StaticTensorManager.h b/runtime/onert/backend/cpu/StaticTensorManager.h
index 2af61e4e7..d07f0c814 100644
--- a/runtime/onert/backend/cpu/StaticTensorManager.h
+++ b/runtime/onert/backend/cpu/StaticTensorManager.h
@@ -17,13 +17,7 @@
#ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
-#include "backend/IStaticTensorManager.h"
-#include "backend/cpu_common/DynamicTensorManager.h"
-#include "backend/cpu_common/MemoryManager.h"
-#include "backend/cpu_common/TensorRegistry.h"
-#include "backend/ITensorManager.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandInfo.h"
+#include "backend/cpu_common/StaticTensorManager.h"
namespace onert
{
@@ -32,30 +26,7 @@ namespace backend
namespace cpu
{
-class StaticTensorManager : public backend::IStaticTensorManager
-{
-public:
- StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- cpu_common::DynamicTensorManager *dynamic_tensor_manager);
- virtual ~StaticTensorManager() = default;
-
- void allocateNonconsts(void);
- void deallocateNonconsts(void);
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout, bool as_const);
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
-private:
- std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr;
- const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
- ir::OperandIndexMap<bool> _as_constants;
- cpu_common::DynamicTensorManager *_dynamic_tensor_manager;
-};
+using StaticTensorManager = cpu_common::StaticTensorManager;
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/Tensor.cc b/runtime/onert/backend/cpu/Tensor.cc
deleted file mode 100644
index dac8f898b..000000000
--- a/runtime/onert/backend/cpu/Tensor.cc
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-// `dynamic_cast` not working across library boundaries on NDK
-// With this as a key function, `dynamic_cast` works across dl
-ExternalTensor::~ExternalTensor() {}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h
index 2ad2ad0fb..d663c3f50 100644
--- a/runtime/onert/backend/cpu/Tensor.h
+++ b/runtime/onert/backend/cpu/Tensor.h
@@ -28,92 +28,7 @@ namespace cpu
{
using Tensor = cpu_common::Tensor;
-
-/**
- * @brief Class that uses data from external memory that is not managed by a backend
- * instead of allocating and copying the data. ExternalTensor's data pointer points to
- * an address of memory such as where memory is already allocated, or mmapped area.
- * This is meaning that ExternalTensor can take all of types' ir::Data.
- * To support this, assume below things no padding, always NHWC layout,
- * constant tensor and not dynamic.
- */
-class ExternalTensor : public Tensor
-{
-public:
- ExternalTensor() = delete;
- virtual ~ExternalTensor();
-
-public:
- ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
- : Tensor(info, layout, nullptr)
- {
- assert(_layout == ir::Layout::NHWC);
- assert(_info.isConstant());
- assert(_info.isDynamic() == false);
- }
-
-public:
- /**
- * @brief set Data to be shared from external so that this ExternalTensor will not be
- * allocated on CPU backend
- * @param[in] data data of Operand to be set
- */
- void setData(const std::shared_ptr<ir::Data> data)
- {
- assert(data != nullptr);
- _data = data;
- // Note. Some op such as cker::Conv could take buffer as nullptr.
- // That's why _buffer also would be used
- _buffer = const_cast<uint8_t *>(_data->base());
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
-
- bool is_constant() const override { return true; }
- bool is_dynamic() const override { return false; }
- void set_dynamic() override
- {
- throw std::runtime_error("This tensor does not support changing dynamic");
- }
-
- void setShape(const ir::Shape &) override
- {
- throw std::runtime_error("This tensor does not support changing shape");
- }
-
- void increase_ref() override { ++_num_references; }
-
- void decrease_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- --_num_references;
- if (_num_references == 0)
- {
- _data.reset();
- _buffer = nullptr;
- }
- }
-
- /**
- * @brief Reset reference count to zero and release data
- */
- void reset_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- _num_references = 0;
-
- _data.reset();
- _buffer = nullptr;
- }
-
- int32_t num_references() override { return _num_references; }
-
-private:
- std::shared_ptr<const ir::Data> _data;
-};
+using ExternalTensor = cpu_common::ExternalTensor;
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h
index 448abc229..9d8a5deb5 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.h
+++ b/runtime/onert/backend/cpu/TensorBuilder.h
@@ -20,7 +20,6 @@
#include <backend/cpu_common/DynamicTensorManager.h>
#include <backend/cpu_common/TensorRegistry.h>
-#include <backend/ITensorBuilder.h>
#include <ir/OperandIndexMap.h>
#include "StaticTensorManager.h"
@@ -35,7 +34,7 @@ namespace backend
namespace cpu
{
-class TensorBuilder : public ITensorBuilder
+class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
@@ -47,18 +46,18 @@ public:
* @param[in] layout Operand data layout
*/
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
+ ir::Layout backend_layout);
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
- bool isRegistered(const ir::OperandIndex &) const override;
+ bool isRegistered(const ir::OperandIndex &) const;
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override { /* DO NOTHING */}
+ void prepare(void);
+ void allocate();
+ void postFunctionPrepare() { /* DO NOTHING */}
- IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
+ IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
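With the ITensorBuilder interface gone, TensorBuilder is a plain class but keeps the same lifecycle. The sketch below only illustrates the expected call order under that assumption; in the runtime these calls are driven by BackendContext::genTensors and the memory planner, and `ind`/`info` are placeholders.

#include "TensorBuilder.h"
#include <ir/Index.h>
#include <ir/OperandInfo.h>

// Illustrative call order only, not a real code path in the runtime.
void buildOneStaticTensor(onert::backend::cpu::TensorBuilder &builder,
                          const onert::ir::OperandIndex &ind,
                          const onert::ir::OperandInfo &info)
{
  builder.registerTensorInfo(ind, info, onert::ir::Layout::NHWC);
  builder.notifyFirstUse(ind); // start of the tensor's lifetime for planning
  builder.prepare();           // plan memory for all registered static tensors
  builder.allocate();          // back the planned tensors with real buffers
  // notifyLastUse(ind) would mark the end of the lifetime so the planner
  // can reuse that region for later tensors.
}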
diff --git a/runtime/onert/backend/cpu/cpu.cc b/runtime/onert/backend/cpu/cpu.cc
index 5385bb2a3..55538e2a6 100644
--- a/runtime/onert/backend/cpu/cpu.cc
+++ b/runtime/onert/backend/cpu/cpu.cc
@@ -16,18 +16,9 @@
#include "Backend.h"
-#include <util/logging.h>
-
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'cpu' loaded\n";
- return new onert::backend::cpu::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'cpu' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::cpu::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
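cpu.cc keeps the backend's plugin surface down to the two C symbols onert_backend_create and onert_backend_destroy. As a rough illustration of how such a plugin can be loaded, here is a plain POSIX dlopen sketch; the actual runtime uses its own BackendManager, and the shared-object name and include path below are assumptions.

#include <dlfcn.h>
#include <backend/Backend.h> // assumed header for onert::backend::Backend

// Illustrative loader only, not the runtime's BackendManager.
onert::backend::Backend *loadCpuBackend(void **handle_out)
{
  void *handle = dlopen("libbackend_cpu.so", RTLD_LAZY | RTLD_LOCAL); // assumed name
  if (handle == nullptr)
    return nullptr;

  using CreateFn = onert::backend::Backend *(*)();
  auto create = reinterpret_cast<CreateFn>(dlsym(handle, "onert_backend_create"));
  if (create == nullptr)
  {
    dlclose(handle);
    return nullptr;
  }

  *handle_out = handle;
  return create(); // pair with "onert_backend_destroy" and dlclose when done
}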
diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
index 2fd284c91..d5ffdef0b 100644
--- a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
@@ -79,6 +79,9 @@ void ArgMinMaxLayer::run()
case ir::DataType::UINT8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
break;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
+ break;
case ir::DataType::INT32:
TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t);
break;
@@ -97,6 +100,9 @@ void ArgMinMaxLayer::run()
case ir::DataType::UINT8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t);
break;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t);
+ break;
case ir::DataType::INT32:
TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t);
break;
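These hunks extend the type dispatch, and in KernelGenerator the layer is now configured with node.param().is_arg_max instead of a hard-coded true. The snippet below is not the cker kernel, just a 1-D illustration of how a single flag selects between argmin and argmax.

#include <cstdint>

// Reference only: picks the index of the min or max element of a 1-D array.
template <typename T>
int32_t argMinMax1D(const T *data, int32_t len, bool is_arg_max)
{
  int32_t best = 0;
  for (int32_t i = 1; i < len; ++i)
  {
    const bool better = is_arg_max ? (data[i] > data[best]) : (data[i] < data[best]);
    if (better)
      best = i;
  }
  return best;
}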
diff --git a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
index 7ef023788..ba9655924 100644
--- a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
+++ b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
@@ -67,7 +67,7 @@ void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTens
void BatchMatMulLayer::run()
{
- if (_lhs->data_type() == OperandType::FLOAT32)
+ if ((_lhs->data_type() == OperandType::FLOAT32) && (_rhs->data_type() == OperandType::FLOAT32))
{
batchMatMulFloat32();
}
diff --git a/runtime/onert/backend/cpu/ops/ConcatLayer.cc b/runtime/onert/backend/cpu/ops/ConcatLayer.cc
index d26ed7378..edfdfc1a6 100644
--- a/runtime/onert/backend/cpu/ops/ConcatLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConcatLayer.cc
@@ -117,24 +117,26 @@ void ConcatLayer::configure(const std::vector<const IPortableTensor *> &inputs,
void ConcatLayer::run()
{
- if (_output->data_type() == OperandType::FLOAT32)
+ switch (_output->data_type())
{
- concatenationGeneral<float>();
+ case OperandType::FLOAT32:
+ concatenationGeneral<float>();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ concatenationQuant8();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ concatenationGeneral<int8_t>();
+ break;
+ case OperandType::INT32:
+ concatenationGeneral<int32_t>();
+ break;
+ case OperandType::INT64:
+ concatenationGeneral<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Concat: unsupported data type");
}
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- concatenationQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- concatenationGeneral<int32_t>();
- }
- else if (_output->data_type() == OperandType::INT64)
- {
- concatenationGeneral<int64_t>();
- }
- else
- throw std::runtime_error("Concat: unsupported data type");
}
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index 799e9e2d0..c964e38f9 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -203,8 +203,6 @@ void ConvolutionLayer::prepare()
_prepare = true;
}
-#undef ANDROID_NN_CONV_PARAMETERS
-
} // namespace ops
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc
new file mode 100644
index 000000000..d265d0ac2
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpaceLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/DepthToSpace.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+DepthToSpaceLayer::DepthToSpaceLayer() : _input(nullptr), _block_size(0), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T> void DepthToSpaceLayer::depthToSpace()
+{
+ nnfw::cker::DepthToSpace(getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()),
+ _block_size);
+}
+
+void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size,
+ IPortableTensor *output)
+{
+ _input = input;
+ _block_size = block_size;
+ _output = output;
+}
+
+void DepthToSpaceLayer::run()
+{
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ depthToSpace<float>();
+ break;
+ case OperandType::INT32:
+ depthToSpace<int32_t>();
+ break;
+ case OperandType::INT64:
+ depthToSpace<int64_t>();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ depthToSpace<uint8_t>();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ depthToSpace<int8_t>();
+ break;
+ default:
+ throw std::runtime_error{"DepthToSpace: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
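The layer delegates the actual rearrangement to nnfw::cker::DepthToSpace. For reference, the standard TF-style NHWC index mapping for block size b is sketched below as a standalone function; cker's internals may differ, but the mapping itself is the conventional one: output shape [N, H*b, W*b, C/(b*b)], with the input channel recovered from the output position.

#include <cstdint>
#include <cstddef>
#include <vector>

// Reference NHWC depth-to-space mapping, independent of cker.
// in has shape [N, H, W, C] with C divisible by b*b.
template <typename T>
std::vector<T> depthToSpaceRef(const std::vector<T> &in, int32_t N, int32_t H, int32_t W,
                               int32_t C, int32_t b)
{
  const int32_t Co = C / (b * b);
  std::vector<T> out(static_cast<std::size_t>(N) * H * b * W * b * Co);
  for (int32_t n = 0; n < N; ++n)
    for (int32_t oh = 0; oh < H * b; ++oh)
      for (int32_t ow = 0; ow < W * b; ++ow)
        for (int32_t oc = 0; oc < Co; ++oc)
        {
          // Input channel encodes the position inside the b x b spatial block.
          const int32_t ic = ((oh % b) * b + (ow % b)) * Co + oc;
          const std::size_t in_idx =
            ((static_cast<std::size_t>(n) * H + oh / b) * W + ow / b) * C + ic;
          const std::size_t out_idx =
            ((static_cast<std::size_t>(n) * H * b + oh) * (W * b) + ow) * Co + oc;
          out[out_idx] = in[in_idx];
        }
  return out;
}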
diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h
new file mode 100644
index 000000000..32e0171ce
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class DepthToSpaceLayer : public ::onert::exec::IFunction
+{
+public:
+ DepthToSpaceLayer();
+
+ void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ template <typename T> void depthToSpace();
+
+ const IPortableTensor *_input;
+ int32_t _block_size;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
index f1dc1103a..85553d14d 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
@@ -43,11 +43,12 @@ void DepthwiseConvolutionLayer::convFloat32()
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
- nnfw::cker::DepthwiseConv(
+ nnfw::cker::DepthwiseConv<float, float>(
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
}
void DepthwiseConvolutionLayer::convQuant8()
@@ -79,11 +80,12 @@ void DepthwiseConvolutionLayer::convQuant8()
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;
- nnfw::cker::DepthwiseConv(
+ nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()),
getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()),
+ _external_context->ruy_context());
}
void DepthwiseConvolutionLayer::configure(
@@ -91,7 +93,8 @@ void DepthwiseConvolutionLayer::configure(
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
- const ir::Activation activation, IPortableTensor *output)
+ const ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
{
_input = input;
_kernel = kernel;
@@ -107,6 +110,7 @@ void DepthwiseConvolutionLayer::configure(
_dilationHeight = dilationHeight;
_activation = activation;
_output = output;
+ _external_context = external_context;
}
void DepthwiseConvolutionLayer::run()
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
index fb032ecbf..fe1fcc182 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
@@ -19,6 +19,7 @@
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
+#include "../ExternalContext.h"
#include <exec/IFunction.h>
@@ -47,7 +48,7 @@ public:
const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
const uint32_t multiplier, const uint32_t dilationWidth,
const uint32_t dilationHeight, const ir::Activation activation,
- IPortableTensor *output);
+ IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context);
void run() override;
@@ -71,6 +72,8 @@ private:
uint32_t _dilationHeight{1};
ir::Activation _activation{ir::Activation::NONE};
+
+ std::shared_ptr<ExternalContext> _external_context;
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
index c1d63172b..3e1da5ec0 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
@@ -18,6 +18,8 @@
#include "OperationUtils.h"
+#include <cker/operation/ELU.h>
+#include <cker/operation/LeakyReLU.h>
#include <cker/operation/Logistic.h>
#include <cker/operation/ReLU.h>
#include <cker/operation/ReLU6.h>
@@ -91,6 +93,19 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
switch (op_type)
{
+ case ElementwiseActivationType::kElu:
+ if (input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ELU(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Elu): unsupported data type"};
+ }
+ break;
case ElementwiseActivationType::kLogistic:
if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
@@ -160,6 +175,21 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
}
break;
+ case ElementwiseActivationType::kLeakyReLU:
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output),
+ reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(LeakyReLU): unsupported data type"};
+ }
+ break;
default:
throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
}
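
The two new activation cases differ only in the per-element formula: LeakyReLU is x for x > 0 and alpha*x otherwise, with alpha captured by the lambda at configure time; the ELU call here takes no alpha argument, so the sketch below assumes the standard alpha = 1 form, x for x > 0 and exp(x)-1 otherwise. A stand-alone illustration of the math (not the cker code itself):

#include <cmath>
#include <cstdio>

// Per-element reference formulas; the cker kernels apply the same math over whole tensors.
float elu(float x) { return x > 0.f ? x : std::expm1(x); }           // alpha = 1 assumed
float leaky_relu(float x, float alpha) { return x > 0.f ? x : alpha * x; }

int main()
{
  const float xs[] = {-2.f, -0.5f, 0.f, 1.5f};
  for (float x : xs)
    std::printf("x=%5.2f  elu=%8.4f  leaky(0.2)=%6.2f\n", x, elu(x), leaky_relu(x, 0.2f));
}
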
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
index 3ef580041..948ab3b57 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
@@ -32,9 +32,11 @@ namespace ops
enum class ElementwiseActivationType
{
+ kElu,
kLogistic,
kReLU,
- kTanh
+ kTanh,
+ kLeakyReLU
};
class ElementwiseActivationLayer : public ::onert::exec::IFunction
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
index ea3c1e7cd..1e17a0828 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
@@ -18,6 +18,7 @@
#include "OperationUtils.h"
+#include <cker/operation/LogicalAnd.h>
#include <cker/operation/LogicalOr.h>
#include <cker/operation/MaxMin.h>
@@ -33,6 +34,25 @@ namespace ops
namespace
{
template <typename T>
+void logicalAndGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output)
+{
+ if (!HaveSameShapes(lhs, rhs))
+ {
+ nnfw::cker::LogicalAndBroadcast<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
+ reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::LogicalAndElementwise<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ }
+}
+
+template <typename T>
void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
IPortableTensor *output)
{
@@ -88,6 +108,16 @@ void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortab
switch (op_type)
{
+ case ElementwiseBinaryType::kLogicalAnd:
+ if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalAndGeneric<bool>;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalOr: Unsupported data type"};
+ }
+ break;
case ElementwiseBinaryType::kLogicalOr:
if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
{
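
As with the existing LogicalOr path, the new kLogicalAnd case picks LogicalAndBroadcast or LogicalAndElementwise depending on whether the two shapes match. A minimal stand-alone sketch of the same-shape case only (the function name logical_and is my own, and broadcasting is omitted for brevity):

#include <array>
#include <cstdio>

// Element-wise logical AND for same-shaped inputs; this mirrors what the
// element-wise path computes, without the tensor plumbing.
template <typename T, size_t N>
std::array<T, N> logical_and(const std::array<T, N> &lhs, const std::array<T, N> &rhs)
{
  std::array<T, N> out{};
  for (size_t i = 0; i < N; ++i)
    out[i] = lhs[i] && rhs[i];
  return out;
}

int main()
{
  std::array<bool, 4> a{true, true, false, false};
  std::array<bool, 4> b{true, false, true, false};
  for (bool v : logical_and(a, b))
    std::printf("%d ", v);
  std::printf("\n");
}
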
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
index 066455e72..15d7f3049 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
@@ -195,6 +195,18 @@ void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
}
+void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Sqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void squareFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Square(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output)
{
if (!HaveSameShapes(input, output))
@@ -363,6 +375,26 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen
throw std::runtime_error{"Sin: Unsupported data type"};
}
break;
+ case ElementwiseUnaryType::kSqrt:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = sqrtFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Sqrt: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kSquare:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = squareFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Square: Unsupported data type"};
+ }
+ break;
case ElementwiseUnaryType::kZerosLike:
if (input->data_type() == OperandType::FLOAT32)
{
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
index c1765b5b7..54a6fc02a 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
@@ -46,6 +46,8 @@ enum class ElementwiseUnaryType
kRound,
kRSqrt,
kSin,
+ kSqrt,
+ kSquare,
kZerosLike
};
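
kSqrt and kSquare follow the same pattern as the other FLOAT32-only unary ops: one free function per element-wise formula, selected once in configure(). The underlying math, as a stand-alone reference:

#include <cmath>
#include <cstdio>

// Reference per-element formulas for the two new unary ops.
float sqrt_op(float x) { return std::sqrt(x); }
float square_op(float x) { return x * x; }

int main()
{
  for (float x : {0.25f, 1.f, 4.f, 9.f})
    std::printf("x=%4.2f  sqrt=%4.2f  square=%5.2f\n", x, sqrt_op(x), square_op(x));
}
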
diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
index b545e6743..5ea0ea893 100644
--- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
@@ -25,22 +25,19 @@ namespace cpu
namespace ops
{
-ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _axis(nullptr), _output(nullptr)
+ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _output(nullptr)
{
// DO NOTHING
}
-void ExpandDimsLayer::configure(const IPortableTensor *input, const IPortableTensor *axis,
- IPortableTensor *output)
+void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *output)
{
_input = input;
- _axis = axis;
_output = output;
}
void ExpandDimsLayer::run()
{
- // TODO use _axis to calculate shape of output when _axis is not constant
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
index b5d4938b5..1b7ead0c3 100644
--- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
+++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
@@ -36,14 +36,12 @@ public:
ExpandDimsLayer();
public:
- void configure(const IPortableTensor *input, const IPortableTensor *axis,
- IPortableTensor *output);
+ void configure(const IPortableTensor *input, IPortableTensor *output);
void run() override;
private:
const IPortableTensor *_input;
- const IPortableTensor *_axis;
IPortableTensor *_output;
};
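
Dropping the axis input works because ExpandDims never rearranges data: the output holds exactly the same elements in the same order, only its reported shape gains a size-1 dimension, and that shape is now resolved before the kernel runs. Hence run() reduces to a single memcpy. A stand-alone illustration of why the copy is shape-agnostic:

#include <cstdio>
#include <cstring>
#include <vector>

int main()
{
  // A 2x3 tensor and its expanded 1x2x3 view hold the same 6 floats in the same order,
  // so "expanding dims" is just a buffer copy plus a different shape descriptor.
  std::vector<float> input = {1, 2, 3, 4, 5, 6};  // shape (2, 3)
  std::vector<float> output(input.size());        // shape (1, 2, 3)
  std::memcpy(output.data(), input.data(), input.size() * sizeof(float));
  for (float v : output)
    std::printf("%g ", v);
  std::printf("\n");
}
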
diff --git a/runtime/onert/backend/cpu/ops/FillLayer.cc b/runtime/onert/backend/cpu/ops/FillLayer.cc
index df3f8b7cd..5b7c17907 100644
--- a/runtime/onert/backend/cpu/ops/FillLayer.cc
+++ b/runtime/onert/backend/cpu/ops/FillLayer.cc
@@ -29,15 +29,13 @@ namespace cpu
namespace ops
{
-FillLayer::FillLayer() : _input(nullptr), _value(nullptr), _output(nullptr)
+FillLayer::FillLayer() : _value(nullptr), _output(nullptr)
{
// DO NOTHING
}
-void FillLayer::configure(const IPortableTensor *input, const IPortableTensor *value,
- IPortableTensor *output)
+void FillLayer::configure(const IPortableTensor *value, IPortableTensor *output)
{
- _input = input;
_value = value;
_output = output;
}
@@ -47,28 +45,24 @@ void FillLayer::run()
switch (_output->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Fill<float *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<float *>(_value->buffer()),
+ nnfw::cker::Fill<float *>(reinterpret_cast<float *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<float *>(_output->buffer()));
break;
case OperandType::INT32:
- nnfw::cker::Fill<int32_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<int32_t *>(_value->buffer()),
+ nnfw::cker::Fill<int32_t *>(reinterpret_cast<int32_t *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<int32_t *>(_output->buffer()));
break;
case OperandType::INT64:
- nnfw::cker::Fill<int64_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<int64_t *>(_value->buffer()),
+ nnfw::cker::Fill<int64_t *>(reinterpret_cast<int64_t *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<int64_t *>(_output->buffer()));
break;
case OperandType::UINT32:
- nnfw::cker::Fill<uint32_t *>(
- getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<uint32_t *>(_value->buffer()), getTensorShape(_output),
- reinterpret_cast<uint32_t *>(_output->buffer()));
+ nnfw::cker::Fill<uint32_t *>(reinterpret_cast<uint32_t *>(_value->buffer()),
+ getTensorShape(_output),
+ reinterpret_cast<uint32_t *>(_output->buffer()));
break;
default:
throw std::runtime_error{"Fill: unsupported data type"};
diff --git a/runtime/onert/backend/cpu/ops/FillLayer.h b/runtime/onert/backend/cpu/ops/FillLayer.h
index 1f17d6b68..ce843654a 100644
--- a/runtime/onert/backend/cpu/ops/FillLayer.h
+++ b/runtime/onert/backend/cpu/ops/FillLayer.h
@@ -35,13 +35,11 @@ class FillLayer : public ::onert::exec::IFunction
public:
FillLayer();
- void configure(const IPortableTensor *input, const IPortableTensor *value,
- IPortableTensor *output);
+ void configure(const IPortableTensor *value, IPortableTensor *output);
void run() override;
private:
- const IPortableTensor *_input;
const IPortableTensor *_value;
IPortableTensor *_output;
};
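
The Fill rewrite tracks a cker signature change: the kernel no longer receives the dims tensor, because the output shape is already fixed by the time run() executes, so only the scalar value and the output are needed. Conceptually the op is a typed fill, as in this stand-alone sketch (the scalar value stands in for the single-element value tensor of the FLOAT32 branch):

#include <algorithm>
#include <cstdio>
#include <vector>

int main()
{
  // Fill: every element of the already-shaped, already-allocated output gets the value.
  const float value = 3.5f;
  std::vector<float> output(2 * 3);  // output shape (2, 3), known up front
  std::fill(output.begin(), output.end(), value);
  for (float v : output)
    std::printf("%g ", v);
  std::printf("\n");
}
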
diff --git a/runtime/onert/backend/cpu/ops/MeanLayer.cc b/runtime/onert/backend/cpu/ops/MeanLayer.cc
index 4921ac748..f130692ee 100644
--- a/runtime/onert/backend/cpu/ops/MeanLayer.cc
+++ b/runtime/onert/backend/cpu/ops/MeanLayer.cc
@@ -36,9 +36,24 @@ MeanLayer::MeanLayer() : _input(nullptr), _axes(nullptr), _output(nullptr), _kee
void MeanLayer::MeanFloat32()
{
- nnfw::cker::Mean(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
- getReducerAxes(_axes));
+ const auto inputShape = getTensorShape(_input);
+ const auto axisVec = getReducerAxes(_axes);
+ bool axis_is_1_and_2 =
+ _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 &&
+ ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1));
+
+ if (axis_is_1_and_2)
+ {
+ nnfw::cker::MeanAxis1And2(inputShape, reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::Mean(inputShape, reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ axisVec);
+ }
}
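
The new branch routes only 4-D inputs reduced over axes {1, 2} (in either order) with keep_dims to the specialized MeanAxis1And2 kernel; every other case still takes the generic Mean path. Assuming NHWC layout, that reduction averages the H*W positions of each (batch, channel) pair, as in this stand-alone reference (mean_axis_1_and_2 is my own plain-loop version, not the cker kernel):

#include <cstdio>
#include <vector>

// Mean over axes 1 and 2 of an NHWC tensor with keep_dims=true:
// output shape is (N, 1, 1, C); each value is the average over H*W positions.
std::vector<float> mean_axis_1_and_2(const std::vector<float> &in, int N, int H, int W, int C)
{
  std::vector<float> out(N * C, 0.f);
  for (int n = 0; n < N; ++n)
    for (int h = 0; h < H; ++h)
      for (int w = 0; w < W; ++w)
        for (int c = 0; c < C; ++c)
          out[n * C + c] += in[((n * H + h) * W + w) * C + c];
  for (float &v : out)
    v /= static_cast<float>(H * W);
  return out;
}

int main()
{
  // 1x2x2x1 tensor: the mean over H and W of {1, 2, 3, 4} is 2.5.
  std::vector<float> in = {1, 2, 3, 4};
  std::printf("%g\n", mean_axis_1_and_2(in, 1, 2, 2, 1)[0]);
}
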
void MeanLayer::MeanQuant8()
@@ -57,6 +72,10 @@ void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *a
_axes = axes;
_output = output;
_keep_dims = keep_dims;
+
+ if (_input->data_type() != OperandType::FLOAT32 &&
+ _input->data_type() != OperandType::QUANT_UINT8_ASYMM)
+ throw std::runtime_error{"Mean: unsupported data type"};
}
void MeanLayer::run()