author     Chunseok Lee <chunseok.lee@samsung.com>    2020-12-14 14:43:43 +0900
committer  Chunseok Lee <chunseok.lee@samsung.com>    2020-12-14 14:43:43 +0900
commit     62529acabbafce7730601ed01d5709d7bc0d378a (patch)
tree       bf6912cfa8fac4a2997292bfcb3c82055734c97e /runtime/onert/backend
parent     6ea13af5257155ff993c205cf997b870cc627f73 (diff)
Imported Upstream version 1.12.0 (upstream/1.12.0)
Diffstat (limited to 'runtime/onert/backend')
98 files changed, 4947 insertions, 686 deletions
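Editor's note: much of this import moves tensor planning and kernel generation into per-backend `BackendContext` classes (`genTensors`/`genKernels`, seen below for acl_cl and acl_neon). The new `planTensors` walks operation sequences in execution order and drives the tensor builder with use/def counts: constants get one extra use so they are claimed first and released last, a tensor's definition triggers `notifyFirstUse`, and its last remaining use triggers `notifyLastUse`. The sketch below illustrates only that counting scheme; the `Op` struct, `plan_tensors`, and the `TensorBuilderStub` are hypothetical stand-ins, not the actual onert classes, which operate on graph operands and `AclTensorBuilder`.

```cpp
// Minimal sketch of use/def-count tensor lifetime planning (assumed shape of
// the algorithm in the new BackendContext::planTensors; names are illustrative).
#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

struct Op
{
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

// Stand-in for the backend tensor builder: it only records the planned order.
struct TensorBuilderStub
{
  void notifyFirstUse(const std::string &t) { std::cout << "claim   " << t << "\n"; }
  void notifyLastUse(const std::string &t) { std::cout << "release " << t << "\n"; }
};

void plan_tensors(const std::vector<Op> &order, const std::set<std::string> &constants,
                  TensorBuilderStub &tb)
{
  std::map<std::string, uint32_t> uses; // remaining uses per tensor
  std::map<std::string, uint32_t> defs; // 1 until the defining op is visited

  for (const auto &op : order)
  {
    for (const auto &t : op.outputs)
      defs[t] = 1;
    for (const auto &t : op.inputs)
      uses[t]++;
  }

  // Constants get one extra use so they are claimed up front and released last.
  for (const auto &t : constants)
  {
    uses[t]++;
    tb.notifyFirstUse(t);
  }

  for (const auto &op : order)
  {
    // 1. Claim each output when its defining operation is reached
    for (const auto &t : op.outputs)
      if (defs[t])
      {
        defs[t] = 0;
        tb.notifyFirstUse(t);
      }
    // 2. Release each input once its last use has been consumed
    for (const auto &t : op.inputs)
      if (--uses[t] == 0)
        tb.notifyLastUse(t);
  }

  // Drop the artificial extra use on constants at the very end
  for (const auto &t : constants)
    if (--uses[t] == 0)
      tb.notifyLastUse(t);
}

int main()
{
  TensorBuilderStub tb;
  // Two chained ops: {"w"} -> "h" -> "out", with "w" a constant weight tensor.
  plan_tensors({{{"w"}, {"h"}}, {{"h"}, {"out"}}}, {"w"}, tb);
}
```

In the executors without a fixed linear order, the diff instead marks every registered operand with `notifyFirstUse` and never releases it, as a workaround for the static memory planner.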
diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt
index 42d622aa8..dc038c975 100644
--- a/runtime/onert/backend/CMakeLists.txt
+++ b/runtime/onert/backend/CMakeLists.txt
@@ -4,3 +4,5 @@ add_subdirectory(cpu)
 add_subdirectory(acl_cl)
 add_subdirectory(acl_neon)
 add_subdirectory(acl_common)
+add_subdirectory(ruy)
+add_subdirectory(xnnpack)
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 5c5041378..4f48314c1 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -20,6 +20,7 @@
 #include <memory>
 #include <backend/Backend.h>
 
+#include "BackendContext.h"
 #include "Config.h"
 #include "ConstantInitializer.h"
 #include "KernelGenerator.h"
@@ -41,21 +42,20 @@ public:
 
   std::shared_ptr<IConfig> config() const override { return _config; }
 
-  std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
-                                             const std::shared_ptr<custom::IKernelBuilder> &,
-                                             bool is_linear_executor) const override
+  std::unique_ptr<backend::BackendContext>
+  newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
+             bool is_linear_executor) const override
   {
     const auto &operands = graph.operands();
     const auto &operations = graph.operations();
-    auto context = std::make_unique<BackendContext>(this, &graph);
+    auto context = std::make_unique<acl_cl::BackendContext>(this, &graph);
     auto tm = createTensorManager(is_linear_executor);
     auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
-    auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+    auto tb = std::make_shared<TensorBuilder>(operands, tm);
     context->tensor_registry = tr;
     context->tensor_builder = tb;
     context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
     context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
-    context->tensor_register = nullptr;
     context->optimizer = std::make_shared<Optimizer>(context.get());
     return context;
   }
diff --git a/runtime/onert/backend/acl_cl/BackendContext.cc b/runtime/onert/backend/acl_cl/BackendContext.cc
new file mode 100644
index 000000000..a6f228a4f
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/BackendContext.cc
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "Optimizer.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" + +namespace onert +{ +namespace backend +{ +namespace acl_cl +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info) +{ + ir::OperandIndexMap<uint32_t> uses_map; + ir::OperandIndexMap<uint32_t> def_map; + ir::OperandIndexSequence constants; + + // Prepare scanning + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + const auto &li = lower_info.operand.at(ind); + if (li->def_factors().getOnlyElement().backend() != backend()) + continue; + + // Ignore unused tensor + if (li->def_factors().size() == 0 && li->use_factors().size() == 0) + { + VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process." + << std::endl; + return; + } + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().valid() ? 1 : 0; + + if (obj.isConstant()) + constants.append(ind); + + auto factor = li->def_factors().getOnlyElement(); + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any op_seq (No use and def) + const auto info = obj.info(); + const auto backend_layout = factor.layout(); + // TODO Change tensor info to have permuted shape + tensor_builder->registerTensorInfo(ind, info, backend_layout); + } + } + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor and allocate it first. + // Increasing use count here makes the tensor never be deallocated, i.e it they will be + // deallocated last. + VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan DEF of inputs. If variable tensor, allocate it + // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + for (const auto &op_idx : op_seq.operations()) + { + auto &op = graph()->operations().at(op_idx); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + // Define outputs + for (const auto &ind : op_outputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder->notifyFirstUse(ind); + } + } + + // Scan variable tensors + // This tensor has features like constant. 
But OperandInfo and LowerInfo treat them as + // non-constant because of less memory usage by memory planning in here + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + const auto &operand = graph()->operands().at(ind); + if (operand.info().isVariable()) + { + // The variable tensor with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + assert(lower_info.operand.at(ind)->def_factors().size() == 1 && + lower_info.operand.at(ind)->use_factors().size() == 1); + assert(uses_map[ind] == 1 && def_map[ind] == 0); + tensor_builder->notifyFirstUse(ind); + } + } + + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + // plan for deallocation of static tensornode + tensor_builder->notifyLastUse(ind); + } + } + } + } + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder->notifyLastUse(ind); + } + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + optimizer->optimize(); + + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (const auto op_ind : op_seq) + { + bool op_assigned = [&]() { + for (auto &op_info : operation_list()) + if (op_info.index == op_ind) + return true; + return false; + }(); + if (!op_assigned) + continue; + + const auto &op = graph()->operations().at(op_ind); + for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED) + { + if (!tensor_builder->isRegistered(index) && !model_io.contains(index) && + find(operand_list().begin(), operand_list().end(), index) != operand_list().end()) + { + const auto &operand_lower_info = + lower_info.operand.at(index)->def_factors().getOnlyElement(); + + // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) + // op.getOutputs() of permute (CPU) returns tensor A + // but tensor A belongs to the backend of acl_cl. + // So, we have to make this tensor NOT registered for CPU. 
+ if (operand_lower_info.backend() != backend()) + continue; + + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = op_seq.getLayout(); + const auto backend_layout = operand_lower_info.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + } + } + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + planTensors(order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + tensor_builder->allocate(); + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { + ifunc.prepare(); + tensor_builder->postFunctionPrepare(); + }); + } + + return ret; +} + +} // namespace acl_cl +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_cl/BackendContext.h b/runtime/onert/backend/acl_cl/BackendContext.h new file mode 100644 index 000000000..662d767d0 --- /dev/null +++ b/runtime/onert/backend/acl_cl/BackendContext.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +namespace onert +{ +namespace backend +{ +namespace acl_cl +{ + +class Optimizer; + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen} + { + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + std::shared_ptr<Optimizer> optimizer; +}; + +} // namespace acl_cl +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc index b45b91058..413a7ccc3 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc @@ -112,7 +112,7 @@ void ConstantInitializer::visit(const ir::operation::Reverse &node) const auto &axis_obj = _operands.at(axis_index); const auto ifm_rank = input_obj.shape().rank(); - const auto frontend_layout = this->_current_op_seq_layout; + const auto frontend_layout = this->_current_layout; auto output_tensor = this->_tensor_reg->getITensor(output_index); const auto backend_layout = output_tensor->layout(); diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h index 9f3acb461..fc0eca84f 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.h +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__ #include "AclConstantInitializer.h" @@ -45,4 +45,4 @@ public: } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc index e7690af2e..3a5ea5a0f 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.cc +++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc @@ -49,7 +49,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr<TensorBuilder> &tensor_builder, const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -62,7 +62,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq = std::make_unique<exec::FunctionSequence>(); _return_fn_seq->enableDynamicShapeInferer(false); - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -78,6 +78,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + const auto NNApiInputs = 2; + if (node.getInputs().size() != NNApiInputs) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + if (!_ctx.at(crops_index).isConstant()) + { + throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND"); + } + + auto crops = _ctx.at(crops_index).asVector<int32_t>(); + for (auto crop : crops) + { + if (crop != 0) + { + throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND"); + } + } + } + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index); @@ -152,8 +171,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -189,8 +208,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -255,7 +274,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) else { const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); @@ -277,7 +296,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node) auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor, ::arm_compute::CLFullyConnectedReshapingLayer>( - node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_layout); _return_fn = std::make_unique<exec::FunctionSequence>( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } @@ -296,7 +315,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) // Convert to ACL axes taking into account negative values and possible duplicates. const auto &axes = _ctx.at(axes_index); const auto input_rank = _ctx.at(input_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = input_tensor->layout(); std::unique_ptr<arm_compute::IFunction> fn; @@ -329,7 +348,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. 
- const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || frontend_layout == backend_layout); @@ -388,7 +407,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -455,7 +474,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -557,7 +576,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); const auto &perms = _ctx.at(perm_idx); @@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx); const size_t output_rank = _ctx.at(output_idx).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); int32_t axis = node.param().axis == -1 ? 
output_rank - 1 : node.param().axis; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); @@ -887,7 +906,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : input_indexes) inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout(); if (axis < 0) @@ -923,8 +942,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) void KernelGenerator::visit(const ir::operation::Pool2D &node) { auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( - node, _ctx, _tensor_reg, _current_op_seq_layout, - acl_common::convertPoolType(node.param().op_type)); + node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type)); const auto ofm_index{node.getOutputs().at(0)}; auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); @@ -1169,9 +1187,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout); const auto stride = node.param().stride; @@ -1270,7 +1288,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) UNUSED_RELEASE(backend_layout); assert(backend_layout == ifm_tensor->layout()); assert(backend_layout == indices_tensor->layout()); - assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); + assert(ifm_rank < 4 || _current_layout == backend_layout); // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; @@ -1306,11 +1324,11 @@ void KernelGenerator::visit(const ir::operation::Gather &node) _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; auto ifm_shape = _ctx.at(ifm_index).shape(); auto ofm_shape = _ctx.at(ofm_index).shape(); @@ -1320,7 +1338,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); - auto frontend_layout = _current_op_seq_layout; + auto frontend_layout = _current_layout; auto backend_layout = ifm_tensor->layout(); int axis_value = _ctx.at(axis_index).asScalar<int32_t>(); @@ -1331,10 +1349,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) 
auto acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - + auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX + : ::arm_compute::ReductionOperation::ARG_IDX_MIN; auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayerEx>( - ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), - ::arm_compute::ReductionOperation::ARG_IDX_MAX); + ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type); _return_fn = asAclFunction(std::move(fn)); } @@ -1400,7 +1418,7 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &ofm_ind : output_indexes) output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); auto axis = _ctx.at(axis_index).asScalar<int32_t>(); if (axis < 0) @@ -1439,7 +1457,7 @@ void KernelGenerator::visit(const ir::operation::SplitV &node) { int32_t split_dim = split_dim_op.asScalar<int32_t>(); uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions()) @@ -1483,7 +1501,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); if (axis < 0) axis += input_rank; @@ -1526,7 +1544,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) auto input = _tensor_reg->getAclTensor(input_index)->handle(); auto output = _tensor_reg->getAclTensor(output_index)->handle(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); ::arm_compute::PaddingList padding_list; diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h index e8a922677..22a7c18a3 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.h +++ b/runtime/onert/backend/acl_cl/KernelGenerator.h @@ -17,7 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__ -#include <backend/IKernelGenerator.h> +#include <backend/cpu_common/KernelGeneratorBase.h> #include "ir/Operands.h" #include "TensorBuilder.h" @@ -31,7 +31,7 @@ namespace backend namespace acl_cl { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -39,60 +39,61 @@ public: const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg); void visit(const ir::OpSequence &) override; + + void visit(const ir::operation::ArgMinMax &) override; void visit(const ir::operation::BatchToSpaceND &) override; void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) 
override; + void visit(const ir::operation::ConvertFp16ToFp32 &) override; + void visit(const ir::operation::ConvertFp32ToFp16 &) override; + void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::Reshape &) override; - void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Softmax &) override; - void visit(const ir::operation::Slice &) override; - void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::Transpose &) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) override; void visit(const ir::operation::ElementwiseUnary &) override; + void visit(const ir::operation::EmbeddingLookup &) override; void visit(const ir::operation::ExpandDims &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::HashtableLookup &) override; void visit(const ir::operation::InstanceNorm &) override; - void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::L2Normalization &) override; + void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::LSTM &) override; void visit(const ir::operation::OneHot &) override; void visit(const ir::operation::Pack &) override; - void visit(const ir::operation::Pool2D &) override; + void visit(const ir::operation::Pad &) override; void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::Pool2D &) override; + void visit(const ir::operation::PReLU &) override; + void visit(const ir::operation::Reduce &) override; + void visit(const ir::operation::Reshape &) override; void visit(const ir::operation::ResizeBilinear &) override; void visit(const ir::operation::ResizeNearestNeighbor &) override; + void visit(const ir::operation::Reverse &) override; void visit(const ir::operation::RNN &) override; + void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::EmbeddingLookup &) override; - void visit(const ir::operation::L2Normalization &) override; - void visit(const ir::operation::HashtableLookup &) override; - void visit(const ir::operation::PReLU &) override; - void visit(const ir::operation::TransposeConv &) override; - void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::TopKV2 &) override; - void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::ArgMax &) override; - void visit(const ir::operation::LocalResponseNormalization &) override; - void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::SplitV &) override; + void visit(const ir::operation::SquaredDifference &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::StridedSlice &) override; + void visit(const ir::operation::TopKV2 &) override; + void visit(const ir::operation::Transpose &) override; + 
void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Unpack &) override; - void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::ConvertFp32ToFp16 &) override; - void visit(const ir::operation::ConvertFp16ToFp32 &) override; - void visit(const ir::operation::Reverse &) override; private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; }; } // namespace acl_cl diff --git a/runtime/onert/backend/acl_cl/Optimizer.h b/runtime/onert/backend/acl_cl/Optimizer.h index 18d38ec1b..ad5154860 100644 --- a/runtime/onert/backend/acl_cl/Optimizer.h +++ b/runtime/onert/backend/acl_cl/Optimizer.h @@ -17,8 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__ #define __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__ -#include <backend/IOptimizer.h> -#include <backend/BackendContext.h> +#include "BackendContext.h" #include "TensorBuilder.h" namespace onert @@ -28,12 +27,12 @@ namespace backend namespace acl_cl { -class Optimizer : public IOptimizer +class Optimizer { public: Optimizer(BackendContext *context); - void optimize() override; + void optimize(); private: BackendContext *_context; diff --git a/runtime/onert/backend/acl_cl/acl_cl.cc b/runtime/onert/backend/acl_cl/acl_cl.cc index 88378b13a..82cbde02f 100644 --- a/runtime/onert/backend/acl_cl/acl_cl.cc +++ b/runtime/onert/backend/acl_cl/acl_cl.cc @@ -14,20 +14,11 @@ * limitations under the License. */ -#include <util/logging.h> - #include "Backend.h" extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'acl_cl' loaded\n"; - return new onert::backend::acl_cl::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'acl_cl' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_cl::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc index 21f41a3e6..921d107d9 100644 --- a/runtime/onert/backend/acl_common/AclConstantInitializer.cc +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc @@ -25,7 +25,7 @@ namespace acl_common AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands, const std::shared_ptr<ITensorRegistry> &tensor_reg) - : IConstantInitializer{operands}, _tensor_reg{tensor_reg} + : cpu_common::ConstantInitializerBase{operands}, _tensor_reg{tensor_reg} { // DO NOTHING } diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h index 52f4c54cf..894e2e7d1 100644 --- a/runtime/onert/backend/acl_common/AclConstantInitializer.h +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h @@ -17,7 +17,7 @@ #ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ -#include <backend/IConstantInitializer.h> +#include <backend/cpu_common/ConstantInitializerBase.h> #include <ir/Operands.h> #include "AclTensorRegistry.h" @@ -28,7 +28,7 @@ namespace backend namespace acl_common { -class AclConstantInitializer : public IConstantInitializer +class AclConstantInitializer : public 
cpu_common::ConstantInitializerBase { public: AclConstantInitializer(const ir::Operands &operands, diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h index bb7abc95d..12e9ab894 100644 --- a/runtime/onert/backend/acl_common/AclTensorBuilder.h +++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h @@ -21,7 +21,6 @@ #include <queue> #include <arm_compute/core/Types.h> -#include <backend/ITensorBuilder.h> #include "ir/OperandIndexMap.h" #include <ir/Operands.h> #include "AclTensorManager.h" @@ -43,14 +42,12 @@ enum class UsesType LAST }; -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -class AclTensorBuilder : public ITensorBuilder +template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder { public: using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>; - AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr, - const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg); + AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr); /** * @brief Register tensor information to allocate on ACL-CL backend @@ -59,16 +56,16 @@ public: * @param[in] layout Tensor data layout */ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) override; + ir::Layout backend_layout); - void notifyFirstUse(const ir::OperandIndex &) override; - void notifyLastUse(const ir::OperandIndex &) override; + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); - bool isRegistered(const ir::OperandIndex &) const override; + bool isRegistered(const ir::OperandIndex &) const; - void prepare(void) override; - void allocate() override; - void postFunctionPrepare() override; + void prepare(void); + void allocate(); + void postFunctionPrepare(); T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); } @@ -105,7 +102,6 @@ private: ir::OperandIndexMap<size_t> _uses_count_map; std::unique_ptr<T_AclTensorManager> _tensor_mgr; - std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg; // for linear executor std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq; @@ -133,10 +129,9 @@ namespace acl_common { template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder( - const ir::Operands &operands, T_AclTensorManager *tensor_mgr, - const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg) - : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg} +AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands, + T_AclTensorManager *tensor_mgr) + : _operands{operands}, _tensor_mgr{tensor_mgr} { assert(_tensor_mgr); } diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc index 67d9d7176..7d3a69032 100644 --- a/runtime/onert/backend/acl_common/Convert.cc +++ b/runtime/onert/backend/acl_common/Convert.cc @@ -109,13 +109,19 @@ namespace acl_common case ir::DataType::UINT8: return ::arm_compute::DataType::U8; case ir::DataType::QUANT_INT8_SYMM: - return ::arm_compute::DataType::S8; + return ::arm_compute::DataType::QSYMM8; + case ir::DataType::QUANT_INT8_ASYMM: + return ::arm_compute::DataType::QASYMM8_SIGNED; case ir::DataType::FLOAT16: return ::arm_compute::DataType::F16; case ir::DataType::INT64: return 
::arm_compute::DataType::S64; + case ir::DataType::QUANT_INT16_ASYMM: + return ::arm_compute::DataType::QASYMM16; + case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL: + return ::arm_compute::DataType::QSYMM8_PER_CHANNEL; default: - throw std::runtime_error("Not supported, yet"); + throw std::runtime_error("Not supported internal data type, yet"); break; } } @@ -175,7 +181,7 @@ namespace acl_common return ::arm_compute::ActivationLayerInfo{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f}; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported internal activation, yet"}; break; } } @@ -219,7 +225,7 @@ asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, return ::arm_compute::ActivationLayerInfo{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha}; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported internal elementwise activation, yet"}; break; } } @@ -295,6 +301,8 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) return ir::DataType::UINT32; case ::arm_compute::DataType::QASYMM8: return ir::DataType::QUANT_UINT8_ASYMM; + case ::arm_compute::DataType::QASYMM8_SIGNED: + return ir::DataType::QUANT_INT8_ASYMM; case ::arm_compute::DataType::U8: return ir::DataType::UINT8; case ::arm_compute::DataType::QSYMM8: @@ -304,7 +312,7 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) case ::arm_compute::DataType::S64: return ir::DataType::INT64; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported acl data type, yet"}; break; } } diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h index 35d6e4e8e..b11c19733 100644 --- a/runtime/onert/backend/acl_neon/Backend.h +++ b/runtime/onert/backend/acl_neon/Backend.h @@ -21,6 +21,7 @@ #include <backend/Backend.h> #include <ir/Operands.h> +#include "BackendContext.h" #include "Config.h" #include "ConstantInitializer.h" #include "KernelGenerator.h" @@ -41,21 +42,20 @@ public: std::shared_ptr<IConfig> config() const override { return _config; } - std::unique_ptr<BackendContext> newContext(const ir::Graph &graph, - const std::shared_ptr<custom::IKernelBuilder> &, - bool is_linear_executor) const override + std::unique_ptr<backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &, + bool is_linear_executor) const override { const auto &operands = graph.operands(); const auto &operations = graph.operations(); - auto context = std::make_unique<BackendContext>(this, &graph); + auto context = std::make_unique<acl_neon::BackendContext>(this, &graph); auto tm = createTensorManager(is_linear_executor); auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); - auto tb = std::make_shared<TensorBuilder>(operands, tm, tr); + auto tb = std::make_shared<TensorBuilder>(operands, tm); context->tensor_registry = tr; context->tensor_builder = tb; context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr); - context->tensor_register = nullptr; context->optimizer = std::make_shared<Optimizer>(context.get()); return context; } diff --git a/runtime/onert/backend/acl_neon/BackendContext.cc b/runtime/onert/backend/acl_neon/BackendContext.cc new file mode 100644 index 000000000..8b53171f7 --- /dev/null +++ 
b/runtime/onert/backend/acl_neon/BackendContext.cc @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "Optimizer.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" + +namespace onert +{ +namespace backend +{ +namespace acl_neon +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info) +{ + ir::OperandIndexMap<uint32_t> uses_map; + ir::OperandIndexMap<uint32_t> def_map; + ir::OperandIndexSequence constants; + + // Prepare scanning + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + const auto &li = lower_info.operand.at(ind); + if (li->def_factors().getOnlyElement().backend() != backend()) + continue; + + // Ignore unused tensor + if (li->def_factors().size() == 0 && li->use_factors().size() == 0) + { + VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process." + << std::endl; + return; + } + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().valid() ? 1 : 0; + + if (obj.isConstant()) + constants.append(ind); + + auto factor = li->def_factors().getOnlyElement(); + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any op_seq (No use and def) + const auto info = obj.info(); + const auto backend_layout = factor.layout(); + // TODO Change tensor info to have permuted shape + tensor_builder->registerTensorInfo(ind, info, backend_layout); + } + } + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor and allocate it first. + // Increasing use count here makes the tensor never be deallocated, i.e it they will be + // deallocated last. + VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan DEF of inputs. If variable tensor, allocate it + // 3. Scan USE of inputs. 
Decrease the USE and deallocate if the USE is 0 + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + for (const auto &op_idx : op_seq.operations()) + { + auto &op = graph()->operations().at(op_idx); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + // Define outputs + for (const auto &ind : op_outputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder->notifyFirstUse(ind); + } + } + + // Scan variable tensors + // This tensor has features like constant. But OperandInfo and LowerInfo treat them as + // non-constant because of less memory usage by memory planning in here + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + const auto &operand = graph()->operands().at(ind); + if (operand.info().isVariable()) + { + // The variable tensor with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + assert(lower_info.operand.at(ind)->def_factors().size() == 1 && + lower_info.operand.at(ind)->use_factors().size() == 1); + assert(uses_map[ind] == 1 && def_map[ind] == 0); + tensor_builder->notifyFirstUse(ind); + } + } + + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + // plan for deallocation of static tensornode + tensor_builder->notifyLastUse(ind); + } + } + } + } + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder->notifyLastUse(ind); + } + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + optimizer->optimize(); + + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (const auto op_ind : op_seq) + { + bool op_assigned = [&]() { + for (auto &op_info : operation_list()) + if (op_info.index == op_ind) + return true; + return false; + }(); + if (!op_assigned) + continue; + + const auto &op = graph()->operations().at(op_ind); + for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED) + { + if (!tensor_builder->isRegistered(index) && !model_io.contains(index) && + find(operand_list().begin(), operand_list().end(), index) != operand_list().end()) + { + const auto &operand_lower_info = + lower_info.operand.at(index)->def_factors().getOnlyElement(); + + // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) + // op.getOutputs() of permute (CPU) returns tensor A + // but tensor A belongs to the backend of acl_cl. + // So, we have to make this tensor NOT registered for CPU. 
+ if (operand_lower_info.backend() != backend()) + continue; + + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = op_seq.getLayout(); + const auto backend_layout = operand_lower_info.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + } + } + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + planTensors(order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + tensor_builder->allocate(); + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { + ifunc.prepare(); + tensor_builder->postFunctionPrepare(); + }); + } + + return ret; +} + +} // namespace neon +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_neon/BackendContext.h b/runtime/onert/backend/acl_neon/BackendContext.h new file mode 100644 index 000000000..dd764c091 --- /dev/null +++ b/runtime/onert/backend/acl_neon/BackendContext.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +namespace onert +{ +namespace backend +{ +namespace acl_neon +{ + +class Optimizer; + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen} + { + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + std::shared_ptr<Optimizer> optimizer; +}; + +} // namespace acl_neon +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h index c7d71cdcf..9723ba012 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.h +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ #include "AclConstantInitializer.h" @@ -41,4 +41,4 @@ public: } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc index ffaee3b3e..e712dfa81 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.cc +++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc @@ -48,7 +48,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr<TensorBuilder> &tensor_builder, const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -61,7 +61,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq = std::make_unique<exec::FunctionSequence>(); _return_fn_seq->enableDynamicShapeInferer(false); - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -70,17 +70,17 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) } } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); - auto frontend_layout = _current_op_seq_layout; + auto frontend_layout = _current_layout; auto backend_layout = ifm_tensor->layout(); int axis_value = _ctx.at(axis_index).asScalar<int32_t>(); @@ -91,10 +91,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) assert(axis_value >= 0 && axis_value < ifm_rank); const auto fixed_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); + auto reduce_type = node.param().is_arg_max ? 
::arm_compute::ReductionOperation::ARG_IDX_MAX + : ::arm_compute::ReductionOperation::ARG_IDX_MIN; auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>( - ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), - arm_compute::ReductionOperation::ARG_IDX_MAX); + ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type); _return_fn = asAclFunction(std::move(fn)); } @@ -106,6 +107,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + const auto NNApiInputs = 2; + if (node.getInputs().size() != NNApiInputs) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + if (!_ctx.at(crops_index).isConstant()) + { + throw std::runtime_error("Non-constant crops NYI for acl_neon backend BatchToSpaceND"); + } + + auto crops = _ctx.at(crops_index).asVector<int32_t>(); + for (auto crop : crops) + { + if (crop != 0) + { + throw std::runtime_error("Non-zero crops NYI for acl_neon backend BatchToSpaceND"); + } + } + } + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index); @@ -178,8 +198,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -232,8 +252,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -297,7 +317,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) else { const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); @@ -495,7 +515,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node) auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor, ::arm_compute::NEFullyConnectedReshapingLayer>( - node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_layout); _return_fn = std::make_unique<exec::FunctionSequence>( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } @@ -552,7 +572,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. assert(backend_layout == ifm_tensor->layout()); assert(backend_layout == indices_tensor->layout()); - assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); + assert(ifm_rank < 4 || _current_layout == backend_layout); // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; @@ -686,7 +706,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : input_indexes) inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout(); if (axis < 0) @@ -738,7 +758,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) { const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); const auto axis = acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value(); @@ -762,8 +782,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) void KernelGenerator::visit(const ir::operation::Pool2D &node) { auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_reg, _current_op_seq_layout, - acl_common::convertPoolType(node.param().op_type)); + node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type)); const auto ofm_index{node.getOutputs().at(0)}; auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); @@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) // Convert to ACL axes taking into account negative values and possible duplicates. 
const auto &axes = _ctx.at(axes_index); const auto input_rank = _ctx.at(input_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = input_tensor->layout(); const auto reduce_axes = acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout); @@ -873,7 +892,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || frontend_layout == backend_layout); @@ -1047,7 +1066,7 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &ofm_ind : output_indexes) output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); auto axis = _ctx.at(axis_index).asScalar<int32_t>(); if (axis < 0) @@ -1085,7 +1104,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -1150,7 +1169,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -1244,9 +1263,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout); const auto stride = node.param().stride; @@ -1285,7 +1304,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx); const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); const auto rank = _ctx.at(ifm_idx).shape().rank(); @@ -1340,7 +1359,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) 
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); if (axis < 0) axis += input_rank; @@ -1413,7 +1432,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx); const size_t output_rank = _ctx.at(out_idx).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h index 4d269cde5..2a4b307b8 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.h +++ b/runtime/onert/backend/acl_neon/KernelGenerator.h @@ -17,7 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__ -#include <backend/IKernelGenerator.h> +#include <backend/cpu_common/KernelGeneratorBase.h> #include "ir/Operands.h" #include "TensorBuilder.h" @@ -31,7 +31,7 @@ namespace backend namespace acl_neon { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -39,17 +39,20 @@ public: const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg); void visit(const ir::OpSequence &) override; - void visit(const ir::operation::ArgMax &) override; + + void visit(const ir::operation::ArgMinMax &) override; void visit(const ir::operation::BatchToSpaceND &) override; void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) override; void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::EmbeddingLookup &) override; + void visit(const ir::operation::ExpandDims &) override; void visit(const ir::operation::FullyConnected &) override; void visit(const ir::operation::Gather &) override; void visit(const ir::operation::HashtableLookup &) override; @@ -57,36 +60,34 @@ public: void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::OneHot &) override; void visit(const ir::operation::Pack &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::PReLU &) override; void visit(const ir::operation::Reduce &) override; void visit(const ir::operation::Reshape &) override; void visit(const 
ir::operation::ResizeBilinear &) override; void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::Slice &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Squeeze &) override; void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Unpack &) override; - void visit(const ir::operation::ExpandDims &) override; - void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::OneHot &) override; private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; }; } // namespace acl_neon diff --git a/runtime/onert/backend/acl_neon/Optimizer.h b/runtime/onert/backend/acl_neon/Optimizer.h index 5fe0d519c..b8fb343e9 100644 --- a/runtime/onert/backend/acl_neon/Optimizer.h +++ b/runtime/onert/backend/acl_neon/Optimizer.h @@ -17,8 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__ #define __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__ -#include <backend/IOptimizer.h> -#include <backend/BackendContext.h> +#include "BackendContext.h" #include "TensorBuilder.h" namespace onert @@ -28,12 +27,12 @@ namespace backend namespace acl_neon { -class Optimizer : public IOptimizer +class Optimizer { public: Optimizer(BackendContext *context); - void optimize() override; + void optimize(); private: BackendContext *_context; diff --git a/runtime/onert/backend/acl_neon/acl_neon.cc b/runtime/onert/backend/acl_neon/acl_neon.cc index f490d132d..6535fb291 100644 --- a/runtime/onert/backend/acl_neon/acl_neon.cc +++ b/runtime/onert/backend/acl_neon/acl_neon.cc @@ -14,20 +14,11 @@ * limitations under the License. 
*/ -#include <util/logging.h> - #include "Backend.h" extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'acl_neon' loaded\n"; - return new onert::backend::acl_neon::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'acl_neon' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_neon::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h index fc8574b26..0b416a7e9 100644 --- a/runtime/onert/backend/cpu/Backend.h +++ b/runtime/onert/backend/cpu/Backend.h @@ -54,8 +54,6 @@ public: context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, context->external_context()); - context->tensor_register = nullptr; - context->optimizer = nullptr; return context; } diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc new file mode 100644 index 000000000..6b958c1b7 --- /dev/null +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace cpu +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? 
+ for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h index e90b21054..0a4106d33 100644 --- a/runtime/onert/backend/cpu/BackendContext.h +++ b/runtime/onert/backend/cpu/BackendContext.h @@ -18,6 +18,9 @@ #define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__ #include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" #include "ExternalContext.h" namespace onert @@ -32,21 +35,35 @@ class BackendContext : public onert::backend::BackendContext public: BackendContext(const Backend *backend, const ir::Graph *graph, std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, - std::shared_ptr<ITensorBuilder> tensor_builder = nullptr, - std::shared_ptr<IConstantInitializer> constant_initializer = nullptr, - std::shared_ptr<IKernelGenerator> kernel_gen = nullptr, - std::shared_ptr<ITensorRegister> tensor_register = nullptr, - std::shared_ptr<IOptimizer> optimizer = nullptr) - : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder, - constant_initializer, kernel_gen, tensor_register, - optimizer), - _external_context(new ExternalContext) + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) 
+ : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(new ExternalContext) { } + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + std::shared_ptr<ExternalContext> external_context() { return _external_context; } private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, // the thread pool is also created in duplicate // TODO Create one ruy context for session diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc deleted file mode 100644 index 6f6eb77bc..000000000 --- a/runtime/onert/backend/cpu/ConstantInitializer.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ConstantInitializer.h" -#include "Tensor.h" - -namespace onert -{ -namespace backend -{ -namespace cpu -{ - -ConstantInitializer::ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<ITensorRegistry> &tensor_reg) - : IConstantInitializer{operands}, _tensor_reg{tensor_reg} -{ - // DO NOTHING -} - -void ConstantInitializer::registerDefaultInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) -{ - registerExternalInitializer(index, obj); -} - -void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) -{ - // For only CONSTANTS - // TODO Add to check if tensor has been allocated - if (!obj.isConstant()) - return; - - _init_map[index] = [](const onert::ir::Operand &model_obj, onert::backend::ITensor &itensor) { - auto data = model_obj.shareData(); - assert(data && data->base()); - ExternalTensor &tensor = dynamic_cast<ExternalTensor &>(itensor); - tensor.setData(data); - }; -} - -void ConstantInitializer::visit(const ir::operation::Conv2D &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerExternalInitializer(kernel_index, kernel_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); -} - -void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerExternalInitializer(kernel_index, kernel_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); -} - -void ConstantInitializer::visit(const ir::operation::FullyConnected &node) -{ - const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); - const auto &weight_obj = _operands.at(weight_index); - registerExternalInitializer(weight_index, weight_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); - if (!bias_index.undefined()) - { - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); - } -} - -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h index c016c83bc..d7858c0f6 100644 --- a/runtime/onert/backend/cpu/ConstantInitializer.h +++ b/runtime/onert/backend/cpu/ConstantInitializer.h @@ -14,13 +14,10 @@ * limitations under the License. 
*/ -#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ -#include "backend/cpu_common/TensorRegistry.h" - -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> +#include <backend/cpu_common/ConstantInitializer.h> namespace onert { @@ -29,35 +26,10 @@ namespace backend namespace cpu { -class ConstantInitializer : public IConstantInitializer -{ -public: - ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<ITensorRegistry> &tensor_reg); - -public: - void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override; - - // TODO: For now the only cpu backend supports constant tensor to use data from external - // If the other backend supports (to do this, - // ExternalTensor should be abstract such as IExternal, maybe), - // this can be an interface of IConstantInitializer - void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &); - -public: - void visit(const ir::operation::Conv2D &) override; - void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::FullyConnected &) override; - -private: - std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; } - -private: - std::shared_ptr<ITensorRegistry> _tensor_reg; -}; +using ConstantInitializer = cpu_common::ConstantInitializer; } // namespace cpu } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h index 32e249f5a..f5d11f4f1 100644 --- a/runtime/onert/backend/cpu/ExternalContext.h +++ b/runtime/onert/backend/cpu/ExternalContext.h @@ -17,7 +17,6 @@ #ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ #define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ -#include <backend/IExternalContext.h> #include <util/ConfigSource.h> #include <ruy/context.h> @@ -33,7 +32,7 @@ namespace backend namespace cpu { -class ExternalContext : public IExternalContext +class ExternalContext { public: ExternalContext() : _ruy_context(new ruy::Context) diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc index 451815b65..25756eced 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.cc +++ b/runtime/onert/backend/cpu/KernelGenerator.cc @@ -23,6 +23,7 @@ #include "ops/CompareLayer.h" #include "ops/ConcatLayer.h" #include "ops/ConvolutionLayer.h" +#include "ops/DepthToSpaceLayer.h" #include "ops/DepthwiseConvolutionLayer.h" #include "ops/EinsumLayer.h" #include "ops/ElementwiseActivationLayer.h" @@ -108,12 +109,16 @@ convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type { switch (type_ir) { + case ir::operation::ElementwiseActivation::Type::ELU: + return ops::ElementwiseActivationType::kElu; case ir::operation::ElementwiseActivation::Type::LOGISTIC: return ops::ElementwiseActivationType::kLogistic; case ir::operation::ElementwiseActivation::Type::RELU: return ops::ElementwiseActivationType::kReLU; case ir::operation::ElementwiseActivation::Type::TANH: return ops::ElementwiseActivationType::kTanh; + case ir::operation::ElementwiseActivation::Type::LEAKY_RELU: + return ops::ElementwiseActivationType::kLeakyReLU; default: throw std::runtime_error("cpu KernelGenerator : Not supported 
operation yet"); } @@ -124,6 +129,8 @@ convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinary { switch (type_ir) { + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND: + return ops::ElementwiseBinaryType::kLogicalAnd; case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR: return ops::ElementwiseBinaryType::kLogicalOr; case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX: @@ -167,6 +174,10 @@ ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::Elementwise return ops::ElementwiseUnaryType::kRSqrt; case ir::operation::ElementwiseUnary::Type::SIN: return ops::ElementwiseUnaryType::kSin; + case ir::operation::ElementwiseUnary::Type::SQRT: + return ops::ElementwiseUnaryType::kSqrt; + case ir::operation::ElementwiseUnary::Type::SQUARE: + return ops::ElementwiseUnaryType::kSquare; case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE: return ops::ElementwiseUnaryType::kZerosLike; default: @@ -217,7 +228,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr<ExternalContext> &external_context) : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), - _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context) + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) { // DO NOTHING } @@ -260,7 +271,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); } - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -314,8 +325,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) _return_fn = std::move(fn); return; } - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -342,8 +353,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -364,7 +375,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width, - dilation_height, activation, ofm_tensor); + dilation_height, activation, ofm_tensor, _external_context); _return_fn = std::move(fn); } @@ -374,7 +385,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) const auto ofm_index{node.getOutputs().at(0)}; const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); auto output_tensor = _tensor_reg->getPortableTensor(ofm_index); @@ -418,16 +429,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) void KernelGenerator::visit(const ir::operation::Fill &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)}; + // SHAPE input is used for shape inference const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)}; auto output_tensor = _tensor_reg->getPortableTensor(output_index); - auto input_tensor = _tensor_reg->getPortableTensor(input_index); auto value_tensor = _tensor_reg->getPortableTensor(value_index); auto fn = std::make_unique<ops::FillLayer>(); - fn->configure(input_tensor, value_tensor, output_tensor); + fn->configure(value_tensor, output_tensor); _return_fn = std::move(fn); } @@ -576,7 +586,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) assert(backend_layout == indices_tensor->layout()); const auto &input_shape = _ctx.at(input_index).shape(); UNUSED_RELEASE(input_shape); - assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout); + assert(input_shape.rank() < 4 || _current_layout == backend_layout); const auto axis_raw = node.param().axis; const auto axis_value = (axis_raw < 0 ? 
(input_shape.rank() + axis_raw) : axis_raw); @@ -640,7 +650,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node) for (auto &idx : opSeq) { const auto &operand = _ctx.at(idx); - // TODO make sure using `_current_op_seq_layout` is correct for custom operations + // TODO make sure using `_current_layout` is correct for custom operations types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()}); auto in_tensor = _tensor_reg->getPortableTensor(idx); tensors.emplace_back(in_tensor); @@ -713,15 +723,14 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node) { const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; + // AXIS input is used for output shape inference auto output_tensor = _tensor_reg->getPortableTensor(output_index); auto input_tensor = _tensor_reg->getPortableTensor(input_index); - auto axis_tensor = _tensor_reg->getPortableTensor(axis_index); auto fn = std::make_unique<ops::ExpandDimsLayer>(); - fn->configure(input_tensor, axis_tensor, output_tensor); + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -731,7 +740,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) const auto ofm_index{node.getOutputs().at(0)}; const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); assert(-rank <= axis && axis < rank); @@ -753,7 +762,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) const auto input_index{node.getInputs().at(0)}; const auto rank = _ctx.at(input_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); assert(rank == 0 || (-rank <= axis && axis < rank)); @@ -1004,11 +1013,11 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)}; + const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)}; auto output_tensor = _tensor_reg->getPortableTensor(output_index); auto input_tensor = _tensor_reg->getPortableTensor(input_index); @@ -1016,7 +1025,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto fn = std::make_unique<ops::ArgMinMaxLayer>(); - fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true); + fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max); _return_fn = std::move(fn); } @@ -1029,8 +1038,8 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node) const auto kh = node.param().kh; const auto kw = node.param().kw; const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = 
_ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); const auto activation = node.param().activation; @@ -1255,6 +1264,21 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) _return_fn = std::move(fn); } +void KernelGenerator::visit(const ir::operation::DepthToSpace &node) +{ + const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + auto block_size = node.param().block_size; + + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + + auto fn = std::make_unique<ops::DepthToSpaceLayer>(); + + fn->configure(input_tensor, block_size, output_tensor); + _return_fn = std::move(fn); +} + void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) { const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index 5df77607f..3a4cfbffa 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -23,7 +23,7 @@ #include "Tensor.h" #include <backend/CustomKernelBuilder.h> -#include <backend/IKernelGenerator.h> +#include <backend/cpu_common/KernelGeneratorBase.h> #include <ir/Operands.h> #include <ir/Operations.h> @@ -34,7 +34,7 @@ namespace backend namespace cpu { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -43,59 +43,59 @@ public: const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, const std::shared_ptr<ExternalContext> &external_context); - using IKernelGenerator::visit; + void visit(const ir::OpSequence &) override; void visit(const ir::operation::AddN &) override; - void visit(const ir::OpSequence &) override; + void visit(const ir::operation::ArgMinMax &) override; + void visit(const ir::operation::BatchMatMul &) override; + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::BroadcastTo &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::Custom &node) override; + void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; - void visit(const ir::operation::Fill &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Reshape &) override; - void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Softmax &) override; - void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Einsum &) override; - void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::Custom &node) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) 
override; void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; + void visit(const ir::operation::Fill &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::FusedBatchNorm &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::L2Normalization &) override; + void visit(const ir::operation::LogSoftmax &) override; void visit(const ir::operation::LSTM &) override; - void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Pack &) override; - void visit(const ir::operation::Unpack &) override; + void visit(const ir::operation::MatrixBandPart &) override; void visit(const ir::operation::OneHot &) override; - void visit(const ir::operation::Transpose &) override; - void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::Select &) override; - void visit(const ir::operation::Slice &) override; - void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::Split &) override; - void visit(const ir::operation::Shape &) override; - void visit(const ir::operation::ResizeBilinear &node) override; - void visit(const ir::operation::Reverse &) override; - void visit(const ir::operation::ArgMax &) override; + void visit(const ir::operation::Pack &) override; + void visit(const ir::operation::Pad &) override; void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Pow &) override; - void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::Tile &) override; - void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::Range &) override; void visit(const ir::operation::Rank &) override; - void visit(const ir::operation::MatrixBandPart &) override; - void visit(const ir::operation::BatchMatMul &) override; - void visit(const ir::operation::BatchToSpaceND &) override; - void visit(const ir::operation::BroadcastTo &) override; - void visit(const ir::operation::FusedBatchNorm &) override; - void visit(const ir::operation::LogSoftmax &) override; + void visit(const ir::operation::Reduce &) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::ResizeBilinear &node) override; + void visit(const ir::operation::Reverse &) override; + void visit(const ir::operation::Select &) override; + void visit(const ir::operation::Shape &) override; + void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::StatelessRandomUniform &) override; + void visit(const ir::operation::Split &) override; void visit(const ir::operation::SplitV &) override; + void visit(const ir::operation::SquaredDifference &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::StatelessRandomUniform &) override; + void visit(const ir::operation::StridedSlice &) override; + void visit(const ir::operation::Tile &) override; + void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::Unpack &) override; private: const ir::Operands &_ctx; @@ -103,7 +103,7 @@ private: std::shared_ptr<TensorBuilder> _tensor_builder; std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; 
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; const std::shared_ptr<ExternalContext> _external_context; }; diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc deleted file mode 100644 index 3edac897c..000000000 --- a/runtime/onert/backend/cpu/StaticTensorManager.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "StaticTensorManager.h" -#include "Tensor.h" - -#include <util/logging.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ - -StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®, - cpu_common::DynamicTensorManager *dynamic_tensor_manager) - : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg}, - _dynamic_tensor_manager{dynamic_tensor_manager} -{ - // DO NOTHING -} - -void StaticTensorManager::allocateNonconsts(void) -{ - _nonconst_mgr->allocate(); - - for (auto &pair : _tensors->native_tensors()) - { - const auto &ind = pair.first; - auto tensor = pair.second.get(); - if (!_as_constants[ind] && !tensor->is_dynamic()) - { - auto *buffer = _nonconst_mgr->getBuffer(ind); - tensor->setBuffer(buffer); - - VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value() - << "): " << static_cast<void *>(buffer) << std::endl; - } - } -} - -void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } - -void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, - const ir::OperandInfo &tensor_info, ir::Layout backend_layout, - bool as_const) -{ - assert(!_tensors->getITensor(ind)); - if (as_const) - { - auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout); - _tensors->setNativeTensor(ind, std::move(tensor)); - } - else - { - auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, - _dynamic_tensor_manager->dynamic_mem_mgr().get()); - _tensors->setNativeTensor(ind, std::move(tensor)); - } - _as_constants[ind] = as_const; -} - -void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) -{ - assert(_tensors->getITensor(ind)); - - // This method is called only when a tensor has proper shape - assert(!_tensors->getITensor(ind)->is_dynamic()); - - if (!_as_constants[ind]) - _nonconst_mgr->claimPlan(ind, size); -} - -void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) -{ - assert(_tensors->getITensor(ind)); - - // This method is called only when a tensor has proper shape - assert(!_tensors->getITensor(ind)->is_dynamic()); - - if (!_as_constants[ind]) - _nonconst_mgr->releasePlan(ind); -} - -void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn) -{ - for (const auto &it : _tensors->native_tensors()) - fn(it.first); -} - -} // namespace cpu -} // namespace backend -} // namespace onert diff --git 
a/runtime/onert/backend/cpu/StaticTensorManager.h b/runtime/onert/backend/cpu/StaticTensorManager.h index 2af61e4e7..d07f0c814 100644 --- a/runtime/onert/backend/cpu/StaticTensorManager.h +++ b/runtime/onert/backend/cpu/StaticTensorManager.h @@ -17,13 +17,7 @@ #ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ #define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ -#include "backend/IStaticTensorManager.h" -#include "backend/cpu_common/DynamicTensorManager.h" -#include "backend/cpu_common/MemoryManager.h" -#include "backend/cpu_common/TensorRegistry.h" -#include "backend/ITensorManager.h" -#include "ir/OperandIndexMap.h" -#include "ir/OperandInfo.h" +#include "backend/cpu_common/StaticTensorManager.h" namespace onert { @@ -32,30 +26,7 @@ namespace backend namespace cpu { -class StaticTensorManager : public backend::IStaticTensorManager -{ -public: - StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®, - cpu_common::DynamicTensorManager *dynamic_tensor_manager); - virtual ~StaticTensorManager() = default; - - void allocateNonconsts(void); - void deallocateNonconsts(void); - - void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, - ir::Layout backend_layout, bool as_const); - - void claimPlan(const ir::OperandIndex &ind, uint32_t size); - void releasePlan(const ir::OperandIndex &ind); - - void iterate(const std::function<void(const ir::OperandIndex &)> &fn); - -private: - std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr; - const std::shared_ptr<cpu_common::TensorRegistry> _tensors; - ir::OperandIndexMap<bool> _as_constants; - cpu_common::DynamicTensorManager *_dynamic_tensor_manager; -}; +using StaticTensorManager = cpu_common::StaticTensorManager; } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h index 2ad2ad0fb..d663c3f50 100644 --- a/runtime/onert/backend/cpu/Tensor.h +++ b/runtime/onert/backend/cpu/Tensor.h @@ -28,92 +28,7 @@ namespace cpu { using Tensor = cpu_common::Tensor; - -/** - * @brief Class that uses data from external memory that is not managed by a backend - * instead of allocating and copying the data. ExternalTensor's data pointer points to - * an address of memory such as where memory is already allocated, or mmapped area. - * This is meaning that ExternalTensor can take all of types' ir::Data. - * To support this, assume below things no padding, always NHWC layout, - * constant tensor and not dynamic. - */ -class ExternalTensor : public Tensor -{ -public: - ExternalTensor() = delete; - virtual ~ExternalTensor(); - -public: - ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout) - : Tensor(info, layout, nullptr) - { - assert(_layout == ir::Layout::NHWC); - assert(_info.isConstant()); - assert(_info.isDynamic() == false); - } - -public: - /** - * @brief set Data to be shared from external so that this ExternalTensor will not be - * allocated on CPU backend - * @param[in] data data of Operand to be set - */ - void setData(const std::shared_ptr<ir::Data> data) - { - assert(data != nullptr); - _data = data; - // Note. Some op such as cker::Conv could take buffer as nullptr. 
- // That's why _buffer also would be used - _buffer = const_cast<uint8_t *>(_data->base()); - } - -public: - uint8_t *buffer() const override { return _buffer; } - - bool is_constant() const override { return true; } - bool is_dynamic() const override { return false; } - void set_dynamic() override - { - throw std::runtime_error("This tensor does not support changing dynamic"); - } - - void setShape(const ir::Shape &) override - { - throw std::runtime_error("This tensor does not support changing shape"); - } - - void increase_ref() override { ++_num_references; } - - void decrease_ref() override - { - assert(_data != nullptr); - assert(_num_references > 0); - --_num_references; - if (_num_references == 0) - { - _data.reset(); - _buffer = nullptr; - } - } - - /** - * @brief Reset reference count to zero and release data - */ - void reset_ref() override - { - assert(_data != nullptr); - assert(_num_references > 0); - _num_references = 0; - - _data.reset(); - _buffer = nullptr; - } - - int32_t num_references() override { return _num_references; } - -private: - std::shared_ptr<const ir::Data> _data; -}; +using ExternalTensor = cpu_common::ExternalTensor; } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h index 448abc229..9d8a5deb5 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.h +++ b/runtime/onert/backend/cpu/TensorBuilder.h @@ -20,7 +20,6 @@ #include <backend/cpu_common/DynamicTensorManager.h> #include <backend/cpu_common/TensorRegistry.h> -#include <backend/ITensorBuilder.h> #include <ir/OperandIndexMap.h> #include "StaticTensorManager.h" @@ -35,7 +34,7 @@ namespace backend namespace cpu { -class TensorBuilder : public ITensorBuilder +class TensorBuilder { public: TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); @@ -47,18 +46,18 @@ public: * @param[in] layout Operand data layout */ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) override; + ir::Layout backend_layout); - void notifyFirstUse(const ir::OperandIndex &) override; - void notifyLastUse(const ir::OperandIndex &) override; + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); - bool isRegistered(const ir::OperandIndex &) const override; + bool isRegistered(const ir::OperandIndex &) const; - void prepare(void) override; - void allocate() override; - void postFunctionPrepare() override { /* DO NOTHING */} + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} - IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } private: const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; diff --git a/runtime/onert/backend/cpu/cpu.cc b/runtime/onert/backend/cpu/cpu.cc index 5385bb2a3..55538e2a6 100644 --- a/runtime/onert/backend/cpu/cpu.cc +++ b/runtime/onert/backend/cpu/cpu.cc @@ -16,18 +16,9 @@ #include "Backend.h" -#include <util/logging.h> - extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'cpu' loaded\n"; - return new onert::backend::cpu::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'cpu' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::cpu::Backend; } + +void 
onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc index 2fd284c91..d5ffdef0b 100644 --- a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc @@ -79,6 +79,9 @@ void ArgMinMaxLayer::run() case ir::DataType::UINT8: TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t); break; + case ir::DataType::QUANT_INT8_ASYMM: + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t); + break; case ir::DataType::INT32: TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t); break; @@ -97,6 +100,9 @@ void ArgMinMaxLayer::run() case ir::DataType::UINT8: TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t); break; + case ir::DataType::QUANT_INT8_ASYMM: + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t); + break; case ir::DataType::INT32: TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t); break; diff --git a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc index 7ef023788..ba9655924 100644 --- a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc +++ b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc @@ -67,7 +67,7 @@ void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTens void BatchMatMulLayer::run() { - if (_lhs->data_type() == OperandType::FLOAT32) + if ((_lhs->data_type() == OperandType::FLOAT32) && (_rhs->data_type() == OperandType::FLOAT32)) { batchMatMulFloat32(); } diff --git a/runtime/onert/backend/cpu/ops/ConcatLayer.cc b/runtime/onert/backend/cpu/ops/ConcatLayer.cc index d26ed7378..edfdfc1a6 100644 --- a/runtime/onert/backend/cpu/ops/ConcatLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConcatLayer.cc @@ -117,24 +117,26 @@ void ConcatLayer::configure(const std::vector<const IPortableTensor *> &inputs, void ConcatLayer::run() { - if (_output->data_type() == OperandType::FLOAT32) + switch (_output->data_type()) { - concatenationGeneral<float>(); + case OperandType::FLOAT32: + concatenationGeneral<float>(); + break; + case OperandType::QUANT_UINT8_ASYMM: + concatenationQuant8(); + break; + case OperandType::QUANT_INT8_ASYMM: + concatenationGeneral<int8_t>(); + break; + case OperandType::INT32: + concatenationGeneral<int32_t>(); + break; + case OperandType::INT64: + concatenationGeneral<int64_t>(); + break; + default: + throw std::runtime_error("Concat: unsupported data type"); } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - concatenationQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - concatenationGeneral<int32_t>(); - } - else if (_output->data_type() == OperandType::INT64) - { - concatenationGeneral<int64_t>(); - } - else - throw std::runtime_error("Concat: unsupported data type"); } } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc index 799e9e2d0..c964e38f9 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc @@ -203,8 +203,6 @@ void ConvolutionLayer::prepare() _prepare = true; } -#undef ANDROID_NN_CONV_PARAMETERS - } // namespace ops } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc new file mode 100644 index 000000000..d265d0ac2 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics 
Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DepthToSpaceLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/DepthToSpace.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +DepthToSpaceLayer::DepthToSpaceLayer() : _input(nullptr), _block_size(0), _output(nullptr) +{ + // DO NOTHING +} + +template <typename T> void DepthToSpaceLayer::depthToSpace() +{ + nnfw::cker::DepthToSpace(getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()), + _block_size); +} + +void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size, + IPortableTensor *output) +{ + _input = input; + _block_size = block_size; + _output = output; +} + +void DepthToSpaceLayer::run() +{ + switch (_input->data_type()) + { + case OperandType::FLOAT32: + depthToSpace<float>(); + break; + case OperandType::INT32: + depthToSpace<int32_t>(); + break; + case OperandType::INT64: + depthToSpace<int64_t>(); + break; + case OperandType::QUANT_UINT8_ASYMM: + depthToSpace<uint8_t>(); + break; + case OperandType::QUANT_INT8_ASYMM: + depthToSpace<int8_t>(); + break; + default: + throw std::runtime_error{"DepthToSpace: unsupported data type"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h new file mode 100644 index 000000000..32e0171ce --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ + +#include <backend/IPortableTensor.h> + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +class DepthToSpaceLayer : public ::onert::exec::IFunction +{ +public: + DepthToSpaceLayer(); + + void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output); + + void run() override; + +private: + template <typename T> void depthToSpace(); + + const IPortableTensor *_input; + int32_t _block_size; + IPortableTensor *_output; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc index f1dc1103a..85553d14d 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc @@ -43,11 +43,12 @@ void DepthwiseConvolutionLayer::convFloat32() op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; - nnfw::cker::DepthwiseConv( + nnfw::cker::DepthwiseConv<float, float>( op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()), getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + _external_context->ruy_context()); } void DepthwiseConvolutionLayer::convQuant8() @@ -79,11 +80,12 @@ void DepthwiseConvolutionLayer::convQuant8() op_params.quantized_activation_min = output_activation_min; op_params.quantized_activation_max = output_activation_max; - nnfw::cker::DepthwiseConv( + nnfw::cker::DepthwiseConv<uint8_t, int32_t>( op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()), getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); + getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()), + _external_context->ruy_context()); } void DepthwiseConvolutionLayer::configure( @@ -91,7 +93,8 @@ void DepthwiseConvolutionLayer::configure( const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, - const ir::Activation activation, IPortableTensor *output) + const ir::Activation activation, IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context) { _input = input; _kernel = kernel; @@ -107,6 +110,7 @@ void DepthwiseConvolutionLayer::configure( _dilationHeight = dilationHeight; _activation = activation; _output = output; + _external_context = external_context; } void DepthwiseConvolutionLayer::run() diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h index fb032ecbf..fe1fcc182 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h +++ 
b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h @@ -19,6 +19,7 @@ #include <backend/IPortableTensor.h> #include "OperationUtils.h" +#include "../ExternalContext.h" #include <exec/IFunction.h> @@ -47,7 +48,7 @@ public: const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH, const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, const ir::Activation activation, - IPortableTensor *output); + IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context); void run() override; @@ -71,6 +72,8 @@ private: uint32_t _dilationHeight{1}; ir::Activation _activation{ir::Activation::NONE}; + + std::shared_ptr<ExternalContext> _external_context; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc index c1d63172b..3e1da5ec0 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc @@ -18,6 +18,8 @@ #include "OperationUtils.h" +#include <cker/operation/ELU.h> +#include <cker/operation/LeakyReLU.h> #include <cker/operation/Logistic.h> #include <cker/operation/ReLU.h> #include <cker/operation/ReLU6.h> @@ -91,6 +93,19 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab switch (op_type) { + case ElementwiseActivationType::kElu: + if (input->data_type() == OperandType::FLOAT32) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::ELU(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(Elu): unsupported data type"}; + } + break; case ElementwiseActivationType::kLogistic: if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { @@ -160,6 +175,21 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"}; } break; + case ElementwiseActivationType::kLeakyReLU: + if (_input->data_type() == OperandType::FLOAT32) + { + _kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), + reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(LeakyReLU): unsupported data type"}; + } + break; default: throw std::runtime_error("ElementwiseActivationLayer: unsupported op type"); } diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h index 3ef580041..948ab3b57 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h @@ -32,9 +32,11 @@ namespace ops enum class ElementwiseActivationType { + kElu, kLogistic, kReLU, - kTanh + kTanh, + kLeakyReLU }; class ElementwiseActivationLayer : public ::onert::exec::IFunction diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc index ea3c1e7cd..1e17a0828 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc @@ -18,6 +18,7 @@ #include 
"OperationUtils.h" +#include <cker/operation/LogicalAnd.h> #include <cker/operation/LogicalOr.h> #include <cker/operation/MaxMin.h> @@ -33,6 +34,25 @@ namespace ops namespace { template <typename T> +void logicalAndGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output) +{ + if (!HaveSameShapes(lhs, rhs)) + { + nnfw::cker::LogicalAndBroadcast<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs), + reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); + } + else + { + nnfw::cker::LogicalAndElementwise<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer())); + } +} + +template <typename T> void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output) { @@ -88,6 +108,16 @@ void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortab switch (op_type) { + case ElementwiseBinaryType::kLogicalAnd: + if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) + { + _kernel = logicalAndGeneric<bool>; + } + else + { + throw std::runtime_error{"LogicalOr: Unsupported data type"}; + } + break; case ElementwiseBinaryType::kLogicalOr: if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) { diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc index 066455e72..15d7f3049 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc @@ -195,6 +195,18 @@ void sinFloat32(const IPortableTensor *input, IPortableTensor *output) getTensorShape(output), reinterpret_cast<float *>(output->buffer())); } +void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Sqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void squareFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Square(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output) { if (!HaveSameShapes(input, output)) @@ -363,6 +375,26 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen throw std::runtime_error{"Sin: Unsupported data type"}; } break; + case ElementwiseUnaryType::kSqrt: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = sqrtFloat32; + } + else + { + throw std::runtime_error{"Sqrt: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kSquare: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = squareFloat32; + } + else + { + throw std::runtime_error{"Square: Unsupported data type"}; + } + break; case ElementwiseUnaryType::kZerosLike: if (input->data_type() == OperandType::FLOAT32) { diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h index c1765b5b7..54a6fc02a 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h @@ -46,6 +46,8 @@ enum class ElementwiseUnaryType kRound, kRSqrt, kSin, + 
kSqrt, + kSquare, kZerosLike }; diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc index b545e6743..5ea0ea893 100644 --- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc +++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc @@ -25,22 +25,19 @@ namespace cpu namespace ops { -ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _axis(nullptr), _output(nullptr) +ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _output(nullptr) { // DO NOTHING } -void ExpandDimsLayer::configure(const IPortableTensor *input, const IPortableTensor *axis, - IPortableTensor *output) +void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *output) { _input = input; - _axis = axis; _output = output; } void ExpandDimsLayer::run() { - // TODO use _axis to calculate shape of output when _axis is not constant size_t count = _input->total_size(); memcpy(_output->buffer(), _input->buffer(), count); } diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h index b5d4938b5..1b7ead0c3 100644 --- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h +++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h @@ -36,14 +36,12 @@ public: ExpandDimsLayer(); public: - void configure(const IPortableTensor *input, const IPortableTensor *axis, - IPortableTensor *output); + void configure(const IPortableTensor *input, IPortableTensor *output); void run() override; private: const IPortableTensor *_input; - const IPortableTensor *_axis; IPortableTensor *_output; }; diff --git a/runtime/onert/backend/cpu/ops/FillLayer.cc b/runtime/onert/backend/cpu/ops/FillLayer.cc index df3f8b7cd..5b7c17907 100644 --- a/runtime/onert/backend/cpu/ops/FillLayer.cc +++ b/runtime/onert/backend/cpu/ops/FillLayer.cc @@ -29,15 +29,13 @@ namespace cpu namespace ops { -FillLayer::FillLayer() : _input(nullptr), _value(nullptr), _output(nullptr) +FillLayer::FillLayer() : _value(nullptr), _output(nullptr) { // DO NOTHING } -void FillLayer::configure(const IPortableTensor *input, const IPortableTensor *value, - IPortableTensor *output) +void FillLayer::configure(const IPortableTensor *value, IPortableTensor *output) { - _input = input; _value = value; _output = output; } @@ -47,28 +45,24 @@ void FillLayer::run() switch (_output->data_type()) { case OperandType::FLOAT32: - nnfw::cker::Fill<float *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<float *>(_value->buffer()), + nnfw::cker::Fill<float *>(reinterpret_cast<float *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); break; case OperandType::INT32: - nnfw::cker::Fill<int32_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<int32_t *>(_value->buffer()), + nnfw::cker::Fill<int32_t *>(reinterpret_cast<int32_t *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); break; case OperandType::INT64: - nnfw::cker::Fill<int64_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<int64_t *>(_value->buffer()), + nnfw::cker::Fill<int64_t *>(reinterpret_cast<int64_t *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<int64_t *>(_output->buffer())); break; case OperandType::UINT32: - nnfw::cker::Fill<uint32_t *>( - getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<uint32_t *>(_value->buffer()), getTensorShape(_output), - reinterpret_cast<uint32_t 
*>(_output->buffer())); + nnfw::cker::Fill<uint32_t *>(reinterpret_cast<uint32_t *>(_value->buffer()), + getTensorShape(_output), + reinterpret_cast<uint32_t *>(_output->buffer())); break; default: throw std::runtime_error{"Fill: unsupported data type"}; diff --git a/runtime/onert/backend/cpu/ops/FillLayer.h b/runtime/onert/backend/cpu/ops/FillLayer.h index 1f17d6b68..ce843654a 100644 --- a/runtime/onert/backend/cpu/ops/FillLayer.h +++ b/runtime/onert/backend/cpu/ops/FillLayer.h @@ -35,13 +35,11 @@ class FillLayer : public ::onert::exec::IFunction public: FillLayer(); - void configure(const IPortableTensor *input, const IPortableTensor *value, - IPortableTensor *output); + void configure(const IPortableTensor *value, IPortableTensor *output); void run() override; private: - const IPortableTensor *_input; const IPortableTensor *_value; IPortableTensor *_output; }; diff --git a/runtime/onert/backend/cpu/ops/MeanLayer.cc b/runtime/onert/backend/cpu/ops/MeanLayer.cc index 4921ac748..f130692ee 100644 --- a/runtime/onert/backend/cpu/ops/MeanLayer.cc +++ b/runtime/onert/backend/cpu/ops/MeanLayer.cc @@ -36,9 +36,24 @@ MeanLayer::MeanLayer() : _input(nullptr), _axes(nullptr), _output(nullptr), _kee void MeanLayer::MeanFloat32() { - nnfw::cker::Mean(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), - getReducerAxes(_axes)); + const auto inputShape = getTensorShape(_input); + const auto axisVec = getReducerAxes(_axes); + bool axis_is_1_and_2 = + _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 && + ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1)); + + if (axis_is_1_and_2) + { + nnfw::cker::MeanAxis1And2(inputShape, reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_output), + reinterpret_cast<float *>(_output->buffer())); + } + else + { + nnfw::cker::Mean(inputShape, reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + axisVec); + } } void MeanLayer::MeanQuant8() @@ -57,6 +72,10 @@ void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *a _axes = axes; _output = output; _keep_dims = keep_dims; + + if (_input->data_type() != OperandType::FLOAT32 && + _input->data_type() != OperandType::QUANT_UINT8_ASYMM) + throw std::runtime_error{"Mean: unsupported data type"}; } void MeanLayer::run() diff --git a/runtime/onert/backend/ruy/Backend.h b/runtime/onert/backend/ruy/Backend.h new file mode 100644 index 000000000..bc8a024d8 --- /dev/null +++ b/runtime/onert/backend/ruy/Backend.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_BACKEND_H__ +#define __ONERT_BACKEND_RUY_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +#include <backend/Backend.h> + +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<onert::backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb, + bool) const override + { + const auto &operands = graph.operands(); + const auto &operations = graph.operations(); + auto context = std::make_unique<BackendContext>(this, &graph); + auto tr = std::make_shared<cpu_common::TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, + context->external_context()); + return context; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_BACKEND_H__ diff --git a/runtime/onert/backend/ruy/BackendContext.cc b/runtime/onert/backend/ruy/BackendContext.cc new file mode 100644 index 000000000..ef686f480 --- /dev/null +++ b/runtime/onert/backend/ruy/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? + for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace ruy +} 
// namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/BackendContext.h b/runtime/onert/backend/ruy/BackendContext.h new file mode 100644 index 000000000..b965c9a9d --- /dev/null +++ b/runtime/onert/backend/ruy/BackendContext.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" +#include "ExternalContext.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(new ExternalContext) + { + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + + FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + + std::shared_ptr<ExternalContext> external_context() { return _external_context; } + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: + // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, + // the thread pool is also created in duplicate + // TODO Create one ruy context for session + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/ruy/CMakeLists.txt b/runtime/onert/backend/ruy/CMakeLists.txt new file mode 100644 index 000000000..206acbfbf --- /dev/null +++ b/runtime/onert/backend/ruy/CMakeLists.txt @@ -0,0 +1,22 @@ +set(LIB_ONERT_BACKEND_RUY onert_backend_ruy) + +nnfw_find_package(Ruy REQUIRED) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_ONERT_BACKEND_RUY} SHARED ${SOURCES}) + +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_lib_ruy) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} 
PRIVATE nnfw_common) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_coverage) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE ruy) + +set_target_properties(${LIB_ONERT_BACKEND_RUY} PROPERTIES OUTPUT_NAME backend_ruy) + +if(CMAKE_BUILD_TYPE_LC STREQUAL "release") + add_custom_command(TARGET ${LIB_ONERT_BACKEND_RUY} POST_BUILD + COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_RUY}>) +endif() + +install(TARGETS ${LIB_ONERT_BACKEND_RUY} DESTINATION lib) diff --git a/runtime/onert/backend/cpu/Tensor.cc b/runtime/onert/backend/ruy/Config.cc index dac8f898b..179caa9a6 100644 --- a/runtime/onert/backend/cpu/Tensor.cc +++ b/runtime/onert/backend/ruy/Config.cc @@ -14,18 +14,18 @@ * limitations under the License. */ -#include "Tensor.h" +#include "Config.h" namespace onert { namespace backend { -namespace cpu +namespace ruy { -// `dynamic_cast` not working across library boundaries on NDK -// With this as a key function, `dynamic_cast` works across dl -ExternalTensor::~ExternalTensor() {} +bool Config::initialize() { return true; } + +ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; } } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/ruy/Config.h b/runtime/onert/backend/ruy/Config.h new file mode 100644 index 000000000..9160dd5b1 --- /dev/null +++ b/runtime/onert/backend/ruy/Config.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_CONFIG_H__ +#define __ONERT_BACKEND_RUY_CONFIG_H__ + +#include <backend/IConfig.h> +#include <memory> +#include <util/ITimer.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class Config : public IConfig +{ +public: + std::string id() override { return "ruy"; } + bool initialize() override; + ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + bool supportPermutation() override { return true; } + bool supportDynamicTensor() override { return true; } + bool supportFP16() override { return false; } + + std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); } +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_CONFIG_H__ diff --git a/runtime/onert/backend/ruy/ConstantInitializer.h b/runtime/onert/backend/ruy/ConstantInitializer.h new file mode 100644 index 000000000..24b4d924d --- /dev/null +++ b/runtime/onert/backend/ruy/ConstantInitializer.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ + +#include <backend/cpu_common/ConstantInitializer.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using ConstantInitializer = cpu_common::ConstantInitializer; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h new file mode 100644 index 000000000..f51faccb8 --- /dev/null +++ b/runtime/onert/backend/ruy/ExternalContext.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ +#define __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ + +#include <util/ConfigSource.h> +#include <ruy/context.h> + +namespace +{ +const int kDefaultNumThreadpoolThreads = 4; +} + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class ExternalContext +{ +public: + ExternalContext() : _ruy_context(new ::ruy::Context) + { + setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS)); + } + + void setMaxNumThreads(int max_num_threads) + { + const int target_num_threads = + max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads; + _ruy_context->set_max_num_threads(target_num_threads); + } + + ::ruy::Context *ruy_context() const { return _ruy_context.get(); } + +private: + const std::unique_ptr<::ruy::Context> _ruy_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc new file mode 100644 index 000000000..cd2825068 --- /dev/null +++ b/runtime/onert/backend/ruy/KernelGenerator.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "KernelGenerator.h" + +#include "ops/ConvolutionLayer.h" +#include "ops/FullyConnectedLayer.h" + +#include <backend/Backend.h> +#include <backend/IConfig.h> +#include <memory> +#include <util/Utils.h> +#include <util/logging.h> +#include <exec/DynamicShapeInferer.h> + +#include <stdexcept> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context) + : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), + _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + assert(!_return_fn_seq); + assert(_tensor_builder->dynamicTensorManager()); + assert(_tensor_reg); + + auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); + + _return_fn_seq = std::make_unique<exec::FunctionSequence>(); + + // Prepare to handle dynamic tensors later + auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); + { + dyn_ctx->op_seq = &op_seq; + dyn_ctx->operations = &_operations_ctx; + dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); + dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager(); + + _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); + } + + _current_layout = op_seq.getLayout(); + for (const auto &operation_idx : op_seq.operations()) + { + const auto &node = _operations_ctx.at(operation_idx); + node.accept(*this); + _return_fn_seq->append(releaseFunction()); + + for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) + { + auto portable_tensor = _tensor_reg->getPortableTensor(ind); + if (portable_tensor) + { + assert(portable_tensor->layout() == ir::Layout::NHWC); + } + + auto tensor = _tensor_reg->getNativeTensor(ind); + if (tensor) + { + tensor->increase_ref(); + } + } + } +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index); + + const auto stride = node.param().stride; + const auto activation = node.param().activation; + const auto param_padding = node.param().padding; + const auto dilation = node.param().dilation; + auto fn = std::make_unique<ops::ConvolutionLayer>(); + + if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic()) + { + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left, + param_padding.param.right, param_padding.param.top, param_padding.param.bottom, + stride.horizontal, 
stride.vertical, dilation.width_factor, dilation.height_factor, + activation, ofm_tensor, _external_context); + + _return_fn = std::move(fn); + return; + } + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto padding = + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + dilation.width_factor, dilation.height_factor); + + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + dilation.width_factor, dilation.height_factor, activation, ofm_tensor, + _external_context); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + const auto activation = node.param().activation; + const auto weights_format = node.param().weights_format; + + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto weight_tensor = _tensor_reg->getPortableTensor(weight_index); + auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique<ops::FullyConnectedLayer>(); + + fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor, + _external_context); + + _return_fn = std::move(fn); +} + +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/KernelGenerator.h b/runtime/onert/backend/ruy/KernelGenerator.h new file mode 100644 index 000000000..0f6bd590a --- /dev/null +++ b/runtime/onert/backend/ruy/KernelGenerator.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ + +#include "ExternalContext.h" +#include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "Tensor.h" + +#include <backend/CustomKernelBuilder.h> +#include <backend/cpu_common/KernelGeneratorBase.h> +#include <ir/Operands.h> +#include <ir/Operations.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class KernelGenerator : public cpu_common::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context); + + void visit(const ir::OpSequence &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + +private: + const ir::Operands &_ctx; + const ir::Operations &_operations_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; + ir::Layout _current_layout; + const std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/backend/ruy/StaticTensorManager.h b/runtime/onert/backend/ruy/StaticTensorManager.h new file mode 100644 index 000000000..af2d25241 --- /dev/null +++ b/runtime/onert/backend/ruy/StaticTensorManager.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ + +#include "backend/cpu_common/StaticTensorManager.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using StaticTensorManager = cpu_common::StaticTensorManager; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/ruy/Tensor.h b/runtime/onert/backend/ruy/Tensor.h new file mode 100644 index 000000000..60d0fbf77 --- /dev/null +++ b/runtime/onert/backend/ruy/Tensor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_TENSOR_H__ +#define __ONERT_BACKEND_RUY_TENSOR_H__ + +#include <backend/cpu_common/Tensor.h> +#include <ir/Data.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using Tensor = cpu_common::Tensor; +using ExternalTensor = cpu_common::ExternalTensor; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_TENSOR_H__ diff --git a/runtime/onert/backend/ruy/TensorBuilder.cc b/runtime/onert/backend/ruy/TensorBuilder.cc new file mode 100644 index 000000000..c77defc30 --- /dev/null +++ b/runtime/onert/backend/ruy/TensorBuilder.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TensorBuilder.h" + +#include <util/logging.h> + +#include <cassert> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg) + : _tensor_reg{tensor_reg}, + _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} +{ + /* empty */ +} + +void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout layout) +{ + _tensor_info_map.emplace(ind, info); + + // CPU backend supports only one layout as NHWC + assert(layout == ir::Layout::NHWC); + if (info.isDynamic()) + { + _dynamic_tensor_mgr->buildTensor(ind, info, layout); + } + else + { + _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant()); + } +} + +void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) +{ + assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + const auto tensor_info = _tensor_info_map.at(ind); + + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + const auto size = tensor_info.total_size(); + _static_tensor_mgr->claimPlan(ind, size); + } +} + +void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) +{ + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + _static_tensor_mgr->releasePlan(ind); + } +} + +bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const +{ + return _tensor_info_map.find(ind) != _tensor_info_map.end(); +} + +void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); } + +void TensorBuilder::allocate() +{ + // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate + // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. 
+} + +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/TensorBuilder.h b/runtime/onert/backend/ruy/TensorBuilder.h new file mode 100644 index 000000000..91c07bd82 --- /dev/null +++ b/runtime/onert/backend/ruy/TensorBuilder.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__ +#define __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__ + +#include <backend/cpu_common/DynamicTensorManager.h> +#include <backend/cpu_common/TensorRegistry.h> + +#include <ir/OperandIndexMap.h> + +#include "StaticTensorManager.h" +#include "Tensor.h" + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class TensorBuilder +{ +public: + TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); + + /** + * @brief Register tensor information to allocate on CPU backend + * @param[in] ind Operand index + * @param[in] info Operand information + * @param[in] layout Operand data layout + */ + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout); + + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); + + bool isRegistered(const ir::OperandIndex &) const; + + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} + + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } + +private: + const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr; + std::unique_ptr<StaticTensorManager> _static_tensor_mgr; + ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc new file mode 100644 index 000000000..d249b2ce3 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConvolutionLayer.h" + +#include "../Tensor.h" +#include "ir/Padding.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ +ConvolutionLayer::ConvolutionLayer() + : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), + _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0), + _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1), + _dilationHeightFactor(1), _activation(ir::Activation::NONE), + _conv_kernel(new nnfw::ruy::Conv()), _prepare(false) +{ + // DO NOTHING +} + +ConvolutionLayer::~ConvolutionLayer() = default; + +void ConvolutionLayer::convFloat32() +{ + float output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + + nnfw::ruy::ConvParams op_params; + op_params.padding_type = getPaddingType(_paddingType); + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = _dilationWidthFactor; + op_params.dilation_height_factor = _dilationHeightFactor; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + nnfw::ruy::Conv &kernel = *_conv_kernel; + kernel(op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()), + getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + _external_context->ruy_context()); +} + +void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, const ir::PaddingType paddingType, + const uint32_t paddingLeft, const uint32_t paddingRight, + const uint32_t paddingTop, const uint32_t paddingBottom, + const uint32_t strideWidth, const uint32_t strideHeight, + const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, + const ir::Activation activation, IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context) +{ + _input = input; + _kernel = kernel; + _bias = bias; + _paddingType = paddingType; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _dilationWidthFactor = dilationWidthFactor; + _dilationHeightFactor = dilationHeightFactor; + _activation = activation; + _output = output; + _external_context = external_context; +} + +void ConvolutionLayer::run() +{ + prepare(); + + if (_input->is_dynamic() || _kernel->is_dynamic()) + { + const auto ifm_shape = _input->getShape().asFeature(_input->layout()); + const auto ofm_shape = _output->getShape().asFeature(_input->layout()); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
+ const auto ker_shape = _kernel->getShape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + ir::Stride stride; + stride.vertical = _strideWidth; + stride.horizontal = _strideWidth; + + ir::Padding param_padding; + param_padding.type = _paddingType; + param_padding.param.left = _paddingLeft; + param_padding.param.right = _paddingRight; + param_padding.param.top = _paddingTop; + param_padding.param.bottom = _paddingBottom; + + const auto padding = + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + _dilationWidthFactor, _dilationHeightFactor); + + _paddingLeft = padding.left; + _paddingRight = padding.right; + _paddingTop = padding.top; + _paddingBottom = padding.bottom; + } + if (_input->data_type() == OperandType::FLOAT32) + { + convFloat32(); + } + else + { + throw std::runtime_error{"Conv: unsupported data type"}; + } +} + +void ConvolutionLayer::prepare() +{ + if (_prepare) + return; + + nnfw::ruy::Conv &kernel = *_conv_kernel; + if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant()) + { + kernel.prepare(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output), + _strideWidth, _strideHeight, _dilationWidthFactor, _dilationHeightFactor); + } + _prepare = true; +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.h b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h new file mode 100644 index 000000000..a55387b93 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ +#define __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ + +#include <backend/IPortableTensor.h> +#include "../ExternalContext.h" +#include "OperationUtils.h" + +#include <ruy/operation/Conv.h> +#include <exec/IFunction.h> +#include <functional> +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +class ConvolutionLayer : public ::onert::exec::IFunction +{ +public: + ConvolutionLayer(); + ~ConvolutionLayer(); + +public: + void convFloat32(); + + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType _paddingType, + const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, const ir::Activation activation, + IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context); + + void run() override; + + void prepare() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _paddingType; + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + uint32_t _dilationWidthFactor; + uint32_t _dilationHeightFactor; + + ir::Activation _activation; + + std::unique_ptr<nnfw::ruy::Conv> _conv_kernel; + + bool _prepare; + + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc new file mode 100644 index 000000000..af693e3b4 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "FullyConnectedLayer.h" + +#include "../Tensor.h" +#include <ruy/operation/FullyConnected.h> +#include <ruy/TensorUtils.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +FullyConnectedLayer::FullyConnectedLayer() + : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr), + _activation(ir::Activation::NONE), _external_context(nullptr) +{ + // DO NOTHING +} + +FullyConnectedLayer::~FullyConnectedLayer() = default; + +void FullyConnectedLayer::fullyConnectedFloat32() +{ + float output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + nnfw::ruy::FullyConnectedParams op_params; + + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + op_params.activation = convertActivationType(_activation); + op_params.lhs_cacheable = _weights->is_constant(); + op_params.rhs_cacheable = _input->is_constant(); + + nnfw::ruy::FullyConnected( + op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()), + getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + _external_context->ruy_context()); +} + +void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + ir::FullyConnectedWeightsFormat weights_format, + IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context) +{ + UNUSED_RELEASE(weights_format); + _input = input; + _weights = weights; + _bias = bias; + _activation = activation; + _output = output; + _external_context = external_context; +} + +void FullyConnectedLayer::run() +{ + if (_input->data_type() == OperandType::FLOAT32) + { + fullyConnectedFloat32(); + } + else + { + throw std::runtime_error{"FullyConnected: unsupported data type"}; + } +} + +void FullyConnectedLayer::prepare() +{ + if (_bias && _bias->is_constant()) + { + const int bias_size = getTensorShape(_bias).FlatSize(); + if (nnfw::ruy::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size)) + { + _bias = nullptr; + } + } +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h new file mode 100644 index 000000000..33d560f0b --- /dev/null +++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ +#define __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ + +#include <backend/IPortableTensor.h> +#include "../ExternalContext.h" +#include "OperationUtils.h" + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +class FullyConnectedLayer : public ::onert::exec::IFunction +{ +public: + FullyConnectedLayer(); + ~FullyConnectedLayer(); + +public: + void fullyConnectedFloat32(); + + void configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + ir::FullyConnectedWeightsFormat weights_format, IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context); + + void run() override; + + void prepare() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_weights; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::Activation _activation; + + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.cc b/runtime/onert/backend/ruy/ops/OperationUtils.cc new file mode 100644 index 000000000..929107b1a --- /dev/null +++ b/runtime/onert/backend/ruy/ops/OperationUtils.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OperationUtils.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type) +{ + switch (ir_padding_type) + { + case ir::PaddingType::EXPLICIT: + return nnfw::ruy::PaddingType::kNone; + case ir::PaddingType::SAME: + return nnfw::ruy::PaddingType::kSame; + case ir::PaddingType::VALID: + return nnfw::ruy::PaddingType::kValid; + default: + throw std::runtime_error("Wrong padding type."); + break; + } +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h new file mode 100644 index 000000000..5dfdc7ec5 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/OperationUtils.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ +#define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ + +#include <backend/IPortableTensor.h> + +#include <ruy/Shape.h> +#include <ruy/Types.h> +#include <iostream> +#include <ir/DataType.h> +#include <ir/InternalType.h> +#include <ir/Padding.h> + +#include <limits> + +using OperandType = onert::ir::DataType; + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +inline nnfw::ruy::Shape getTensorShape(const IPortableTensor *tensor) +{ + if (tensor == nullptr) + return nnfw::ruy::Shape(); + + const ir::Shape &shape = tensor->get_info().shape(); + + assert(tensor->layout() == ir::Layout::NHWC); + + auto rank = shape.rank(); + nnfw::ruy::Shape ret(rank); + auto data = ret.DimsData(); + for (int i = 0; i < rank; ++i) + { + data[i] = shape.dim(i); + } + return ret; +} + +inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Activation activation) +{ + switch (activation) + { + case ir::Activation::NONE: + return nnfw::ruy::FusedActivationFunctionType::kNone; + case ir::Activation::RELU: + return nnfw::ruy::FusedActivationFunctionType::kRelu; + case ir::Activation::RELU1: + return nnfw::ruy::FusedActivationFunctionType::kRelu1; + case ir::Activation::RELU6: + return nnfw::ruy::FusedActivationFunctionType::kRelu6; + case ir::Activation::TANH: + return nnfw::ruy::FusedActivationFunctionType::kTanh; + case ir::Activation::SIGMOID: + return nnfw::ruy::FusedActivationFunctionType::kSigmoid; + default: + throw std::runtime_error{"RUY backend: Cannot convert activation type"}; + } +} + +template <typename T> +void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::SIGMOID) + { + *activation_min = 0; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + } + else + { + std::cout << "Unsupported fused activation function." << std::endl; + } +} + +nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type); + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ diff --git a/runtime/onert/backend/ruy/ruy.cc b/runtime/onert/backend/ruy/ruy.cc new file mode 100644 index 000000000..4f33590e9 --- /dev/null +++ b/runtime/onert/backend/ruy/ruy.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Backend.h" + +extern "C" { + +onert::backend::Backend *onert_backend_create() { return new onert::backend::ruy::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } +} diff --git a/runtime/onert/backend/xnnpack/Backend.h b/runtime/onert/backend/xnnpack/Backend.h new file mode 100644 index 000000000..b7aef1625 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Backend.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_H__ +#define __ONERT_BACKEND_XNNPACK_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +#include <backend/Backend.h> + +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<onert::backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb, + bool) const override + { + const auto &operands = graph.operands(); + const auto &operations = graph.operations(); + auto context = std::make_unique<BackendContext>(this, &graph); + auto tr = std::make_shared<cpu_common::TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, + context->external_context()); + return context; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_BACKEND_H__ diff --git a/runtime/onert/backend/xnnpack/BackendContext.cc b/runtime/onert/backend/xnnpack/BackendContext.cc new file mode 100644 index 000000000..503d088aa --- /dev/null +++ b/runtime/onert/backend/xnnpack/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? + for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace 
xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/BackendContext.h b/runtime/onert/backend/xnnpack/BackendContext.h new file mode 100644 index 000000000..f81175b9e --- /dev/null +++ b/runtime/onert/backend/xnnpack/BackendContext.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include <util/ConfigSource.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" +#include "ExternalContext.h" + +namespace +{ +const int kDefaultNumThreadpoolThreads = 1; +} + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(nullptr) + { + int num_threads = util::getConfigInt(util::config::XNNPACK_THREADS); + if (num_threads < 1) + num_threads = kDefaultNumThreadpoolThreads; // default num of threads + _external_context.reset(new ExternalContext(static_cast<size_t>(num_threads))); + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + + FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + + std::shared_ptr<ExternalContext> external_context() { return _external_context; } + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/xnnpack/CMakeLists.txt b/runtime/onert/backend/xnnpack/CMakeLists.txt new file mode 100644 index 000000000..e3de31e6f --- /dev/null +++ b/runtime/onert/backend/xnnpack/CMakeLists.txt @@ -0,0 +1,26 @@ +set(LIB_ONERT_BACKEND_XNNPACK onert_backend_xnnpack) + +# Unsupported architecture +nnfw_find_package(Xnnpack QUIET) +if(NOT Xnnpack_FOUND) + return() 
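+  # XNNPACK is not available for this target, so the backend library is not built.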
+endif(NOT Xnnpack_FOUND) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_ONERT_BACKEND_XNNPACK} SHARED ${SOURCES}) + +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_common) +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_coverage) +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE pthreadpool) +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE XNNPACK) + +set_target_properties(${LIB_ONERT_BACKEND_XNNPACK} PROPERTIES OUTPUT_NAME backend_xnnpack) + +if(CMAKE_BUILD_TYPE_LC STREQUAL "release") + add_custom_command(TARGET ${LIB_ONERT_BACKEND_XNNPACK} POST_BUILD + COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_XNNPACK}>) +endif() + +install(TARGETS ${LIB_ONERT_BACKEND_XNNPACK} DESTINATION lib) diff --git a/runtime/onert/backend/xnnpack/Config.cc b/runtime/onert/backend/xnnpack/Config.cc new file mode 100644 index 000000000..4d42a3f18 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Config.cc @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Config.h" + +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +Config::~Config() { xnn_deinitialize(); } + +bool Config::initialize() +{ + xnn_status status = xnn_initialize(nullptr /* allocator */); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to initialize XNNPACK"}; + } + return true; +} + +ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; } + +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/Config.h b/runtime/onert/backend/xnnpack/Config.h new file mode 100644 index 000000000..2cf7406e5 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Config.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_CONFIG_H__ +#define __ONERT_BACKEND_XNNPACK_CONFIG_H__ + +#include <backend/IConfig.h> +#include <memory> +#include <util/ITimer.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class Config : public IConfig +{ +public: + virtual ~Config(); + +public: + std::string id() override { return "xnnpack"; } + bool initialize() override; + ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + bool supportPermutation() override { return true; } + bool supportDynamicTensor() override { return true; } + bool supportFP16() override { return false; } + + std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); } +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_CONFIG_H__ diff --git a/runtime/onert/backend/xnnpack/ConstantInitializer.h b/runtime/onert/backend/xnnpack/ConstantInitializer.h new file mode 100644 index 000000000..45cdd8cd9 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ConstantInitializer.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__ + +#include <backend/cpu_common/ConstantInitializer.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using ConstantInitializer = cpu_common::ConstantInitializer; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/xnnpack/ExternalContext.cc b/runtime/onert/backend/xnnpack/ExternalContext.cc new file mode 100644 index 000000000..3a9fe1b55 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ExternalContext.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ExternalContext.h" + +#include <cassert> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +ExternalContext::ExternalContext(size_t num_threads) + : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy) +{ + assert(_threadpool); +} + +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/ExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.h new file mode 100644 index 000000000..682fd2e4e --- /dev/null +++ b/runtime/onert/backend/xnnpack/ExternalContext.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__ +#define __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__ + +#include <memory> +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class ExternalContext +{ +public: + ExternalContext(size_t num_threads); + +public: + pthreadpool *getThreadPool() { return _threadpool.get(); } + +private: + std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> _threadpool; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__ diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc new file mode 100644 index 000000000..b7d3f60fb --- /dev/null +++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "KernelGenerator.h" + +#include "ops/ConvolutionLayer.h" +#include "ops/DepthwiseConvolutionLayer.h" +#include "ops/FullyConnectedLayer.h" + +#include <backend/Backend.h> +#include <backend/IConfig.h> +#include <memory> +#include <util/Utils.h> +#include <util/logging.h> +#include <exec/DynamicShapeInferer.h> + +#include <stdexcept> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context) + : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), + _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + assert(!_return_fn_seq); + assert(_tensor_builder->dynamicTensorManager()); + assert(_tensor_reg); + + auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); + + _return_fn_seq = std::make_unique<exec::FunctionSequence>(); + + // Prepare to handle dynamic tensors later + auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); + { + dyn_ctx->op_seq = &op_seq; + dyn_ctx->operations = &_operations_ctx; + dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); + dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager(); + + _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); + } + + _current_layout = op_seq.getLayout(); + for (const auto &operation_idx : op_seq.operations()) + { + const auto &node = _operations_ctx.at(operation_idx); + node.accept(*this); + _return_fn_seq->append(releaseFunction()); + + for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) + { + auto portable_tensor = _tensor_reg->getPortableTensor(ind); + if (portable_tensor) + { + assert(portable_tensor->layout() == ir::Layout::NHWC); + } + + auto tensor = _tensor_reg->getNativeTensor(ind); + if (tensor) + { + tensor->increase_ref(); + } + } + } +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index); + + const auto stride = node.param().stride; + const auto activation = node.param().activation; + const auto param_padding = node.param().padding; + const auto dilation = node.param().dilation; + auto fn = std::make_unique<ops::ConvolutionLayer>(_external_context); + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
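+ // The IR-level padding (SAME/VALID/EXPLICIT) is resolved to explicit pixel
+ // values here at kernel-generation time and handed to the layer together with
+ // the stride, dilation and fused activation.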
+ const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto padding = + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + dilation.width_factor, dilation.height_factor); + + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + dilation.width_factor, dilation.height_factor, activation, ofm_tensor); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) +{ + using ir::operation::DepthwiseConv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; + + const auto stride = node.param().stride; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + // Kernel format is [1, kernel_height, kernel_width, depth_out]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto dilation_width = node.param().dilation.width_factor; + const auto dilation_height = node.param().dilation.height_factor; + const auto param_padding = node.param().padding; + const auto padding = ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, + ker_height, dilation_width, dilation_height); + const auto multiplier = node.param().multiplier; + const auto activation = node.param().activation; + + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(_external_context); + + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + multiplier, dilation_width, dilation_height, activation, ofm_tensor); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + const auto activation = node.param().activation; + + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto weight_tensor = _tensor_reg->getPortableTensor(weight_index); + auto bias_tensor = bias_index.undefined() ? 
nullptr : _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique<ops::FullyConnectedLayer>(_external_context); + + fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor); + + _return_fn = std::move(fn); +} + +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.h b/runtime/onert/backend/xnnpack/KernelGenerator.h new file mode 100644 index 000000000..265824204 --- /dev/null +++ b/runtime/onert/backend/xnnpack/KernelGenerator.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__ + +#include "ExternalContext.h" +#include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "Tensor.h" + +#include <backend/CustomKernelBuilder.h> +#include <backend/cpu_common/KernelGeneratorBase.h> +#include <ir/Operands.h> +#include <ir/Operations.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class KernelGenerator : public cpu_common::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context); + + void visit(const ir::OpSequence &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + +private: + const ir::Operands &_ctx; + const ir::Operations &_operations_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; + ir::Layout _current_layout; + const std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/backend/xnnpack/StaticTensorManager.h b/runtime/onert/backend/xnnpack/StaticTensorManager.h new file mode 100644 index 000000000..f7344e8d8 --- /dev/null +++ b/runtime/onert/backend/xnnpack/StaticTensorManager.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ + +#include "backend/cpu_common/StaticTensorManager.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using StaticTensorManager = cpu_common::StaticTensorManager; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/xnnpack/Tensor.h b/runtime/onert/backend/xnnpack/Tensor.h new file mode 100644 index 000000000..b39cbd266 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Tensor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_H__ +#define __ONERT_BACKEND_XNNPACK_TENSOR_H__ + +#include <backend/cpu_common/Tensor.h> +#include <ir/Data.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using Tensor = cpu_common::Tensor; +using ExternalTensor = cpu_common::ExternalTensor; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_TENSOR_H__ diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.cc b/runtime/onert/backend/xnnpack/TensorBuilder.cc new file mode 100644 index 000000000..b570144ce --- /dev/null +++ b/runtime/onert/backend/xnnpack/TensorBuilder.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TensorBuilder.h" + +#include <util/logging.h> + +#include <cassert> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg) + : _tensor_reg{tensor_reg}, + _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} +{ + /* empty */ +} + +void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout layout) +{ + _tensor_info_map.emplace(ind, info); + + // XNNPACK backend supports only one layout as NHWC + assert(layout == ir::Layout::NHWC); + if (info.isDynamic()) + { + _dynamic_tensor_mgr->buildTensor(ind, info, layout); + } + else + { + _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant()); + } +} + +void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) +{ + assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + const auto tensor_info = _tensor_info_map.at(ind); + + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + const auto size = tensor_info.total_size(); + _static_tensor_mgr->claimPlan(ind, size); + } +} + +void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) +{ + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + _static_tensor_mgr->releasePlan(ind); + } +} + +bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const +{ + return _tensor_info_map.find(ind) != _tensor_info_map.end(); +} + +void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); } + +void TensorBuilder::allocate() +{ + // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate + // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. +} + +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.h b/runtime/onert/backend/xnnpack/TensorBuilder.h new file mode 100644 index 000000000..dddfedbf9 --- /dev/null +++ b/runtime/onert/backend/xnnpack/TensorBuilder.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ +#define __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ + +#include <backend/cpu_common/DynamicTensorManager.h> +#include <backend/cpu_common/TensorRegistry.h> + +#include <ir/OperandIndexMap.h> + +#include "StaticTensorManager.h" +#include "Tensor.h" + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class TensorBuilder +{ +public: + TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); + + /** + * @brief Register tensor information to allocate on XNNPACK backend + * @param[in] ind Operand index + * @param[in] info Operand information + * @param[in] layout Operand data layout + */ + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout); + + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); + + bool isRegistered(const ir::OperandIndex &) const; + + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} + + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } + +private: + const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr; + std::unique_ptr<StaticTensorManager> _static_tensor_mgr; + ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc new file mode 100644 index 000000000..0612995c2 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConvolutionLayer.h" + +#include "ir/Padding.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ +ConvolutionLayer::ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context) + : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), + _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0), + _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), + _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE) +{ + // DO NOTHING +} + +void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t dilation_width_factor, + const uint32_t dilation_height_factor, + const ir::Activation activation, IPortableTensor *output) +{ + _input = input; + _kernel = kernel; + _bias = bias; + _padding_type = padding_type; + _padding_left = padding_left; + _padding_right = padding_right; + _padding_top = padding_top; + _padding_bottom = padding_bottom; + _stride_width = stride_width; + _stride_height = stride_height; + _dilation_width_factor = dilation_width_factor; + _dilation_height_factor = dilation_height_factor; + _activation = activation; + _output = output; + + // TODO Support not nhwc layer + assert(_input->layout() == ir::Layout::NHWC); + + assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU || + _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6); +} + +void ConvolutionLayer::run() +{ + assert(_external_context && _external_context->getThreadPool()); + if (!_setup) + { + _setup = setup(); + assert(_setup); + } + + if (_input->data_type() == OperandType::FLOAT32) + { + enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool()); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to run FP32 Convolution operator"}; + } + } + else + { + throw std::runtime_error{"XNNPACK Conv: unsupported data type"}; + } +} + +bool ConvolutionLayer::create() +{ + float output_activation_min = 0.f, output_activation_max = 0.f; + CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max); + + // NHWC + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
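+ // A regular convolution is expressed to XNNPACK as a grouped convolution with a
+ // single group: groups = 1, group_input_channels = depth_in and
+ // group_output_channels = depth_out.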
+ const auto &kernel_shape = _kernel->getShape(); + uint32_t kernel_height = kernel_shape.dim(1); + uint32_t kernel_width = kernel_shape.dim(2); + uint32_t output_channels = kernel_shape.dim(0); + uint32_t input_channels = kernel_shape.dim(3); + assert(static_cast<uint32_t>(_input->getShape().dim(3)) == input_channels); + assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels); + + enum xnn_status status = xnn_create_convolution2d_nhwc_f32( + _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width, + _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor, + 1 /* groups */, input_channels /* group_input_channels */, + output_channels /* group_output_channels */, input_channels /* input_channel_stride */, + output_channels /* output_channel_stride */, + reinterpret_cast<const float *>(_kernel->buffer()), + reinterpret_cast<const float *>(_bias->buffer()), output_activation_min, + output_activation_max, 0, &_kernel_op); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to create FP32 Convolution operator"}; + } + assert(_kernel_op != nullptr); + return true; +} + +bool ConvolutionLayer::setup() +{ + if (_input->buffer() == nullptr || _output->buffer() == nullptr) + { + // it could be models's input or output + return false; + } + + uint32_t input_width = _input->getShape().dim(2); + uint32_t input_height = _input->getShape().dim(1); + uint32_t batch_size = _input->getShape().dim(0); + enum xnn_status status = xnn_setup_convolution2d_nhwc_f32( + _kernel_op, batch_size, input_height, input_width, + reinterpret_cast<const float *>(_input->buffer()), + reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool()); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to create FP32 Convolution operator"}; + } + return true; +} + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h new file mode 100644 index 000000000..6cbaa9f3a --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ + +#include "Layer.h" + +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class ConvolutionLayer : public Layer +{ +public: + ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context); + +public: + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t dilation_width_factor, const uint32_t dilation_height_factor, + const ir::Activation activation, IPortableTensor *output); + + void run() override; + + bool create() override; + bool setup() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _padding_type; + uint32_t _padding_left; + uint32_t _padding_top; + uint32_t _padding_right; + uint32_t _padding_bottom; + + uint32_t _stride_width; + uint32_t _stride_height; + uint32_t _dilation_width_factor; + uint32_t _dilation_height_factor; + + ir::Activation _activation; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc new file mode 100644 index 000000000..947f04194 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "DepthwiseConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+DepthwiseConvolutionLayer::DepthwiseConvolutionLayer(
+  const std::shared_ptr<ExternalContext> external_context)
+  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+    _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
+    _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+  // DO NOTHING
+}
+
+void DepthwiseConvolutionLayer::configure(
+  const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+  ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
+  const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
+  const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
+  const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
+{
+  _input = input;
+  _kernel = kernel;
+  _bias = bias;
+  _padding_type = padding_type;
+  _padding_left = padding_left;
+  _padding_right = padding_right;
+  _padding_top = padding_top;
+  _padding_bottom = padding_bottom;
+  _stride_width = stride_width;
+  _stride_height = stride_height;
+  _multiplier = multiplier;
+  _dilation_width_factor = dilation_width_factor;
+  _dilation_height_factor = dilation_height_factor;
+  _activation = activation;
+  _output = output;
+
+  // TODO Support layouts other than NHWC
+  assert(_input->layout() == ir::Layout::NHWC);
+
+  assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+         _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void DepthwiseConvolutionLayer::run()
+{
+  assert(_external_context && _external_context->getThreadPool());
+  if (!_setup)
+  {
+    _setup = setup();
+    assert(_setup);
+  }
+
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+    if (status != xnn_status_success)
+    {
+      throw std::runtime_error{"failed to run FP32 DepthwiseConvolution operator"};
+    }
+  }
+  else
+  {
+    throw std::runtime_error{"XNNPACK DepthwiseConv: unsupported data type"};
+  }
+}
+
+bool DepthwiseConvolutionLayer::create()
+{
+  float output_activation_min = 0.f, output_activation_max = 0.f;
+  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+  // NHWC
+  // Kernel format is [1, kernel_height, kernel_width, depth_out].
+  const auto &kernel_shape = _kernel->getShape();
+  uint32_t kernel_height = kernel_shape.dim(1);
+  uint32_t kernel_width = kernel_shape.dim(2);
+  uint32_t output_channels = kernel_shape.dim(3);
+  uint32_t input_channels = _input->getShape().dim(3);
+  assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+  assert(output_channels == input_channels * _multiplier);
+
+  enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+    _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+    _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+    input_channels /* groups */, 1 /* group_input_channels */,
+    _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
+    output_channels /* output_channel_stride */,
+    reinterpret_cast<const float *>(_kernel->buffer()),
+    reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+    output_activation_max, XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
+  }
+  assert(_kernel_op != nullptr);
+  return true;
+}
+
+bool DepthwiseConvolutionLayer::setup()
+{
+  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+  {
+    // it could be the model's input or output
+    return false;
+  }
+
+  uint32_t input_width = _input->getShape().dim(2);
+  uint32_t input_height = _input->getShape().dim(1);
+  uint32_t batch_size = _input->getShape().dim(0);
+  enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+    _kernel_op, batch_size, input_height, input_width,
+    reinterpret_cast<const float *>(_input->buffer()),
+    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to setup FP32 DepthwiseConvolution operator"};
+  }
+  return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
new file mode 100644
index 000000000..10f840ae7
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ + +#include "Layer.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class DepthwiseConvolutionLayer : public Layer +{ +public: + DepthwiseConvolutionLayer(const std::shared_ptr<ExternalContext> external_context); + +public: + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t multiplier, const uint32_t dilation_width_factor, + const uint32_t dilation_height_factor, const ir::Activation activation, + IPortableTensor *output); + + void run() override; + + bool create() override; + bool setup() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _padding_type; + uint32_t _padding_left; + uint32_t _padding_top; + uint32_t _padding_right; + uint32_t _padding_bottom; + + uint32_t _stride_width; + uint32_t _stride_height; + uint32_t _multiplier; + uint32_t _dilation_width_factor; + uint32_t _dilation_height_factor; + + ir::Activation _activation; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc new file mode 100644 index 000000000..d595fda36 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
+  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _activation(ir::Activation::NONE)
+{
+  // DO NOTHING
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+                                    const IPortableTensor *bias, ir::Activation activation,
+                                    IPortableTensor *output)
+{
+  _input = input;
+  _kernel = weights;
+  _bias = bias;
+  _activation = activation;
+  _output = output;
+
+  // TODO Support layouts other than NHWC
+  assert(_input->layout() == ir::Layout::NHWC);
+
+  assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+         _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void FullyConnectedLayer::run()
+{
+  assert(_external_context && _external_context->getThreadPool());
+  if (!_setup)
+  {
+    _setup = setup();
+    assert(_setup);
+  }
+
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+    if (status != xnn_status_success)
+    {
+      throw std::runtime_error{"failed to run FP32 FullyConnected operator"};
+    }
+  }
+  else
+  {
+    throw std::runtime_error{"XNNPACK FC: unsupported data type"};
+  }
+}
+
+bool FullyConnectedLayer::create()
+{
+  float output_activation_min = 0.f, output_activation_max = 0.f;
+  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+  const auto &kernel_shape = _kernel->getShape();
+  assert(kernel_shape.rank() == 2);
+  uint32_t output_channels = kernel_shape.dim(0);
+  uint32_t input_channels = kernel_shape.dim(1);
+
+  const auto &input_shape = _input->getShape();
+  const auto &output_shape = _output->getShape();
+  uint32_t flag = 0;
+  if (input_shape.rank() != output_shape.rank())
+  {
+    flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D;
+    assert(input_shape.num_elements() % input_channels == 0);
+  }
+  else
+  {
+    assert(static_cast<uint32_t>(input_shape.dim(input_shape.rank() - 1)) == input_channels);
+  }
+
+  assert(_kernel && _kernel->buffer());
+  const float *kernel_buffer = reinterpret_cast<const float *>(_kernel->buffer());
+  const float *bias_buffer = (_bias) ?
reinterpret_cast<const float *>(_bias->buffer()) : nullptr;
+
+  enum xnn_status status = xnn_create_fully_connected_nc_f32(
+    input_channels, output_channels, input_channels /* input stride */,
+    output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
+    output_activation_max, flag, &_kernel_op);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
+  }
+  assert(_kernel_op != nullptr);
+  return true;
+}
+
+bool FullyConnectedLayer::setup()
+{
+  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+  {
+    // it could be the model's input or output
+    return false;
+  }
+
+  uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
+  enum xnn_status status = xnn_setup_fully_connected_nc_f32(
+    _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
+    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to setup FP32 FullyConnected operator"};
+  }
+  return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
new file mode 100644
index 000000000..883607ef9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public Layer
+{
+public:
+  FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+  void configure(const IPortableTensor *input, const IPortableTensor *weights,
+                 const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);
+
+  void run() override;
+
+  bool create() override;
+  bool setup() override;
+
+private:
+  const IPortableTensor *_input;
+  const IPortableTensor *_kernel;
+  const IPortableTensor *_bias;
+  IPortableTensor *_output;
+
+  ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/Layer.h b/runtime/onert/backend/xnnpack/ops/Layer.h
new file mode 100644
index 000000000..68b610f33
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/Layer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ + +#include <exec/IFunction.h> +#include <backend/IPortableTensor.h> +#include "OperationUtils.h" +#include "../ExternalContext.h" +#include "../Tensor.h" + +#include <cassert> +#include <memory> + +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class Layer : public ::onert::exec::IFunction +{ +public: + Layer(const std::shared_ptr<ExternalContext> external_context) + : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context} + { + // DO NOTHING + } + + ~Layer() + { + if (_kernel_op) + xnn_delete_operator(_kernel_op); + } + +public: + void prepare() override + { + if (_create) + return; + + _create = create(); + assert(_create); + + _setup = setup(); + } + virtual bool create() = 0; + virtual bool setup() = 0; + +protected: + xnn_operator_t _kernel_op; + bool _create; + bool _setup; + const std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/OperationUtils.h b/runtime/onert/backend/xnnpack/ops/OperationUtils.h new file mode 100644 index 000000000..5102e32dd --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/OperationUtils.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+
+// duplicated from cpu/ops/OperationUtils.h
+#include <ir/InternalType.h>
+#include <ir/Padding.h>
+#include <ir/DataType.h>
+
+#include <limits>
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+using OperandType = ir::DataType;
+
+template <typename T>
+void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+  if (activation == ir::Activation::RELU)
+  {
+    *activation_min = 0;
+    *activation_max = std::numeric_limits<T>::max();
+  }
+  else if (activation == ir::Activation::RELU6)
+  {
+    *activation_min = 0;
+    *activation_max = 6;
+  }
+  else if (activation == ir::Activation::RELU1)
+  {
+    *activation_min = -1;
+    *activation_max = 1;
+  }
+  else if (activation == ir::Activation::SIGMOID)
+  {
+    *activation_min = 0;
+    *activation_max = 1;
+  }
+  else if (activation == ir::Activation::NONE)
+  {
+    *activation_min = std::numeric_limits<T>::lowest();
+    *activation_max = std::numeric_limits<T>::max();
+  }
+  else
+  {
+    throw std::runtime_error{"Unsupported fused activation function"};
+  }
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
diff --git a/runtime/onert/backend/xnnpack/xnnpack.cc b/runtime/onert/backend/xnnpack/xnnpack.cc
new file mode 100644
index 000000000..38a6c5572
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/xnnpack.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+#include <util/logging.h>
+
+extern "C" {
+onert::backend::Backend *onert_backend_create()
+{
+  VERBOSE(onert_backend_create) << "'xnnpack' loaded\n";
+  return new onert::backend::xnnpack::Backend;
+}
+
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+  VERBOSE(onert_backend_destroy) << "'xnnpack' unloaded\n";
+  delete backend;
+}
+}
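The xnnpack ops layers above all share the same two-phase lifecycle: create() builds the xnn_operator_t once the weights are available, while setup() binds the input/output buffers and is retried from run() when prepare() was called before the buffers existed. A minimal sketch of how one of these layers might be driven is shown below; the free function run_conv_once and the simplified tensor/context wiring are illustrative assumptions, not the runtime's actual KernelGenerator flow, and the sketch assumes the backend has already initialized XNNPACK.

// Hypothetical driver sketch for onert::backend::xnnpack::ops::ConvolutionLayer.
// Assumes xnn_initialize() has already been performed by the backend and that
// all tensors are NHWC FLOAT32 with allocated buffers.
#include <memory>

#include "ConvolutionLayer.h"

namespace xnn_ops = onert::backend::xnnpack::ops;

void run_conv_once(const onert::backend::IPortableTensor *input,
                   const onert::backend::IPortableTensor *kernel,
                   const onert::backend::IPortableTensor *bias,
                   onert::backend::IPortableTensor *output,
                   std::shared_ptr<onert::backend::xnnpack::ExternalContext> ctx)
{
  xnn_ops::ConvolutionLayer conv{ctx};
  // Explicit zero padding, unit stride and unit dilation, no fused activation.
  conv.configure(input, kernel, bias, onert::ir::PaddingType::EXPLICIT,
                 /*padding_left=*/0, /*padding_right=*/0,
                 /*padding_top=*/0, /*padding_bottom=*/0,
                 /*stride_width=*/1, /*stride_height=*/1,
                 /*dilation_width_factor=*/1, /*dilation_height_factor=*/1,
                 onert::ir::Activation::NONE, output);

  conv.prepare(); // create() builds the xnn_operator_t; setup() may still fail here
  conv.run();     // retries setup() once buffers are bound, then calls xnn_run_operator
}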