author    | Chunseok Lee <chunseok.lee@samsung.com> | 2020-12-14 14:43:43 +0900
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2020-12-14 14:43:43 +0900
commit    | 62529acabbafce7730601ed01d5709d7bc0d378a (patch)
tree      | bf6912cfa8fac4a2997292bfcb3c82055734c97e /runtime/onert
parent    | 6ea13af5257155ff993c205cf997b870cc627f73 (diff)
Imported Upstream version 1.12.0 (tag: upstream/1.12.0)
Diffstat (limited to 'runtime/onert')
228 files changed, 8292 insertions, 3566 deletions
diff --git a/runtime/onert/api/.clang-format b/runtime/onert/api/.clang-format
new file mode 120000
index 000000000..83185fee3
--- /dev/null
+++ b/runtime/onert/api/.clang-format
@@ -0,0 +1 @@
+../../../.clang-format.8
\ No newline at end of file
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h
index 76380b4b8..6eb7e6ba9 100644
--- a/runtime/onert/api/include/nnfw.h
+++ b/runtime/onert/api/include/nnfw.h
@@ -64,7 +64,8 @@ typedef struct nnfw_session nnfw_session;
  *
  * The type of tensor represented in {@link nnfw_tensorinfo}
  */
-typedef enum {
+typedef enum
+{
   /** A tensor of 32 bit floating point */
   NNFW_TYPE_TENSOR_FLOAT32 = 0,
   /** A tensor of 32 bit signed integer */
@@ -96,7 +97,8 @@ typedef enum {
 /**
  * @brief Result values returned from a call to an API function
  */
-typedef enum {
+typedef enum
+{
   /** Successful */
   NNFW_STATUS_NO_ERROR = 0,
   /**
@@ -117,7 +119,8 @@ typedef enum {
 /**
  * @brief Data format of a tensor
  */
-typedef enum {
+typedef enum
+{
   /** Don't care layout */
   NNFW_LAYOUT_NONE = 0,
   /**
@@ -135,7 +138,8 @@ typedef enum {
 /**
  * @brief Information ID for retrieving information on nnfw (e.g. version)
  */
-typedef enum {
+typedef enum
+{
   /** nnfw runtime version
    * Its value is uint32 in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch.
    */
diff --git a/runtime/onert/api/include/nnfw_internal.h b/runtime/onert/api/include/nnfw_internal.h
index eb4b6d629..a88e32436 100644
--- a/runtime/onert/api/include/nnfw_internal.h
+++ b/runtime/onert/api/include/nnfw_internal.h
@@ -35,4 +35,13 @@ NNFW_STATUS nnfw_get_config(nnfw_session *session, const char *key, char *value,
  */
 NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, size_t size);
 
+/**
+ * @brief Load a tflite/circle model from file.
+ *
+ * @param[in] session   session
+ * @param[in] file_path Path to model file. Model type(tflite/circle) is decided by file extension
+ * @return    NFNFW_STATUS
+ */
+NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path);
+
 #endif // __NNFW_INTERNAL_H__
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 31c3890e3..28703c0eb 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
  * NNFW_VERSION is a uint32 value representing nnfw runtime version
  * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
  */
-#define NNFW_VERSION 0x01000b01
+#define NNFW_VERSION 0x01000c00
 
 #endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/api/src/CustomKernel.cc b/runtime/onert/api/src/CustomKernel.cc
index 3f3a5d81e..56525feff 100644
--- a/runtime/onert/api/src/CustomKernel.cc
+++ b/runtime/onert/api/src/CustomKernel.cc
@@ -65,7 +65,7 @@ public:
 };
 
 Kernel::Kernel(const nnfw_custom_eval evalFunction)
-    : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
+  : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
 {
 }
 
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc
index 835b2078a..4eba4ecec 100644
--- a/runtime/onert/api/src/nnfw_api.cc
+++ b/runtime/onert/api/src/nnfw_api.cc
@@ -90,7 +90,7 @@ NNFW_STATUS nnfw_close_session(nnfw_session *session)
 NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *pacakge_file_path)
 {
   NNFW_RETURN_ERROR_IF_NULL(session);
-  return session->load_model_from_file(pacakge_file_path);
+  return session->load_model_from_nnpackage(pacakge_file_path);
 }
 
 /*
@@ -350,6 +350,12 @@
   return session->load_circle_from_buffer(buffer, size);
 }
 
+NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->load_model_from_modelfile(file_path);
+}
+
 NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
 {
   NNFW_RETURN_ERROR_IF_NULL(session);
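The NNFW_VERSION bump above follows the header's own 0xMMmmmmPP encoding: 0x01000c00 is 1.12.0 (0x01 = 1, 0x000c = 12, 0x00 = 0), up from 0x01000b01, i.e. 1.11.1. The new internal entry point added to nnfw_api.cc can be driven as in the sketch below; it relies only on the public session calls from nnfw.h, and the file name and error-handling shape are illustrative, not part of this commit.

/* Hedged usage sketch of the new internal API (not code from this commit). */
#include "nnfw.h"
#include "nnfw_internal.h"

int load_single_model(const char *path) /* e.g. "model.tflite" or "model.circle" */
{
  nnfw_session *session = NULL;
  if (nnfw_create_session(&session) != NNFW_STATUS_NO_ERROR)
    return -1;
  /* Unlike nnfw_load_model_from_file, which takes an nnpackage directory,
     this takes a bare model file; the loader is chosen from the
     ".tflite"/".circle" suffix, as load_model_from_modelfile shows below. */
  if (nnfw_load_model_from_modelfile(session, path) != NNFW_STATUS_NO_ERROR ||
      nnfw_prepare(session) != NNFW_STATUS_NO_ERROR)
  {
    nnfw_close_session(session);
    return -1;
  }
  /* ... set inputs/outputs and nnfw_run(session) ... */
  return nnfw_close_session(session) == NNFW_STATUS_NO_ERROR ? 0 : -1;
}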
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index a4c69eb4f..c3fdb131b 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -19,17 +19,19 @@
 #include "compiler/Compiler.h"
 #include "util/ConfigSource.h"
 #include "util/Exceptions.h"
+#include "util/logging.h"
 #include "exec/Execution.h"
 #include "circle_loader.h"
 #include "tflite_loader.h"
 #include "json/json.h"
 #include "ir/OpCode.h"
+#include "util/TracingCtx.h"
+
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
 #include <dirent.h>
-#include <util/ConfigSource.h>
 #include <misc/string_helpers.h>
 
 /*
@@ -40,8 +42,11 @@
 #define MAX_PATH_LENGTH 1024
 #define MAX_TENSOR_NAME_LENGTH 64
 
+namespace
+{
+
 // Is null-terminating in length ?
-static bool null_terminating(const char *str, uint32_t length)
+bool null_terminating(const char *str, uint32_t length)
 {
   for (uint32_t i = 0; i < length; i++)
   {
@@ -53,7 +58,7 @@ static bool null_terminating(const char *str, uint32_t length)
   return false;
 }
 
-static onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
+onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
 {
   if (layout == NNFW_LAYOUT_CHANNELS_LAST)
   {
@@ -92,9 +97,70 @@ NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensor
   }
 }
 
+std::string trim(const std::string &value)
+{
+  std::string whitespace = " \t";
+  auto begin = value.find_first_not_of(whitespace);
+  if (begin == std::string::npos)
+    return ""; // no content
+
+  auto end = value.find_last_not_of(whitespace);
+  auto range = end - begin + 1;
+  return value.substr(begin, range);
+}
+
+using CfgKeyValues = std::unordered_map<std::string, std::string>;
+
+bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues)
+{
+  std::ifstream ifs(cfgfile);
+  if (ifs.is_open())
+  {
+    std::string line;
+    while (std::getline(ifs, line))
+    {
+      auto cmtpos = line.find('#');
+      if (cmtpos != std::string::npos)
+      {
+        line = line.substr(0, cmtpos);
+      }
+      std::istringstream isline(line);
+      std::string key;
+      if (std::getline(isline, key, '='))
+      {
+        std::string value;
+        if (std::getline(isline, value))
+        {
+          key = trim(key);
+          keyValues[key] = trim(value);
+        }
+      }
+    }
+    ifs.close();
+    return true;
+  }
+  return false;
+}
+
+void setConfigKeyValues(const CfgKeyValues &keyValues)
+{
+  auto configsrc = std::make_unique<onert::util::GeneralConfigSource>();
+
+  for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
+  {
+    VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
+    configsrc->set(it->first, it->second);
+  }
+
+  onert::util::config_source_ext(std::move(configsrc));
+}
+
+} // namespace
+
 nnfw_session::nnfw_session()
-  : _subgraphs{nullptr}, _execution{nullptr},
-    _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}
+  : _subgraphs{nullptr}, _execution{nullptr},
+    _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}, _tracing_ctx{
+                                                                                     nullptr}
 {
   // DO NOTHING
 }
@@ -122,13 +188,65 @@ NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
     return NNFW_STATUS_ERROR;
   }
 
-  _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
+  _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+  _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
+
+  _state = State::MODEL_LOADED;
+  return NNFW_STATUS_NO_ERROR;
+}
+
+NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path)
+{
+  if (!isStateInitialized())
+    return NNFW_STATUS_INVALID_STATE;
+
+  if (!model_file_path)
+  {
+    std::cerr << "Model file path is null." << std::endl;
+    return NNFW_STATUS_UNEXPECTED_NULL;
+  }
+
+  std::string filename{model_file_path};
+  if (filename.size() < 8) // .tflite or .circle
+  {
+    std::cerr << "Invalid model file path." << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  std::string model_type = filename.substr(filename.size() - 7, 7);
+
+  try
+  {
+    if (model_type == ".tflite")
+    {
+      _subgraphs = onert::tflite_loader::loadModel(filename.c_str());
+    }
+    else if (model_type == ".circle")
+    {
+      _subgraphs = onert::circle_loader::loadModel(filename.c_str());
+    }
+    else
+    {
+      std::cerr << "Unsupported model type" << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during model loading : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+  _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
 
   _state = State::MODEL_LOADED;
   return NNFW_STATUS_NO_ERROR;
 }
 
-NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
+NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
 {
   if (!isStateInitialized())
     return NNFW_STATUS_INVALID_STATE;
@@ -166,6 +284,18 @@ NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
   mfs >> root;
   const Json::Value &models = root["models"];
   const Json::Value &model_types = root["model-types"];
+  const Json::Value &configs = root["configs"];
+
+  if (!configs.empty() && !configs[0].empty())
+  {
+    auto filepath = package_dir + std::string("/metadata/") + configs[0].asCString();
+
+    CfgKeyValues keyValues;
+    if (loadConfigure(filepath, keyValues))
+    {
+      setConfigKeyValues(keyValues);
+    }
+  }
 
   auto model_file_path = package_dir + std::string("/") + models[0].asString(); // first model
   auto model_type = model_types[0].asString(); // first model's type
@@ -190,7 +320,9 @@
     return NNFW_STATUS_ERROR;
   }
 
-  _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
+  _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+  _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
 
   _state = State::MODEL_LOADED;
   return NNFW_STATUS_NO_ERROR;
@@ -225,7 +357,7 @@ NNFW_STATUS nnfw_session::prepare()
   {
     _subgraphs.reset();
     std::shared_ptr<onert::exec::ExecutorMap> executors = _compiler->compile();
-    _execution = std::make_shared<onert::exec::Execution>(executors);
+    _execution = std::make_unique<onert::exec::Execution>(executors);
   }
   catch (const std::exception &e)
   {
@@ -308,8 +440,8 @@ NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const vo
   if (!buffer && length != 0)
   {
     std::cerr
-        << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0"
-        << std::endl;
+      << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0"
+      << std::endl;
     return NNFW_STATUS_ERROR;
   }
 
@@ -337,8 +469,8 @@ NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *b
   if (!buffer && length != 0)
   {
     std::cerr
-        << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0"
-        << std::endl;
+      << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0"
+      << std::endl;
     return NNFW_STATUS_ERROR;
   }
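The "configs" handling above means an nnpackage can now ship runtime settings next to the model: the MANIFEST's "configs" array names a file under the package's metadata/ directory, loadConfigure() drops everything after a '#', splits each remaining line at the first '=', trims spaces and tabs from both sides, and setConfigKeyValues() feeds the pairs into a GeneralConfigSource. A hypothetical metadata config illustrating the accepted shape (the file name and the BACKENDS line are assumptions; EXECUTOR is a key this same commit reads via util::config::EXECUTOR):

# metadata/config.cfg (hypothetical name, listed as "configs" : [ "config.cfg" ] in MANIFEST)
EXECUTOR=Linear       # read elsewhere in this commit to pick the linear memory planner
BACKENDS=acl_cl;cpu   # assumed key and value syntax, shown only for shape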
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h
index 604ba38b4..a50ac72d3 100644
--- a/runtime/onert/api/src/nnfw_api_internal.h
+++ b/runtime/onert/api/src/nnfw_api_internal.h
@@ -21,6 +21,7 @@
 #include "nnfw_experimental.h"
 
 #include <util/GeneralConfigSource.h>
+#include <util/TracingCtx.h>
 
 #include <string>
 #include <memory>
@@ -100,7 +101,7 @@ public:
   nnfw_session();
   ~nnfw_session();
 
-  NNFW_STATUS load_model_from_file(const char *package_file_path);
+  NNFW_STATUS load_model_from_nnpackage(const char *package_file_path);
 
   NNFW_STATUS prepare();
   NNFW_STATUS run();
@@ -132,6 +133,7 @@ public:
   NNFW_STATUS set_config(const char *key, const char *value);
   NNFW_STATUS get_config(const char *key, char *value, size_t value_size);
   NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size);
+  NNFW_STATUS load_model_from_modelfile(const char *file_path);
 
   //
   // Experimental API
@@ -154,8 +156,10 @@ private:
   State _state{State::INITIALIZED};
   std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
   std::unique_ptr<onert::compiler::Compiler> _compiler;
-  std::shared_ptr<onert::exec::Execution> _execution;
+  std::unique_ptr<onert::exec::Execution> _execution;
   std::shared_ptr<onert::frontend::custom::KernelRegistry> _kernel_registry;
+
+  std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
 };
 
 #endif // __API_NNFW_API_INTERNAL_H__
diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt
index 42d622aa8..dc038c975 100644
--- a/runtime/onert/backend/CMakeLists.txt
+++ b/runtime/onert/backend/CMakeLists.txt
@@ -4,3 +4,5 @@ add_subdirectory(cpu)
 add_subdirectory(acl_cl)
 add_subdirectory(acl_neon)
 add_subdirectory(acl_common)
+add_subdirectory(ruy)
+add_subdirectory(xnnpack)
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 5c5041378..4f48314c1 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -20,6 +20,7 @@
 #include <memory>
 #include <backend/Backend.h>
 
+#include "BackendContext.h"
 #include "Config.h"
 #include "ConstantInitializer.h"
 #include "KernelGenerator.h"
@@ -41,21 +42,20 @@ public:
 
   std::shared_ptr<IConfig> config() const override { return _config; }
 
-  std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
-                                             const std::shared_ptr<custom::IKernelBuilder> &,
-                                             bool is_linear_executor) const override
+  std::unique_ptr<backend::BackendContext>
+  newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
+             bool is_linear_executor) const override
   {
     const auto &operands = graph.operands();
     const auto &operations = graph.operations();
-    auto context = std::make_unique<BackendContext>(this, &graph);
+    auto context = std::make_unique<acl_cl::BackendContext>(this, &graph);
     auto tm = createTensorManager(is_linear_executor);
     auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
-    auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+    auto tb = std::make_shared<TensorBuilder>(operands, tm);
     context->tensor_registry = tr;
     context->tensor_builder = tb;
     context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
     context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
-    context->tensor_register = nullptr;
     context->optimizer = std::make_shared<Optimizer>(context.get());
     return context;
   }
diff --git a/runtime/onert/backend/acl_cl/BackendContext.cc b/runtime/onert/backend/acl_cl/BackendContext.cc
new file mode 100644
index 000000000..a6f228a4f
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/BackendContext.cc
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "Optimizer.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+void BackendContext::initConsts()
+{
+  for (auto &op : operation_list())
+  {
+    constant_initializer->setLayout(op.layout);
+    graph()->operations().at(op.index).accept(*constant_initializer);
+  }
+
+  for (auto ind : operand_list())
+  {
+    const auto &obj = graph()->operands().at(ind);
+    if (obj.isConstant() && !constant_initializer->exist(ind))
+    {
+      constant_initializer->registerDefaultInitializer(ind, obj);
+    }
+  }
+
+  constant_initializer->run();
+}
+
+void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                 const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+  ir::OperandIndexMap<uint32_t> uses_map;
+  ir::OperandIndexMap<uint32_t> def_map;
+  ir::OperandIndexSequence constants;
+
+  // Prepare scanning
+  for (auto ind : operand_list())
+  {
+    const auto &obj = graph()->operands().at(ind);
+    const auto &li = lower_info.operand.at(ind);
+    if (li->def_factors().getOnlyElement().backend() != backend())
+      continue;
+
+    // Ignore unused tensor
+    if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+    {
+      VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
+                           << std::endl;
+      return;
+    }
+
+    uses_map[ind] = obj.getUses().size();
+    def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+    if (obj.isConstant())
+      constants.append(ind);
+
+    auto factor = li->def_factors().getOnlyElement();
+    if (!tensor_builder->isRegistered(ind))
+    {
+      // These tensors do not exist in any op_seq (No use and def)
+      const auto info = obj.info();
+      const auto backend_layout = factor.layout();
+      // TODO Change tensor info to have permuted shape
+      tensor_builder->registerTensorInfo(ind, info, backend_layout);
+    }
+  }
+
+  // Start scanning to do notify{First|Last}Use for each tensor
+
+  // If a tensor is a constant, increase the use of the tensor and allocate it first.
+  // Increasing use count here makes the tensor never be deallocated, i.e it they will be
+  // deallocated last.
+  VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
+  for (const auto &ind : constants)
+  {
+    uses_map[ind]++;
+    tensor_builder->notifyFirstUse(ind);
+  }
+
+  // At each operation,
+  // 1. Scan DEF of outputs. If the DEF, allocate it
+  // 2. Scan DEF of inputs. If variable tensor, allocate it
+  // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+  for (const auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    for (const auto &op_idx : op_seq.operations())
+    {
+      auto &op = graph()->operations().at(op_idx);
+      auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+      auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+      // Define outputs
+      for (const auto &ind : op_outputs)
+      {
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        assert(def_map.find(ind) != def_map.end());
+        if (def_map[ind])
+        {
+          def_map[ind] = 0;
+          tensor_builder->notifyFirstUse(ind);
+        }
+      }
+
+      // Scan variable tensors
+      // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
+      // non-constant because of less memory usage by memory planning in here
+      for (const auto &ind : op_inputs)
+      {
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        const auto &operand = graph()->operands().at(ind);
+        if (operand.info().isVariable())
+        {
+          // The variable tensor with buffer is not supported yet
+          assert(operand.data() == nullptr);
+          assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+          assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+                 lower_info.operand.at(ind)->use_factors().size() == 1);
+          assert(uses_map[ind] == 1 && def_map[ind] == 0);
+          tensor_builder->notifyFirstUse(ind);
+        }
+      }
+
+      for (const auto &ind : op_inputs)
+      {
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        assert(uses_map.find(ind) != uses_map.end());
+        assert(uses_map[ind] > 0);
+        uses_map[ind]--;
+        if (uses_map[ind] == 0)
+        {
+          // plan for deallocation of static tensornode
+          tensor_builder->notifyLastUse(ind);
+        }
+      }
+    }
+  }
+
+  // Dispose and validate
+  for (const auto &ind : constants)
+  {
+    --uses_map[ind];
+    if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+    {
+      tensor_builder->notifyLastUse(ind);
+    }
+  }
+
+  assert(
+    std::all_of(uses_map.begin(), uses_map.end(),
+                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+  assert(
+    std::all_of(def_map.begin(), def_map.end(),
+                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                            const ir::OpSequences &op_seqs,
+                                            const ir::LowerInfoMap &lower_info)
+{
+  optimizer->optimize();
+
+  for (const auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+                    ir::Remove::DUPLICATED;
+    for (const auto op_ind : op_seq)
+    {
+      bool op_assigned = [&]() {
+        for (auto &op_info : operation_list())
+          if (op_info.index == op_ind)
+            return true;
+        return false;
+      }();
+      if (!op_assigned)
+        continue;
+
+      const auto &op = graph()->operations().at(op_ind);
+      for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
+      {
+        if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
+            find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
+        {
+          const auto &operand_lower_info =
+            lower_info.operand.at(index)->def_factors().getOnlyElement();
+
+          // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
+          // op.getOutputs() of permute (CPU) returns tensor A
+          // but tensor A belongs to the backend of acl_cl.
+          // So, we have to make this tensor NOT registered for CPU.
+          if (operand_lower_info.backend() != backend())
+            continue;
+
+          const auto &obj = graph()->operands().at(index);
+          const auto frontend_layout = op_seq.getLayout();
+          const auto backend_layout = operand_lower_info.layout();
+          ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+                                       obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+          tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+        }
+      }
+    }
+  }
+
+  // TODO Get compiler options from compiler, and use it rather than getting it from Env
+  if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+  {
+    planTensors(order, op_seqs, lower_info);
+  }
+  else
+  {
+    // For the executors that does not have fixed linear execution order:
+    // To make tensors never be deallocated, this is a workaround to use static memory planner
+    for (auto ind : operand_list())
+    {
+      if (tensor_builder->isRegistered(ind))
+        tensor_builder->notifyFirstUse(ind);
+    }
+  }
+
+  tensor_builder->prepare();
+
+  return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                       const ir::OpSequences &op_seqs)
+{
+  FunctionMap ret;
+
+  for (auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    bool assigned = [&]() {
+      for (auto op_info : operation_list())
+        if (op_seq.exist(op_info.index))
+          return true;
+      return false;
+    }();
+    if (!assigned)
+      continue;
+    auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+    ret.emplace_back(op_seq_ind, std::move(fn_seq));
+  }
+
+  tensor_builder->allocate();
+  initConsts();
+
+  // NOTE For memory optimization, we want to free some operand data
+  for (auto ind : operand_list())
+  {
+    // TODO Remove const_cast
+    auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+    obj.releaseData();
+  }
+
+  for (auto &it : ret)
+  {
+    auto &fn_seq = it.second;
+    fn_seq->iterate([&](exec::IFunction &ifunc) {
+      ifunc.prepare();
+      tensor_builder->postFunctionPrepare();
+    });
+  }
+
+  return ret;
+}
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
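planTensors() above (duplicated for acl_neon further down) is a reference-counting liveness plan: every operand starts at its total consumer count, a DEF claims the buffer (notifyFirstUse), each consuming operation decrements the count, and the buffer is released (notifyLastUse) when it hits zero; constants get one extra use up front so they are only released at the very end. A minimal stand-alone C++ sketch of the same idea follows; the Op/plan names are hypothetical and this is not onert code. As in the backend code, only the Linear executor runs such a plan; other executors just notifyFirstUse everything so nothing is freed mid-run.

// Stand-alone sketch of use/def-count liveness planning (hypothetical types).
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Op
{
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

// 'uses' holds the total number of consumers per operand, like uses_map above.
void plan(const std::vector<Op> &order, std::map<std::string, int> uses,
          const std::vector<std::string> &constants)
{
  for (const auto &c : constants)
  {
    ++uses[c]; // pin constants: their counter cannot reach zero mid-graph
    std::printf("alloc %s\n", c.c_str());
  }
  for (const auto &op : order)
  {
    for (const auto &out : op.outputs)
      std::printf("alloc %s\n", out.c_str()); // DEF of an output claims memory
    for (const auto &in : op.inputs)
      if (--uses[in] == 0)
        std::printf("free  %s\n", in.c_str()); // last USE releases it
  }
  for (const auto &c : constants)
    if (--uses[c] == 0)
      std::printf("free  %s\n", c.c_str()); // unpin constants at the very end
}

int main()
{
  // conv consumes input+weights, relu consumes conv_out: weights (a constant)
  // and conv_out are freed as soon as their last consumer has run.
  plan({{{"input", "weights"}, {"conv_out"}}, {{"conv_out"}, {"relu_out"}}},
       {{"input", 1}, {"weights", 1}, {"conv_out", 1}}, {"weights"});
}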
diff --git a/runtime/onert/backend/acl_cl/BackendContext.h b/runtime/onert/backend/acl_cl/BackendContext.h
new file mode 100644
index 000000000..662d767d0
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/BackendContext.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+class Optimizer;
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+  BackendContext(const Backend *backend, const ir::Graph *graph,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+                 std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+                 std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+                 std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+    : onert::backend::BackendContext(backend, graph, tensor_registry),
+      tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+      kernel_gen{kernel_gen}
+  {
+  }
+
+  ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                              const ir::OpSequences &op_seqs,
+                              const ir::LowerInfoMap &lower_info) override;
+  FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+                         const ir::OpSequences &op_seqs) override;
+
+private:
+  void initConsts();
+  void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                   const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+  std::shared_ptr<TensorBuilder> tensor_builder;
+  std::shared_ptr<ConstantInitializer> constant_initializer;
+  std::shared_ptr<KernelGenerator> kernel_gen;
+  std::shared_ptr<Optimizer> optimizer;
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index b45b91058..413a7ccc3 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -112,7 +112,7 @@ void ConstantInitializer::visit(const ir::operation::Reverse &node)
   const auto &axis_obj = _operands.at(axis_index);
 
   const auto ifm_rank = input_obj.shape().rank();
-  const auto frontend_layout = this->_current_op_seq_layout;
+  const auto frontend_layout = this->_current_layout;
 
   auto output_tensor = this->_tensor_reg->getITensor(output_index);
   const auto backend_layout = output_tensor->layout();
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
index 9f3acb461..fc0eca84f 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
 
 #include "AclConstantInitializer.h"
 
@@ -45,4 +45,4 @@ public:
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index e7690af2e..3a5ea5a0f 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -49,7 +49,7 @@ KernelGenerator::KernelGenerator(
   const std::shared_ptr<TensorBuilder> &tensor_builder,
   const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
   : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
-    _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
+    _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
 {
   // DO NOTHING
 }
@@ -62,7 +62,7 @@
   _return_fn_seq = std::make_unique<exec::FunctionSequence>();
   _return_fn_seq->enableDynamicShapeInferer(false);
 
-  _current_op_seq_layout = op_seq.getLayout();
+  _current_layout = op_seq.getLayout();
   for (const auto &operation_idx : op_seq.operations())
   {
     const auto &node = _operations_ctx.at(operation_idx);
@@ -78,6 +78,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   const auto block_size_index{
     node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
 
+  const auto NNApiInputs = 2;
+  if (node.getInputs().size() != NNApiInputs)
+  {
+    const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+    if (!_ctx.at(crops_index).isConstant())
+    {
+      throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND");
+    }
+
+    auto crops = _ctx.at(crops_index).asVector<int32_t>();
+    for (auto crop : crops)
+    {
+      if (crop != 0)
+      {
+        throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND");
+      }
+    }
+  }
+
   auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
   auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
   auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
@@ -152,8 +171,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
   const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
   const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
 
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
   const auto &ker_shape = _ctx.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
@@ -189,8 +208,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
   const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
 
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
   // Kernel format is [1, kernel_height, kernel_width, depth_out].
   const auto &ker_shape = _ctx.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
@@ -255,7 +274,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
   else
   {
     const auto rank = _ctx.at(ofm_index).shape().rank();
-    const auto frontend_layout = _current_op_seq_layout;
+    const auto frontend_layout = _current_layout;
     const auto backend_layout = output_tensor->layout();
     const auto fixed_axis =
       acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
@@ -277,7 +296,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
   auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
                                                 ::arm_compute::CLFullyConnectedReshapingLayer>(
-    node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
+    node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
   _return_fn = std::make_unique<exec::FunctionSequence>(
     std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
 }
@@ -296,7 +315,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
   // Convert to ACL axes taking into account negative values and possible duplicates.
   const auto &axes = _ctx.at(axes_index);
   const auto input_rank = _ctx.at(input_index).shape().rank();
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = input_tensor->layout();
 
   std::unique_ptr<arm_compute::IFunction> fn;
@@ -329,7 +348,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   // NOTE This operation must not be changed the layout from frontend to backend
   // So, PermutationOperationPass makes layouts of frontend and backend the same.
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = output_tensor->layout();
   assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
          frontend_layout == backend_layout);
@@ -388,7 +407,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
   auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
   auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = inputData_tensor->layout();
 
   // Set initializers for indices data such as order of inputData
@@ -455,7 +474,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
   auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
   auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = inputData_tensor->layout();
 
   // Set initializers for indices data such as order of inputData
@@ -557,7 +576,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
   auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
   auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = ifm_tensor->layout();
 
   const auto &perms = _ctx.at(perm_idx);
@@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
   auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
 
   const size_t output_rank = _ctx.at(output_idx).shape().rank();
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = output_tensor->layout();
   int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
   axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
@@ -887,7 +906,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
   for (const auto &input_index : input_indexes)
     inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
 
   if (axis < 0)
@@ -923,8 +942,7 @@
 void KernelGenerator::visit(const ir::operation::Pool2D &node)
 {
   auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
-    node, _ctx, _tensor_reg, _current_op_seq_layout,
-    acl_common::convertPoolType(node.param().op_type));
+    node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
 
   const auto ofm_index{node.getOutputs().at(0)};
   auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
@@ -1169,9 +1187,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
   const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
   const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
 
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+  const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout);
 
   const auto stride = node.param().stride;
 
@@ -1270,7 +1288,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   UNUSED_RELEASE(backend_layout);
   assert(backend_layout == ifm_tensor->layout());
   assert(backend_layout == indices_tensor->layout());
-  assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+  assert(ifm_rank < 4 || _current_layout == backend_layout);
 
   // input is n-D, indices k-D, output is (n + k - 1)-D
   size_t n = ifm_rank;
@@ -1306,11 +1324,11 @@
   _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
-  const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+  const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+  const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
 
   auto ifm_shape = _ctx.at(ifm_index).shape();
   auto ofm_shape = _ctx.at(ofm_index).shape();
@@ -1320,7 +1338,7 @@
   auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
   auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
   const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
-  auto frontend_layout = _current_op_seq_layout;
+  auto frontend_layout = _current_layout;
   auto backend_layout = ifm_tensor->layout();
 
   int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
@@ -1331,10 +1349,10 @@
   auto acl_axis =
     acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
-
+  auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
+                                             : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
   auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayerEx>(
-    ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
-    ::arm_compute::ReductionOperation::ARG_IDX_MAX);
+    ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
 
   _return_fn = asAclFunction(std::move(fn));
 }
@@ -1400,7 +1418,7 @@ void KernelGenerator::visit(const ir::operation::Split &node)
   for (const auto &ofm_ind : output_indexes)
     output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = ifm_tensor->layout();
   auto axis = _ctx.at(axis_index).asScalar<int32_t>();
   if (axis < 0)
@@ -1439,7 +1457,7 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
   {
     int32_t split_dim = split_dim_op.asScalar<int32_t>();
     uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim;
-    const auto frontend_layout = _current_op_seq_layout;
+    const auto frontend_layout = _current_layout;
     const auto backend_layout = ifm_tensor->layout();
 
     if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions())
@@ -1483,7 +1501,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
   for (const auto &output_index : output_indexes)
     outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
   if (axis < 0)
     axis += input_rank;
@@ -1526,7 +1544,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
   auto input = _tensor_reg->getAclTensor(input_index)->handle();
   auto output = _tensor_reg->getAclTensor(output_index)->handle();
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
 
   ::arm_compute::PaddingList padding_list;
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h
index e8a922677..22a7c18a3 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.h
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.h
@@ -17,7 +17,7 @@
 #ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
 #define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
 
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
 
 #include "ir/Operands.h"
 #include "TensorBuilder.h"
@@ -31,7 +31,7 @@ namespace backend
 namespace acl_cl
 {
 
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
 {
 public:
   KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
@@ -39,60 +39,61 @@ public:
                   const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
 
   void visit(const ir::OpSequence &) override;
+
+  void visit(const ir::operation::ArgMinMax &) override;
   void visit(const ir::operation::BatchToSpaceND &) override;
   void visit(const ir::operation::BinaryArithmetic &) override;
+  void visit(const ir::operation::Comparison &) override;
+  void visit(const ir::operation::Concat &) override;
   void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::ConvertFp16ToFp32 &) override;
+  void visit(const ir::operation::ConvertFp32ToFp16 &) override;
+  void visit(const ir::operation::DepthToSpace &) override;
   void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::Concat &) override;
-  void visit(const ir::operation::FullyConnected &) override;
-  void visit(const ir::operation::Reduce &) override;
-  void visit(const ir::operation::Reshape &) override;
-  void visit(const ir::operation::Squeeze &) override;
-  void visit(const ir::operation::Softmax &) override;
-  void visit(const ir::operation::Slice &) override;
-  void visit(const ir::operation::StridedSlice &) override;
-  void visit(const ir::operation::Transpose &) override;
   void visit(const ir::operation::ElementwiseActivation &) override;
   void visit(const ir::operation::ElementwiseBinary &) override;
   void visit(const ir::operation::ElementwiseUnary &) override;
+  void visit(const ir::operation::EmbeddingLookup &) override;
   void visit(const ir::operation::ExpandDims &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+  void visit(const ir::operation::Gather &) override;
+  void visit(const ir::operation::HashtableLookup &) override;
   void visit(const ir::operation::InstanceNorm &) override;
-  void visit(const ir::operation::Comparison &) override;
+  void visit(const ir::operation::L2Normalization &) override;
+  void visit(const ir::operation::LocalResponseNormalization &) override;
   void visit(const ir::operation::LSTM &) override;
   void visit(const ir::operation::OneHot &) override;
   void visit(const ir::operation::Pack &) override;
-  void visit(const ir::operation::Pool2D &) override;
+  void visit(const ir::operation::Pad &) override;
   void visit(const ir::operation::Permute &) override;
+  void visit(const ir::operation::Pool2D &) override;
+  void visit(const ir::operation::PReLU &) override;
+  void visit(const ir::operation::Reduce &) override;
+  void visit(const ir::operation::Reshape &) override;
   void visit(const ir::operation::ResizeBilinear &) override;
   void visit(const ir::operation::ResizeNearestNeighbor &) override;
+  void visit(const ir::operation::Reverse &) override;
   void visit(const ir::operation::RNN &) override;
+  void visit(const ir::operation::Slice &) override;
+  void visit(const ir::operation::Softmax &) override;
   void visit(const ir::operation::SpaceToBatchND &) override;
   void visit(const ir::operation::SpaceToDepth &) override;
-  void visit(const ir::operation::EmbeddingLookup &) override;
-  void visit(const ir::operation::L2Normalization &) override;
-  void visit(const ir::operation::HashtableLookup &) override;
-  void visit(const ir::operation::PReLU &) override;
-  void visit(const ir::operation::TransposeConv &) override;
-  void visit(const ir::operation::SquaredDifference &) override;
-  void visit(const ir::operation::TopKV2 &) override;
-  void visit(const ir::operation::Gather &) override;
-  void visit(const ir::operation::ArgMax &) override;
-  void visit(const ir::operation::LocalResponseNormalization &) override;
-  void visit(const ir::operation::DepthToSpace &) override;
   void visit(const ir::operation::Split &) override;
   void visit(const ir::operation::SplitV &) override;
+  void visit(const ir::operation::SquaredDifference &) override;
+  void visit(const ir::operation::Squeeze &) override;
+  void visit(const ir::operation::StridedSlice &) override;
+  void visit(const ir::operation::TopKV2 &) override;
+  void visit(const ir::operation::Transpose &) override;
+  void visit(const ir::operation::TransposeConv &) override;
   void visit(const ir::operation::Unpack &) override;
-  void visit(const ir::operation::Pad &) override;
-  void visit(const ir::operation::ConvertFp32ToFp16 &) override;
-  void visit(const ir::operation::ConvertFp16ToFp32 &) override;
-  void visit(const ir::operation::Reverse &) override;
 
 private:
   const ir::Operands &_ctx;
   const ir::Operations &_operations_ctx;
   std::shared_ptr<TensorBuilder> _tensor_builder;
   std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
-  ir::Layout _current_op_seq_layout;
+  ir::Layout _current_layout;
 };
 
 } // namespace acl_cl
diff --git a/runtime/onert/backend/acl_cl/Optimizer.h b/runtime/onert/backend/acl_cl/Optimizer.h
index 18d38ec1b..ad5154860 100644
--- a/runtime/onert/backend/acl_cl/Optimizer.h
+++ b/runtime/onert/backend/acl_cl/Optimizer.h
@@ -17,8 +17,7 @@
 #ifndef __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
 #define __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
 
-#include <backend/IOptimizer.h>
-#include <backend/BackendContext.h>
+#include "BackendContext.h"
 #include "TensorBuilder.h"
 
 namespace onert
@@ -28,12 +27,12 @@ namespace backend
 namespace acl_cl
 {
 
-class Optimizer : public IOptimizer
+class Optimizer
 {
 public:
   Optimizer(BackendContext *context);
 
-  void optimize() override;
+  void optimize();
 
 private:
   BackendContext *_context;
diff --git a/runtime/onert/backend/acl_cl/acl_cl.cc b/runtime/onert/backend/acl_cl/acl_cl.cc
index 88378b13a..82cbde02f 100644
--- a/runtime/onert/backend/acl_cl/acl_cl.cc
+++ b/runtime/onert/backend/acl_cl/acl_cl.cc
@@ -14,20 +14,11 @@
  * limitations under the License.
  */
 
-#include <util/logging.h>
-
 #include "Backend.h"
 
 extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
-  VERBOSE(onert_backend_create) << "'acl_cl' loaded\n";
-  return new onert::backend::acl_cl::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
-  VERBOSE(onert_backend_create) << "'acl_cl' unloaded\n";
-  delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_cl::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
 }
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
index 21f41a3e6..921d107d9 100644
--- a/runtime/onert/backend/acl_common/AclConstantInitializer.cc
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
@@ -25,7 +25,7 @@ namespace acl_common
 
 AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
                                                const std::shared_ptr<ITensorRegistry> &tensor_reg)
-  : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+  : cpu_common::ConstantInitializerBase{operands}, _tensor_reg{tensor_reg}
 {
   // DO NOTHING
 }
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h
index 52f4c54cf..894e2e7d1 100644
--- a/runtime/onert/backend/acl_common/AclConstantInitializer.h
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h
@@ -17,7 +17,7 @@
 #ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
 
-#include <backend/IConstantInitializer.h>
+#include <backend/cpu_common/ConstantInitializerBase.h>
 #include <ir/Operands.h>
 #include "AclTensorRegistry.h"
 
@@ -28,7 +28,7 @@ namespace backend
 namespace acl_common
 {
 
-class AclConstantInitializer : public IConstantInitializer
+class AclConstantInitializer : public cpu_common::ConstantInitializerBase
 {
 public:
   AclConstantInitializer(const ir::Operands &operands,
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
index bb7abc95d..12e9ab894 100644
--- a/runtime/onert/backend/acl_common/AclTensorBuilder.h
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -21,7 +21,6 @@
 #include <queue>
 
 #include <arm_compute/core/Types.h>
-#include <backend/ITensorBuilder.h>
 #include "ir/OperandIndexMap.h"
 #include <ir/Operands.h>
 #include "AclTensorManager.h"
@@ -43,14 +42,12 @@ enum class UsesType
   LAST
 };
 
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclTensorBuilder : public ITensorBuilder
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder
 {
 public:
   using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
 
-  AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
-                   const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg);
+  AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
 
   /**
    * @brief Register tensor information to allocate on ACL-CL backend
@@ -59,16 +56,16 @@ public:
    * @param[in] layout Tensor data layout
    */
   void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
-                          ir::Layout backend_layout) override;
+                          ir::Layout backend_layout);
 
-  void notifyFirstUse(const ir::OperandIndex &) override;
-  void notifyLastUse(const ir::OperandIndex &) override;
+  void notifyFirstUse(const ir::OperandIndex &);
+  void notifyLastUse(const ir::OperandIndex &);
 
-  bool isRegistered(const ir::OperandIndex &) const override;
+  bool isRegistered(const ir::OperandIndex &) const;
 
-  void prepare(void) override;
-  void allocate() override;
-  void postFunctionPrepare() override;
+  void prepare(void);
+  void allocate();
+  void postFunctionPrepare();
 
   T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
 
@@ -105,7 +102,6 @@ private:
   ir::OperandIndexMap<size_t> _uses_count_map;
 
   std::unique_ptr<T_AclTensorManager> _tensor_mgr;
-  std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg;
 
   // for linear executor
   std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
@@ -133,10 +129,9 @@ namespace acl_common
 {
 
 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(
-  const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
-  const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg)
-  : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg}
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
+                                                                     T_AclTensorManager *tensor_mgr)
+  : _operands{operands}, _tensor_mgr{tensor_mgr}
 {
   assert(_tensor_mgr);
 }
diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc
index 67d9d7176..7d3a69032 100644
--- a/runtime/onert/backend/acl_common/Convert.cc
+++ b/runtime/onert/backend/acl_common/Convert.cc
@@ -109,13 +109,19 @@ namespace acl_common
     case ir::DataType::UINT8:
       return ::arm_compute::DataType::U8;
     case ir::DataType::QUANT_INT8_SYMM:
-      return ::arm_compute::DataType::S8;
+      return ::arm_compute::DataType::QSYMM8;
+    case ir::DataType::QUANT_INT8_ASYMM:
+      return ::arm_compute::DataType::QASYMM8_SIGNED;
     case ir::DataType::FLOAT16:
      return ::arm_compute::DataType::F16;
    case ir::DataType::INT64:
      return ::arm_compute::DataType::S64;
::arm_compute::DataType::S64; + case ir::DataType::QUANT_INT16_ASYMM: + return ::arm_compute::DataType::QASYMM16; + case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL: + return ::arm_compute::DataType::QSYMM8_PER_CHANNEL; default: - throw std::runtime_error("Not supported, yet"); + throw std::runtime_error("Not supported internal data type, yet"); break; } } @@ -175,7 +181,7 @@ namespace acl_common return ::arm_compute::ActivationLayerInfo{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f}; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported internal activation, yet"}; break; } } @@ -219,7 +225,7 @@ asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, return ::arm_compute::ActivationLayerInfo{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha}; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported internal elementwise activation, yet"}; break; } } @@ -295,6 +301,8 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) return ir::DataType::UINT32; case ::arm_compute::DataType::QASYMM8: return ir::DataType::QUANT_UINT8_ASYMM; + case ::arm_compute::DataType::QASYMM8_SIGNED: + return ir::DataType::QUANT_INT8_ASYMM; case ::arm_compute::DataType::U8: return ir::DataType::UINT8; case ::arm_compute::DataType::QSYMM8: @@ -304,7 +312,7 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) case ::arm_compute::DataType::S64: return ir::DataType::INT64; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported acl data type, yet"}; break; } } diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h index 35d6e4e8e..b11c19733 100644 --- a/runtime/onert/backend/acl_neon/Backend.h +++ b/runtime/onert/backend/acl_neon/Backend.h @@ -21,6 +21,7 @@ #include <backend/Backend.h> #include <ir/Operands.h> +#include "BackendContext.h" #include "Config.h" #include "ConstantInitializer.h" #include "KernelGenerator.h" @@ -41,21 +42,20 @@ public: std::shared_ptr<IConfig> config() const override { return _config; } - std::unique_ptr<BackendContext> newContext(const ir::Graph &graph, - const std::shared_ptr<custom::IKernelBuilder> &, - bool is_linear_executor) const override + std::unique_ptr<backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &, + bool is_linear_executor) const override { const auto &operands = graph.operands(); const auto &operations = graph.operations(); - auto context = std::make_unique<BackendContext>(this, &graph); + auto context = std::make_unique<acl_neon::BackendContext>(this, &graph); auto tm = createTensorManager(is_linear_executor); auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); - auto tb = std::make_shared<TensorBuilder>(operands, tm, tr); + auto tb = std::make_shared<TensorBuilder>(operands, tm); context->tensor_registry = tr; context->tensor_builder = tb; context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr); - context->tensor_register = nullptr; context->optimizer = std::make_shared<Optimizer>(context.get()); return context; } diff --git a/runtime/onert/backend/acl_neon/BackendContext.cc b/runtime/onert/backend/acl_neon/BackendContext.cc new file mode 100644 index 000000000..8b53171f7 --- /dev/null +++ 
b/runtime/onert/backend/acl_neon/BackendContext.cc @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "Optimizer.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" + +namespace onert +{ +namespace backend +{ +namespace acl_neon +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info) +{ + ir::OperandIndexMap<uint32_t> uses_map; + ir::OperandIndexMap<uint32_t> def_map; + ir::OperandIndexSequence constants; + + // Prepare scanning + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + const auto &li = lower_info.operand.at(ind); + if (li->def_factors().getOnlyElement().backend() != backend()) + continue; + + // Ignore unused tensor + if (li->def_factors().size() == 0 && li->use_factors().size() == 0) + { + VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process." + << std::endl; + return; + } + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().valid() ? 1 : 0; + + if (obj.isConstant()) + constants.append(ind); + + auto factor = li->def_factors().getOnlyElement(); + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any op_seq (No use and def) + const auto info = obj.info(); + const auto backend_layout = factor.layout(); + // TODO Change tensor info to have permuted shape + tensor_builder->registerTensorInfo(ind, info, backend_layout); + } + } + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor and allocate it first. + // Increasing use count here makes the tensor never be deallocated, i.e it they will be + // deallocated last. + VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan DEF of inputs. If variable tensor, allocate it + // 3. Scan USE of inputs. 
Decrease the USE and deallocate if the USE is 0 + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + for (const auto &op_idx : op_seq.operations()) + { + auto &op = graph()->operations().at(op_idx); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + // Define outputs + for (const auto &ind : op_outputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder->notifyFirstUse(ind); + } + } + + // Scan variable tensors + // A variable tensor behaves like a constant, but OperandInfo and LowerInfo treat it as + // non-constant so that memory planning here can use less memory + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + const auto &operand = graph()->operands().at(ind); + if (operand.info().isVariable()) + { + // The variable tensor with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + assert(lower_info.operand.at(ind)->def_factors().size() == 1 && + lower_info.operand.at(ind)->use_factors().size() == 1); + assert(uses_map[ind] == 1 && def_map[ind] == 0); + tensor_builder->notifyFirstUse(ind); + } + } + + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + // plan for deallocation of static tensor node + tensor_builder->notifyLastUse(ind); + } + } + } + } + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder->notifyLastUse(ind); + } + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + optimizer->optimize(); + + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (const auto op_ind : op_seq) + { + bool op_assigned = [&]() { + for (auto &op_info : operation_list()) + if (op_info.index == op_ind) + return true; + return false; + }(); + if (!op_assigned) + continue; + + const auto &op = graph()->operations().at(op_ind); + for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED) + { + if (!tensor_builder->isRegistered(index) && !model_io.contains(index) && + find(operand_list().begin(), operand_list().end(), index) != operand_list().end()) + { + const auto &operand_lower_info = + lower_info.operand.at(index)->def_factors().getOnlyElement(); + + // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) + // op.getOutputs() of permute (CPU) returns tensor A, + // but tensor A belongs to the acl_cl backend. + // So, we must NOT register this tensor for the CPU backend. 
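+ // (A hypothetical walk-through of the check below: if lower_info reports tensor A's + // def factor as acl_cl while this context's backend() is acl_neon, the continue + // skips A here and leaves registration to the owning backend, so every operand + // ends up registered by exactly one backend's tensor_builder.)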
+ if (operand_lower_info.backend() != backend()) + continue; + + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = op_seq.getLayout(); + const auto backend_layout = operand_lower_info.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + } + } + + // TODO Get compiler options from the compiler, and use them rather than getting them from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + planTensors(order, op_seqs, lower_info); + } + else + { + // For executors that do not have a fixed linear execution order: + // as a workaround, use the static memory planner so that tensors are never deallocated + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + tensor_builder->allocate(); + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { + ifunc.prepare(); + tensor_builder->postFunctionPrepare(); + }); + } + + return ret; +} + +} // namespace acl_neon +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_neon/BackendContext.h b/runtime/onert/backend/acl_neon/BackendContext.h new file mode 100644 index 000000000..dd764c091 --- /dev/null +++ b/runtime/onert/backend/acl_neon/BackendContext.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +namespace onert +{ +namespace backend +{ +namespace acl_neon +{ + +class Optimizer; + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen} + { + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + std::shared_ptr<Optimizer> optimizer; +}; + +} // namespace acl_neon +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h index c7d71cdcf..9723ba012 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.h +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ #include "AclConstantInitializer.h" @@ -41,4 +41,4 @@ public: } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc index ffaee3b3e..e712dfa81 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.cc +++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc @@ -48,7 +48,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr<TensorBuilder> &tensor_builder, const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -61,7 +61,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq = std::make_unique<exec::FunctionSequence>(); _return_fn_seq->enableDynamicShapeInferer(false); - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -70,17 +70,17 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) } } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); - auto frontend_layout = _current_op_seq_layout; + auto frontend_layout = _current_layout; auto backend_layout = ifm_tensor->layout(); int axis_value = _ctx.at(axis_index).asScalar<int32_t>(); @@ -91,10 +91,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) assert(axis_value >= 0 && axis_value < ifm_rank); const auto fixed_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); + auto reduce_type = node.param().is_arg_max ? 
::arm_compute::ReductionOperation::ARG_IDX_MAX + : ::arm_compute::ReductionOperation::ARG_IDX_MIN; auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>( - ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), - arm_compute::ReductionOperation::ARG_IDX_MAX); + ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type); _return_fn = asAclFunction(std::move(fn)); } @@ -106,6 +107,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + const auto NNApiInputs = 2; + if (node.getInputs().size() != NNApiInputs) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + if (!_ctx.at(crops_index).isConstant()) + { + throw std::runtime_error("Non-constant crops NYI for acl_neon backend BatchToSpaceND"); + } + + auto crops = _ctx.at(crops_index).asVector<int32_t>(); + for (auto crop : crops) + { + if (crop != 0) + { + throw std::runtime_error("Non-zero crops NYI for acl_neon backend BatchToSpaceND"); + } + } + } + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index); @@ -178,8 +198,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -232,8 +252,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
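// Illustration of these kernel-shape conventions (hypothetical values, assuming the usual TFLite channel-multiplier semantics): a depthwise kernel applying a 3x3 filter to 8 input channels with channel multiplier 2 has depth_out = 16 and shape [1, 3, 3, 16], whereas a regular Conv2D kernel for the same filter size would be laid out as [depth_out, 3, 3, depth_in].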
const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -297,7 +317,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) else { const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); @@ -495,7 +515,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node) auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor, ::arm_compute::NEFullyConnectedReshapingLayer>( - node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_layout); _return_fn = std::make_unique<exec::FunctionSequence>( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } @@ -552,7 +572,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. assert(backend_layout == ifm_tensor->layout()); assert(backend_layout == indices_tensor->layout()); - assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); + assert(ifm_rank < 4 || _current_layout == backend_layout); // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; @@ -686,7 +706,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : input_indexes) inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout(); if (axis < 0) @@ -738,7 +758,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) { const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); const auto axis = acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value(); @@ -762,8 +782,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) void KernelGenerator::visit(const ir::operation::Pool2D &node) { auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_reg, _current_op_seq_layout, - acl_common::convertPoolType(node.param().op_type)); + node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type)); const auto ofm_index{node.getOutputs().at(0)}; auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); @@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) // Convert to ACL axes taking into account negative values and possible duplicates. 
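// A rough example of that conversion, assuming the frontend and backend layouts match: ARM Compute indexes dimensions from the innermost one, so for a rank-4 input a frontend axis of -1 is first normalized to 3 and then mapped to ACL axis rank - 3 - 1 = 0, while duplicate axes collapse into a single coordinate entry.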
const auto &axes = _ctx.at(axes_index); const auto input_rank = _ctx.at(input_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = input_tensor->layout(); const auto reduce_axes = acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout); @@ -873,7 +892,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || frontend_layout == backend_layout); @@ -1047,7 +1066,7 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &ofm_ind : output_indexes) output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); auto axis = _ctx.at(axis_index).asScalar<int32_t>(); if (axis < 0) @@ -1085,7 +1104,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -1150,7 +1169,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -1244,9 +1263,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout); const auto stride = node.param().stride; @@ -1285,7 +1304,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx); const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); const auto rank = _ctx.at(ifm_idx).shape().rank(); @@ -1340,7 +1359,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) 
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); if (axis < 0) axis += input_rank; @@ -1413,7 +1432,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx); const size_t output_rank = _ctx.at(out_idx).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h index 4d269cde5..2a4b307b8 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.h +++ b/runtime/onert/backend/acl_neon/KernelGenerator.h @@ -17,7 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__ -#include <backend/IKernelGenerator.h> +#include <backend/cpu_common/KernelGeneratorBase.h> #include "ir/Operands.h" #include "TensorBuilder.h" @@ -31,7 +31,7 @@ namespace backend namespace acl_neon { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -39,17 +39,20 @@ public: const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg); void visit(const ir::OpSequence &) override; - void visit(const ir::operation::ArgMax &) override; + + void visit(const ir::operation::ArgMinMax &) override; void visit(const ir::operation::BatchToSpaceND &) override; void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) override; void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::EmbeddingLookup &) override; + void visit(const ir::operation::ExpandDims &) override; void visit(const ir::operation::FullyConnected &) override; void visit(const ir::operation::Gather &) override; void visit(const ir::operation::HashtableLookup &) override; @@ -57,36 +60,34 @@ public: void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::OneHot &) override; void visit(const ir::operation::Pack &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::PReLU &) override; void visit(const ir::operation::Reduce &) override; void visit(const ir::operation::Reshape &) override; void visit(const 
ir::operation::ResizeBilinear &) override; void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::Slice &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Squeeze &) override; void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Unpack &) override; - void visit(const ir::operation::ExpandDims &) override; - void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::OneHot &) override; private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; }; } // namespace acl_neon diff --git a/runtime/onert/backend/acl_neon/Optimizer.h b/runtime/onert/backend/acl_neon/Optimizer.h index 5fe0d519c..b8fb343e9 100644 --- a/runtime/onert/backend/acl_neon/Optimizer.h +++ b/runtime/onert/backend/acl_neon/Optimizer.h @@ -17,8 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__ #define __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__ -#include <backend/IOptimizer.h> -#include <backend/BackendContext.h> +#include "BackendContext.h" #include "TensorBuilder.h" namespace onert @@ -28,12 +27,12 @@ namespace backend namespace acl_neon { -class Optimizer : public IOptimizer +class Optimizer { public: Optimizer(BackendContext *context); - void optimize() override; + void optimize(); private: BackendContext *_context; diff --git a/runtime/onert/backend/acl_neon/acl_neon.cc b/runtime/onert/backend/acl_neon/acl_neon.cc index f490d132d..6535fb291 100644 --- a/runtime/onert/backend/acl_neon/acl_neon.cc +++ b/runtime/onert/backend/acl_neon/acl_neon.cc @@ -14,20 +14,11 @@ * limitations under the License. 
*/ -#include <util/logging.h> - #include "Backend.h" extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'acl_neon' loaded\n"; - return new onert::backend::acl_neon::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'acl_neon' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_neon::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h index fc8574b26..0b416a7e9 100644 --- a/runtime/onert/backend/cpu/Backend.h +++ b/runtime/onert/backend/cpu/Backend.h @@ -54,8 +54,6 @@ public: context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, context->external_context()); - context->tensor_register = nullptr; - context->optimizer = nullptr; return context; } diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc new file mode 100644 index 000000000..6b958c1b7 --- /dev/null +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace cpu +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? 
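+ // (The first user's layout stands in for this operand's frontend layout; if two + // users were lowered with different layouts, taking the first would be arbitrary, + // which is what the FIXME above points out.)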
+ for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from the compiler, and use them rather than getting them from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For executors that do not have a fixed linear execution order: + // as a workaround, use the static memory planner so that tensors are never deallocated + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h index e90b21054..0a4106d33 100644 --- a/runtime/onert/backend/cpu/BackendContext.h +++ b/runtime/onert/backend/cpu/BackendContext.h @@ -18,6 +18,9 @@ #define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__ #include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" #include "ExternalContext.h" namespace onert @@ -32,21 +35,35 @@ class BackendContext : public onert::backend::BackendContext public: BackendContext(const Backend *backend, const ir::Graph *graph, std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, - std::shared_ptr<ITensorBuilder> tensor_builder = nullptr, - std::shared_ptr<IConstantInitializer> constant_initializer = nullptr, - std::shared_ptr<IKernelGenerator> kernel_gen = nullptr, - std::shared_ptr<ITensorRegister> tensor_register = nullptr, - std::shared_ptr<IOptimizer> optimizer = nullptr) - : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder, - constant_initializer, kernel_gen, tensor_register, - optimizer), - _external_context(new ExternalContext) + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) 
+ : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(new ExternalContext) { } + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + std::shared_ptr<ExternalContext> external_context() { return _external_context; } private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, // the thread pool is also created in duplicate // TODO Create one ruy context for session diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h index c016c83bc..d7858c0f6 100644 --- a/runtime/onert/backend/cpu/ConstantInitializer.h +++ b/runtime/onert/backend/cpu/ConstantInitializer.h @@ -14,13 +14,10 @@ * limitations under the License. */ -#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ -#include "backend/cpu_common/TensorRegistry.h" - -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> +#include <backend/cpu_common/ConstantInitializer.h> namespace onert { @@ -29,35 +26,10 @@ namespace backend namespace cpu { -class ConstantInitializer : public IConstantInitializer -{ -public: - ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<ITensorRegistry> &tensor_reg); - -public: - void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override; - - // TODO: For now the only cpu backend supports constant tensor to use data from external - // If the other backend supports (to do this, - // ExternalTensor should be abstract such as IExternal, maybe), - // this can be an interface of IConstantInitializer - void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &); - -public: - void visit(const ir::operation::Conv2D &) override; - void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::FullyConnected &) override; - -private: - std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; } - -private: - std::shared_ptr<ITensorRegistry> _tensor_reg; -}; +using ConstantInitializer = cpu_common::ConstantInitializer; } // namespace cpu } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h index 32e249f5a..f5d11f4f1 100644 --- a/runtime/onert/backend/cpu/ExternalContext.h +++ b/runtime/onert/backend/cpu/ExternalContext.h @@ -17,7 +17,6 @@ #ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ #define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ -#include <backend/IExternalContext.h> #include 
<util/ConfigSource.h> #include <ruy/context.h> @@ -33,7 +32,7 @@ namespace backend namespace cpu { -class ExternalContext : public IExternalContext +class ExternalContext { public: ExternalContext() : _ruy_context(new ruy::Context) diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc index 451815b65..25756eced 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.cc +++ b/runtime/onert/backend/cpu/KernelGenerator.cc @@ -23,6 +23,7 @@ #include "ops/CompareLayer.h" #include "ops/ConcatLayer.h" #include "ops/ConvolutionLayer.h" +#include "ops/DepthToSpaceLayer.h" #include "ops/DepthwiseConvolutionLayer.h" #include "ops/EinsumLayer.h" #include "ops/ElementwiseActivationLayer.h" @@ -108,12 +109,16 @@ convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type { switch (type_ir) { + case ir::operation::ElementwiseActivation::Type::ELU: + return ops::ElementwiseActivationType::kElu; case ir::operation::ElementwiseActivation::Type::LOGISTIC: return ops::ElementwiseActivationType::kLogistic; case ir::operation::ElementwiseActivation::Type::RELU: return ops::ElementwiseActivationType::kReLU; case ir::operation::ElementwiseActivation::Type::TANH: return ops::ElementwiseActivationType::kTanh; + case ir::operation::ElementwiseActivation::Type::LEAKY_RELU: + return ops::ElementwiseActivationType::kLeakyReLU; default: throw std::runtime_error("cpu KernelGenerator : Not supported operation yet"); } @@ -124,6 +129,8 @@ convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinary { switch (type_ir) { + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND: + return ops::ElementwiseBinaryType::kLogicalAnd; case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR: return ops::ElementwiseBinaryType::kLogicalOr; case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX: @@ -167,6 +174,10 @@ ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::Elementwise return ops::ElementwiseUnaryType::kRSqrt; case ir::operation::ElementwiseUnary::Type::SIN: return ops::ElementwiseUnaryType::kSin; + case ir::operation::ElementwiseUnary::Type::SQRT: + return ops::ElementwiseUnaryType::kSqrt; + case ir::operation::ElementwiseUnary::Type::SQUARE: + return ops::ElementwiseUnaryType::kSquare; case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE: return ops::ElementwiseUnaryType::kZerosLike; default: @@ -217,7 +228,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr<ExternalContext> &external_context) : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), - _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context) + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) { // DO NOTHING } @@ -260,7 +271,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); } - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -314,8 +325,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) _return_fn = std::move(fn); return; } - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = 
_ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -342,8 +353,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -364,7 +375,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width, - dilation_height, activation, ofm_tensor); + dilation_height, activation, ofm_tensor, _external_context); _return_fn = std::move(fn); } @@ -374,7 +385,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) const auto ofm_index{node.getOutputs().at(0)}; const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); auto output_tensor = _tensor_reg->getPortableTensor(ofm_index); @@ -418,16 +429,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) void KernelGenerator::visit(const ir::operation::Fill &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)}; + // SHAPE input is used for shape inference const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)}; auto output_tensor = _tensor_reg->getPortableTensor(output_index); - auto input_tensor = _tensor_reg->getPortableTensor(input_index); auto value_tensor = _tensor_reg->getPortableTensor(value_index); auto fn = std::make_unique<ops::FillLayer>(); - fn->configure(input_tensor, value_tensor, output_tensor); + fn->configure(value_tensor, output_tensor); _return_fn = std::move(fn); } @@ -576,7 +586,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) assert(backend_layout == indices_tensor->layout()); const auto &input_shape = _ctx.at(input_index).shape(); UNUSED_RELEASE(input_shape); - assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout); + assert(input_shape.rank() < 4 || _current_layout == backend_layout); const auto axis_raw = node.param().axis; const auto axis_value = (axis_raw < 0 ? 
(input_shape.rank() + axis_raw) : axis_raw); @@ -640,7 +650,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node) for (auto &idx : opSeq) { const auto &operand = _ctx.at(idx); - // TODO make sure using `_current_op_seq_layout` is correct for custom operations + // TODO make sure using `_current_layout` is correct for custom operations types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()}); auto in_tensor = _tensor_reg->getPortableTensor(idx); tensors.emplace_back(in_tensor); @@ -713,15 +723,14 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node) { const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; + // AXIS input is used for output shape inference auto output_tensor = _tensor_reg->getPortableTensor(output_index); auto input_tensor = _tensor_reg->getPortableTensor(input_index); - auto axis_tensor = _tensor_reg->getPortableTensor(axis_index); auto fn = std::make_unique<ops::ExpandDimsLayer>(); - fn->configure(input_tensor, axis_tensor, output_tensor); + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -731,7 +740,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) const auto ofm_index{node.getOutputs().at(0)}; const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); assert(-rank <= axis && axis < rank); @@ -753,7 +762,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) const auto input_index{node.getInputs().at(0)}; const auto rank = _ctx.at(input_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); assert(rank == 0 || (-rank <= axis && axis < rank)); @@ -1004,11 +1013,11 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)}; + const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)}; auto output_tensor = _tensor_reg->getPortableTensor(output_index); auto input_tensor = _tensor_reg->getPortableTensor(input_index); @@ -1016,7 +1025,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto fn = std::make_unique<ops::ArgMinMaxLayer>(); - fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true); + fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max); _return_fn = std::move(fn); } @@ -1029,8 +1038,8 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node) const auto kh = node.param().kh; const auto kw = node.param().kw; const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = 
_ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); const auto activation = node.param().activation; @@ -1255,6 +1264,21 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) _return_fn = std::move(fn); } +void KernelGenerator::visit(const ir::operation::DepthToSpace &node) +{ + const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + auto block_size = node.param().block_size; + + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + + auto fn = std::make_unique<ops::DepthToSpaceLayer>(); + + fn->configure(input_tensor, block_size, output_tensor); + _return_fn = std::move(fn); +} + void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) { const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index 5df77607f..3a4cfbffa 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -23,7 +23,7 @@ #include "Tensor.h" #include <backend/CustomKernelBuilder.h> -#include <backend/IKernelGenerator.h> +#include <backend/cpu_common/KernelGeneratorBase.h> #include <ir/Operands.h> #include <ir/Operations.h> @@ -34,7 +34,7 @@ namespace backend namespace cpu { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -43,59 +43,59 @@ public: const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, const std::shared_ptr<ExternalContext> &external_context); - using IKernelGenerator::visit; + void visit(const ir::OpSequence &) override; void visit(const ir::operation::AddN &) override; - void visit(const ir::OpSequence &) override; + void visit(const ir::operation::ArgMinMax &) override; + void visit(const ir::operation::BatchMatMul &) override; + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::BroadcastTo &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::Custom &node) override; + void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; - void visit(const ir::operation::Fill &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Reshape &) override; - void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Softmax &) override; - void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Einsum &) override; - void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::Custom &node) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) 
override; void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; + void visit(const ir::operation::Fill &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::FusedBatchNorm &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::L2Normalization &) override; + void visit(const ir::operation::LogSoftmax &) override; void visit(const ir::operation::LSTM &) override; - void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Pack &) override; - void visit(const ir::operation::Unpack &) override; + void visit(const ir::operation::MatrixBandPart &) override; void visit(const ir::operation::OneHot &) override; - void visit(const ir::operation::Transpose &) override; - void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::Select &) override; - void visit(const ir::operation::Slice &) override; - void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::Split &) override; - void visit(const ir::operation::Shape &) override; - void visit(const ir::operation::ResizeBilinear &node) override; - void visit(const ir::operation::Reverse &) override; - void visit(const ir::operation::ArgMax &) override; + void visit(const ir::operation::Pack &) override; + void visit(const ir::operation::Pad &) override; void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Pow &) override; - void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::Tile &) override; - void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::Range &) override; void visit(const ir::operation::Rank &) override; - void visit(const ir::operation::MatrixBandPart &) override; - void visit(const ir::operation::BatchMatMul &) override; - void visit(const ir::operation::BatchToSpaceND &) override; - void visit(const ir::operation::BroadcastTo &) override; - void visit(const ir::operation::FusedBatchNorm &) override; - void visit(const ir::operation::LogSoftmax &) override; + void visit(const ir::operation::Reduce &) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::ResizeBilinear &node) override; + void visit(const ir::operation::Reverse &) override; + void visit(const ir::operation::Select &) override; + void visit(const ir::operation::Shape &) override; + void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::StatelessRandomUniform &) override; + void visit(const ir::operation::Split &) override; void visit(const ir::operation::SplitV &) override; + void visit(const ir::operation::SquaredDifference &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::StatelessRandomUniform &) override; + void visit(const ir::operation::StridedSlice &) override; + void visit(const ir::operation::Tile &) override; + void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::Unpack &) override; private: const ir::Operands &_ctx; @@ -103,7 +103,7 @@ private: std::shared_ptr<TensorBuilder> _tensor_builder; std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; 
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; const std::shared_ptr<ExternalContext> _external_context; }; diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc deleted file mode 100644 index 3edac897c..000000000 --- a/runtime/onert/backend/cpu/StaticTensorManager.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "StaticTensorManager.h" -#include "Tensor.h" - -#include <util/logging.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ - -StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®, - cpu_common::DynamicTensorManager *dynamic_tensor_manager) - : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg}, - _dynamic_tensor_manager{dynamic_tensor_manager} -{ - // DO NOTHING -} - -void StaticTensorManager::allocateNonconsts(void) -{ - _nonconst_mgr->allocate(); - - for (auto &pair : _tensors->native_tensors()) - { - const auto &ind = pair.first; - auto tensor = pair.second.get(); - if (!_as_constants[ind] && !tensor->is_dynamic()) - { - auto *buffer = _nonconst_mgr->getBuffer(ind); - tensor->setBuffer(buffer); - - VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value() - << "): " << static_cast<void *>(buffer) << std::endl; - } - } -} - -void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } - -void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, - const ir::OperandInfo &tensor_info, ir::Layout backend_layout, - bool as_const) -{ - assert(!_tensors->getITensor(ind)); - if (as_const) - { - auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout); - _tensors->setNativeTensor(ind, std::move(tensor)); - } - else - { - auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, - _dynamic_tensor_manager->dynamic_mem_mgr().get()); - _tensors->setNativeTensor(ind, std::move(tensor)); - } - _as_constants[ind] = as_const; -} - -void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) -{ - assert(_tensors->getITensor(ind)); - - // This method is called only when a tensor has proper shape - assert(!_tensors->getITensor(ind)->is_dynamic()); - - if (!_as_constants[ind]) - _nonconst_mgr->claimPlan(ind, size); -} - -void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) -{ - assert(_tensors->getITensor(ind)); - - // This method is called only when a tensor has proper shape - assert(!_tensors->getITensor(ind)->is_dynamic()); - - if (!_as_constants[ind]) - _nonconst_mgr->releasePlan(ind); -} - -void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn) -{ - for (const auto &it : _tensors->native_tensors()) - fn(it.first); -} - -} // namespace cpu -} // namespace backend -} // namespace onert diff --git 
a/runtime/onert/backend/cpu/StaticTensorManager.h b/runtime/onert/backend/cpu/StaticTensorManager.h index 2af61e4e7..d07f0c814 100644 --- a/runtime/onert/backend/cpu/StaticTensorManager.h +++ b/runtime/onert/backend/cpu/StaticTensorManager.h @@ -17,13 +17,7 @@ #ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ #define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ -#include "backend/IStaticTensorManager.h" -#include "backend/cpu_common/DynamicTensorManager.h" -#include "backend/cpu_common/MemoryManager.h" -#include "backend/cpu_common/TensorRegistry.h" -#include "backend/ITensorManager.h" -#include "ir/OperandIndexMap.h" -#include "ir/OperandInfo.h" +#include "backend/cpu_common/StaticTensorManager.h" namespace onert { @@ -32,30 +26,7 @@ namespace backend namespace cpu { -class StaticTensorManager : public backend::IStaticTensorManager -{ -public: - StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®, - cpu_common::DynamicTensorManager *dynamic_tensor_manager); - virtual ~StaticTensorManager() = default; - - void allocateNonconsts(void); - void deallocateNonconsts(void); - - void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, - ir::Layout backend_layout, bool as_const); - - void claimPlan(const ir::OperandIndex &ind, uint32_t size); - void releasePlan(const ir::OperandIndex &ind); - - void iterate(const std::function<void(const ir::OperandIndex &)> &fn); - -private: - std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr; - const std::shared_ptr<cpu_common::TensorRegistry> _tensors; - ir::OperandIndexMap<bool> _as_constants; - cpu_common::DynamicTensorManager *_dynamic_tensor_manager; -}; +using StaticTensorManager = cpu_common::StaticTensorManager; } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h index 2ad2ad0fb..d663c3f50 100644 --- a/runtime/onert/backend/cpu/Tensor.h +++ b/runtime/onert/backend/cpu/Tensor.h @@ -28,92 +28,7 @@ namespace cpu { using Tensor = cpu_common::Tensor; - -/** - * @brief Class that uses data from external memory that is not managed by a backend - * instead of allocating and copying the data. ExternalTensor's data pointer points to - * an address of memory such as where memory is already allocated, or mmapped area. - * This is meaning that ExternalTensor can take all of types' ir::Data. - * To support this, assume below things no padding, always NHWC layout, - * constant tensor and not dynamic. - */ -class ExternalTensor : public Tensor -{ -public: - ExternalTensor() = delete; - virtual ~ExternalTensor(); - -public: - ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout) - : Tensor(info, layout, nullptr) - { - assert(_layout == ir::Layout::NHWC); - assert(_info.isConstant()); - assert(_info.isDynamic() == false); - } - -public: - /** - * @brief set Data to be shared from external so that this ExternalTensor will not be - * allocated on CPU backend - * @param[in] data data of Operand to be set - */ - void setData(const std::shared_ptr<ir::Data> data) - { - assert(data != nullptr); - _data = data; - // Note. Some op such as cker::Conv could take buffer as nullptr. 
- // That's why _buffer also would be used - _buffer = const_cast<uint8_t *>(_data->base()); - } - -public: - uint8_t *buffer() const override { return _buffer; } - - bool is_constant() const override { return true; } - bool is_dynamic() const override { return false; } - void set_dynamic() override - { - throw std::runtime_error("This tensor does not support changing dynamic"); - } - - void setShape(const ir::Shape &) override - { - throw std::runtime_error("This tensor does not support changing shape"); - } - - void increase_ref() override { ++_num_references; } - - void decrease_ref() override - { - assert(_data != nullptr); - assert(_num_references > 0); - --_num_references; - if (_num_references == 0) - { - _data.reset(); - _buffer = nullptr; - } - } - - /** - * @brief Reset reference count to zero and release data - */ - void reset_ref() override - { - assert(_data != nullptr); - assert(_num_references > 0); - _num_references = 0; - - _data.reset(); - _buffer = nullptr; - } - - int32_t num_references() override { return _num_references; } - -private: - std::shared_ptr<const ir::Data> _data; -}; +using ExternalTensor = cpu_common::ExternalTensor; } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h index 448abc229..9d8a5deb5 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.h +++ b/runtime/onert/backend/cpu/TensorBuilder.h @@ -20,7 +20,6 @@ #include <backend/cpu_common/DynamicTensorManager.h> #include <backend/cpu_common/TensorRegistry.h> -#include <backend/ITensorBuilder.h> #include <ir/OperandIndexMap.h> #include "StaticTensorManager.h" @@ -35,7 +34,7 @@ namespace backend namespace cpu { -class TensorBuilder : public ITensorBuilder +class TensorBuilder { public: TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); @@ -47,18 +46,18 @@ public: * @param[in] layout Operand data layout */ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) override; + ir::Layout backend_layout); - void notifyFirstUse(const ir::OperandIndex &) override; - void notifyLastUse(const ir::OperandIndex &) override; + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); - bool isRegistered(const ir::OperandIndex &) const override; + bool isRegistered(const ir::OperandIndex &) const; - void prepare(void) override; - void allocate() override; - void postFunctionPrepare() override { /* DO NOTHING */} + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} - IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } private: const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; diff --git a/runtime/onert/backend/cpu/cpu.cc b/runtime/onert/backend/cpu/cpu.cc index 5385bb2a3..55538e2a6 100644 --- a/runtime/onert/backend/cpu/cpu.cc +++ b/runtime/onert/backend/cpu/cpu.cc @@ -16,18 +16,9 @@ #include "Backend.h" -#include <util/logging.h> - extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'cpu' loaded\n"; - return new onert::backend::cpu::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'cpu' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::cpu::Backend; } + +void 
onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc index 2fd284c91..d5ffdef0b 100644 --- a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc @@ -79,6 +79,9 @@ void ArgMinMaxLayer::run() case ir::DataType::UINT8: TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t); break; + case ir::DataType::QUANT_INT8_ASYMM: + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t); + break; case ir::DataType::INT32: TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t); break; @@ -97,6 +100,9 @@ void ArgMinMaxLayer::run() case ir::DataType::UINT8: TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t); break; + case ir::DataType::QUANT_INT8_ASYMM: + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t); + break; case ir::DataType::INT32: TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t); break; diff --git a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc index 7ef023788..ba9655924 100644 --- a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc +++ b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc @@ -67,7 +67,7 @@ void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTens void BatchMatMulLayer::run() { - if (_lhs->data_type() == OperandType::FLOAT32) + if ((_lhs->data_type() == OperandType::FLOAT32) && (_rhs->data_type() == OperandType::FLOAT32)) { batchMatMulFloat32(); } diff --git a/runtime/onert/backend/cpu/ops/ConcatLayer.cc b/runtime/onert/backend/cpu/ops/ConcatLayer.cc index d26ed7378..edfdfc1a6 100644 --- a/runtime/onert/backend/cpu/ops/ConcatLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConcatLayer.cc @@ -117,24 +117,26 @@ void ConcatLayer::configure(const std::vector<const IPortableTensor *> &inputs, void ConcatLayer::run() { - if (_output->data_type() == OperandType::FLOAT32) + switch (_output->data_type()) { - concatenationGeneral<float>(); + case OperandType::FLOAT32: + concatenationGeneral<float>(); + break; + case OperandType::QUANT_UINT8_ASYMM: + concatenationQuant8(); + break; + case OperandType::QUANT_INT8_ASYMM: + concatenationGeneral<int8_t>(); + break; + case OperandType::INT32: + concatenationGeneral<int32_t>(); + break; + case OperandType::INT64: + concatenationGeneral<int64_t>(); + break; + default: + throw std::runtime_error("Concat: unsupported data type"); } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - concatenationQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - concatenationGeneral<int32_t>(); - } - else if (_output->data_type() == OperandType::INT64) - { - concatenationGeneral<int64_t>(); - } - else - throw std::runtime_error("Concat: unsupported data type"); } } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc index 799e9e2d0..c964e38f9 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc @@ -203,8 +203,6 @@ void ConvolutionLayer::prepare() _prepare = true; } -#undef ANDROID_NN_CONV_PARAMETERS - } // namespace ops } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc new file mode 100644 index 000000000..d265d0ac2 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics 
Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DepthToSpaceLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/DepthToSpace.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +DepthToSpaceLayer::DepthToSpaceLayer() : _input(nullptr), _block_size(0), _output(nullptr) +{ + // DO NOTHING +} + +template <typename T> void DepthToSpaceLayer::depthToSpace() +{ + nnfw::cker::DepthToSpace(getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()), + _block_size); +} + +void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size, + IPortableTensor *output) +{ + _input = input; + _block_size = block_size; + _output = output; +} + +void DepthToSpaceLayer::run() +{ + switch (_input->data_type()) + { + case OperandType::FLOAT32: + depthToSpace<float>(); + break; + case OperandType::INT32: + depthToSpace<int32_t>(); + break; + case OperandType::INT64: + depthToSpace<int64_t>(); + break; + case OperandType::QUANT_UINT8_ASYMM: + depthToSpace<uint8_t>(); + break; + case OperandType::QUANT_INT8_ASYMM: + depthToSpace<int8_t>(); + break; + default: + throw std::runtime_error{"DepthToSpace: unsupported data type"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h new file mode 100644 index 000000000..32e0171ce --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ + +#include <backend/IPortableTensor.h> + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +class DepthToSpaceLayer : public ::onert::exec::IFunction +{ +public: + DepthToSpaceLayer(); + + void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output); + + void run() override; + +private: + template <typename T> void depthToSpace(); + + const IPortableTensor *_input; + int32_t _block_size; + IPortableTensor *_output; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc index f1dc1103a..85553d14d 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc @@ -43,11 +43,12 @@ void DepthwiseConvolutionLayer::convFloat32() op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; - nnfw::cker::DepthwiseConv( + nnfw::cker::DepthwiseConv<float, float>( op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()), getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + _external_context->ruy_context()); } void DepthwiseConvolutionLayer::convQuant8() @@ -79,11 +80,12 @@ void DepthwiseConvolutionLayer::convQuant8() op_params.quantized_activation_min = output_activation_min; op_params.quantized_activation_max = output_activation_max; - nnfw::cker::DepthwiseConv( + nnfw::cker::DepthwiseConv<uint8_t, int32_t>( op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()), getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); + getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()), + _external_context->ruy_context()); } void DepthwiseConvolutionLayer::configure( @@ -91,7 +93,8 @@ void DepthwiseConvolutionLayer::configure( const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, - const ir::Activation activation, IPortableTensor *output) + const ir::Activation activation, IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context) { _input = input; _kernel = kernel; @@ -107,6 +110,7 @@ void DepthwiseConvolutionLayer::configure( _dilationHeight = dilationHeight; _activation = activation; _output = output; + _external_context = external_context; } void DepthwiseConvolutionLayer::run() diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h index fb032ecbf..fe1fcc182 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h +++ 
b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h @@ -19,6 +19,7 @@ #include <backend/IPortableTensor.h> #include "OperationUtils.h" +#include "../ExternalContext.h" #include <exec/IFunction.h> @@ -47,7 +48,7 @@ public: const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH, const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, const ir::Activation activation, - IPortableTensor *output); + IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context); void run() override; @@ -71,6 +72,8 @@ private: uint32_t _dilationHeight{1}; ir::Activation _activation{ir::Activation::NONE}; + + std::shared_ptr<ExternalContext> _external_context; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc index c1d63172b..3e1da5ec0 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc @@ -18,6 +18,8 @@ #include "OperationUtils.h" +#include <cker/operation/ELU.h> +#include <cker/operation/LeakyReLU.h> #include <cker/operation/Logistic.h> #include <cker/operation/ReLU.h> #include <cker/operation/ReLU6.h> @@ -91,6 +93,19 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab switch (op_type) { + case ElementwiseActivationType::kElu: + if (input->data_type() == OperandType::FLOAT32) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::ELU(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(Elu): unsupported data type"}; + } + break; case ElementwiseActivationType::kLogistic: if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { @@ -160,6 +175,21 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"}; } break; + case ElementwiseActivationType::kLeakyReLU: + if (_input->data_type() == OperandType::FLOAT32) + { + _kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), + reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(LeakyReLU): unsupported data type"}; + } + break; default: throw std::runtime_error("ElementwiseActivationLayer: unsupported op type"); } diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h index 3ef580041..948ab3b57 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h @@ -32,9 +32,11 @@ namespace ops enum class ElementwiseActivationType { + kElu, kLogistic, kReLU, - kTanh + kTanh, + kLeakyReLU }; class ElementwiseActivationLayer : public ::onert::exec::IFunction diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc index ea3c1e7cd..1e17a0828 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc @@ -18,6 +18,7 @@ #include 
"OperationUtils.h" +#include <cker/operation/LogicalAnd.h> #include <cker/operation/LogicalOr.h> #include <cker/operation/MaxMin.h> @@ -33,6 +34,25 @@ namespace ops namespace { template <typename T> +void logicalAndGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output) +{ + if (!HaveSameShapes(lhs, rhs)) + { + nnfw::cker::LogicalAndBroadcast<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs), + reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); + } + else + { + nnfw::cker::LogicalAndElementwise<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer())); + } +} + +template <typename T> void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output) { @@ -88,6 +108,16 @@ void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortab switch (op_type) { + case ElementwiseBinaryType::kLogicalAnd: + if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) + { + _kernel = logicalAndGeneric<bool>; + } + else + { + throw std::runtime_error{"LogicalOr: Unsupported data type"}; + } + break; case ElementwiseBinaryType::kLogicalOr: if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) { diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc index 066455e72..15d7f3049 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc @@ -195,6 +195,18 @@ void sinFloat32(const IPortableTensor *input, IPortableTensor *output) getTensorShape(output), reinterpret_cast<float *>(output->buffer())); } +void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Sqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void squareFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Square(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output) { if (!HaveSameShapes(input, output)) @@ -363,6 +375,26 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen throw std::runtime_error{"Sin: Unsupported data type"}; } break; + case ElementwiseUnaryType::kSqrt: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = sqrtFloat32; + } + else + { + throw std::runtime_error{"Sqrt: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kSquare: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = squareFloat32; + } + else + { + throw std::runtime_error{"Square: Unsupported data type"}; + } + break; case ElementwiseUnaryType::kZerosLike: if (input->data_type() == OperandType::FLOAT32) { diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h index c1765b5b7..54a6fc02a 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h @@ -46,6 +46,8 @@ enum class ElementwiseUnaryType kRound, kRSqrt, kSin, + 
kSqrt, + kSquare, kZerosLike }; diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc index b545e6743..5ea0ea893 100644 --- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc +++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc @@ -25,22 +25,19 @@ namespace cpu namespace ops { -ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _axis(nullptr), _output(nullptr) +ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _output(nullptr) { // DO NOTHING } -void ExpandDimsLayer::configure(const IPortableTensor *input, const IPortableTensor *axis, - IPortableTensor *output) +void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *output) { _input = input; - _axis = axis; _output = output; } void ExpandDimsLayer::run() { - // TODO use _axis to calculate shape of output when _axis is not constant size_t count = _input->total_size(); memcpy(_output->buffer(), _input->buffer(), count); } diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h index b5d4938b5..1b7ead0c3 100644 --- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h +++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h @@ -36,14 +36,12 @@ public: ExpandDimsLayer(); public: - void configure(const IPortableTensor *input, const IPortableTensor *axis, - IPortableTensor *output); + void configure(const IPortableTensor *input, IPortableTensor *output); void run() override; private: const IPortableTensor *_input; - const IPortableTensor *_axis; IPortableTensor *_output; }; diff --git a/runtime/onert/backend/cpu/ops/FillLayer.cc b/runtime/onert/backend/cpu/ops/FillLayer.cc index df3f8b7cd..5b7c17907 100644 --- a/runtime/onert/backend/cpu/ops/FillLayer.cc +++ b/runtime/onert/backend/cpu/ops/FillLayer.cc @@ -29,15 +29,13 @@ namespace cpu namespace ops { -FillLayer::FillLayer() : _input(nullptr), _value(nullptr), _output(nullptr) +FillLayer::FillLayer() : _value(nullptr), _output(nullptr) { // DO NOTHING } -void FillLayer::configure(const IPortableTensor *input, const IPortableTensor *value, - IPortableTensor *output) +void FillLayer::configure(const IPortableTensor *value, IPortableTensor *output) { - _input = input; _value = value; _output = output; } @@ -47,28 +45,24 @@ void FillLayer::run() switch (_output->data_type()) { case OperandType::FLOAT32: - nnfw::cker::Fill<float *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<float *>(_value->buffer()), + nnfw::cker::Fill<float *>(reinterpret_cast<float *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); break; case OperandType::INT32: - nnfw::cker::Fill<int32_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<int32_t *>(_value->buffer()), + nnfw::cker::Fill<int32_t *>(reinterpret_cast<int32_t *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); break; case OperandType::INT64: - nnfw::cker::Fill<int64_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<int64_t *>(_value->buffer()), + nnfw::cker::Fill<int64_t *>(reinterpret_cast<int64_t *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<int64_t *>(_output->buffer())); break; case OperandType::UINT32: - nnfw::cker::Fill<uint32_t *>( - getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<uint32_t *>(_value->buffer()), getTensorShape(_output), - reinterpret_cast<uint32_t 
*>(_output->buffer())); + nnfw::cker::Fill<uint32_t *>(reinterpret_cast<uint32_t *>(_value->buffer()), + getTensorShape(_output), + reinterpret_cast<uint32_t *>(_output->buffer())); break; default: throw std::runtime_error{"Fill: unsupported data type"}; diff --git a/runtime/onert/backend/cpu/ops/FillLayer.h b/runtime/onert/backend/cpu/ops/FillLayer.h index 1f17d6b68..ce843654a 100644 --- a/runtime/onert/backend/cpu/ops/FillLayer.h +++ b/runtime/onert/backend/cpu/ops/FillLayer.h @@ -35,13 +35,11 @@ class FillLayer : public ::onert::exec::IFunction public: FillLayer(); - void configure(const IPortableTensor *input, const IPortableTensor *value, - IPortableTensor *output); + void configure(const IPortableTensor *value, IPortableTensor *output); void run() override; private: - const IPortableTensor *_input; const IPortableTensor *_value; IPortableTensor *_output; }; diff --git a/runtime/onert/backend/cpu/ops/MeanLayer.cc b/runtime/onert/backend/cpu/ops/MeanLayer.cc index 4921ac748..f130692ee 100644 --- a/runtime/onert/backend/cpu/ops/MeanLayer.cc +++ b/runtime/onert/backend/cpu/ops/MeanLayer.cc @@ -36,9 +36,24 @@ MeanLayer::MeanLayer() : _input(nullptr), _axes(nullptr), _output(nullptr), _kee void MeanLayer::MeanFloat32() { - nnfw::cker::Mean(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), - getReducerAxes(_axes)); + const auto inputShape = getTensorShape(_input); + const auto axisVec = getReducerAxes(_axes); + bool axis_is_1_and_2 = + _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 && + ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1)); + + if (axis_is_1_and_2) + { + nnfw::cker::MeanAxis1And2(inputShape, reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_output), + reinterpret_cast<float *>(_output->buffer())); + } + else + { + nnfw::cker::Mean(inputShape, reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + axisVec); + } } void MeanLayer::MeanQuant8() @@ -57,6 +72,10 @@ void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *a _axes = axes; _output = output; _keep_dims = keep_dims; + + if (_input->data_type() != OperandType::FLOAT32 && + _input->data_type() != OperandType::QUANT_UINT8_ASYMM) + throw std::runtime_error{"Mean: unsupported data type"}; } void MeanLayer::run() diff --git a/runtime/onert/backend/ruy/Backend.h b/runtime/onert/backend/ruy/Backend.h new file mode 100644 index 000000000..bc8a024d8 --- /dev/null +++ b/runtime/onert/backend/ruy/Backend.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_BACKEND_H__ +#define __ONERT_BACKEND_RUY_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +#include <backend/Backend.h> + +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<onert::backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb, + bool) const override + { + const auto &operands = graph.operands(); + const auto &operations = graph.operations(); + auto context = std::make_unique<BackendContext>(this, &graph); + auto tr = std::make_shared<cpu_common::TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, + context->external_context()); + return context; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_BACKEND_H__ diff --git a/runtime/onert/backend/ruy/BackendContext.cc b/runtime/onert/backend/ruy/BackendContext.cc new file mode 100644 index 000000000..ef686f480 --- /dev/null +++ b/runtime/onert/backend/ruy/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? + for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace ruy +} 
// namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/BackendContext.h b/runtime/onert/backend/ruy/BackendContext.h new file mode 100644 index 000000000..b965c9a9d --- /dev/null +++ b/runtime/onert/backend/ruy/BackendContext.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" +#include "ExternalContext.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(new ExternalContext) + { + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + + FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + + std::shared_ptr<ExternalContext> external_context() { return _external_context; } + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: + // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, + // the thread pool is also created in duplicate + // TODO Create one ruy context for session + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/ruy/CMakeLists.txt b/runtime/onert/backend/ruy/CMakeLists.txt new file mode 100644 index 000000000..206acbfbf --- /dev/null +++ b/runtime/onert/backend/ruy/CMakeLists.txt @@ -0,0 +1,22 @@ +set(LIB_ONERT_BACKEND_RUY onert_backend_ruy) + +nnfw_find_package(Ruy REQUIRED) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_ONERT_BACKEND_RUY} SHARED ${SOURCES}) + +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_lib_ruy) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} 
PRIVATE nnfw_common) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_coverage) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE ruy) + +set_target_properties(${LIB_ONERT_BACKEND_RUY} PROPERTIES OUTPUT_NAME backend_ruy) + +if(CMAKE_BUILD_TYPE_LC STREQUAL "release") + add_custom_command(TARGET ${LIB_ONERT_BACKEND_RUY} POST_BUILD + COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_RUY}>) +endif() + +install(TARGETS ${LIB_ONERT_BACKEND_RUY} DESTINATION lib) diff --git a/runtime/onert/backend/cpu/Tensor.cc b/runtime/onert/backend/ruy/Config.cc index dac8f898b..179caa9a6 100644 --- a/runtime/onert/backend/cpu/Tensor.cc +++ b/runtime/onert/backend/ruy/Config.cc @@ -14,18 +14,18 @@ * limitations under the License. */ -#include "Tensor.h" +#include "Config.h" namespace onert { namespace backend { -namespace cpu +namespace ruy { -// `dynamic_cast` not working across library boundaries on NDK -// With this as a key function, `dynamic_cast` works across dl -ExternalTensor::~ExternalTensor() {} +bool Config::initialize() { return true; } + +ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; } } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/ruy/Config.h b/runtime/onert/backend/ruy/Config.h new file mode 100644 index 000000000..9160dd5b1 --- /dev/null +++ b/runtime/onert/backend/ruy/Config.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_CONFIG_H__ +#define __ONERT_BACKEND_RUY_CONFIG_H__ + +#include <backend/IConfig.h> +#include <memory> +#include <util/ITimer.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class Config : public IConfig +{ +public: + std::string id() override { return "ruy"; } + bool initialize() override; + ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + bool supportPermutation() override { return true; } + bool supportDynamicTensor() override { return true; } + bool supportFP16() override { return false; } + + std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); } +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_CONFIG_H__ diff --git a/runtime/onert/backend/ruy/ConstantInitializer.h b/runtime/onert/backend/ruy/ConstantInitializer.h new file mode 100644 index 000000000..24b4d924d --- /dev/null +++ b/runtime/onert/backend/ruy/ConstantInitializer.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ + +#include <backend/cpu_common/ConstantInitializer.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using ConstantInitializer = cpu_common::ConstantInitializer; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h new file mode 100644 index 000000000..f51faccb8 --- /dev/null +++ b/runtime/onert/backend/ruy/ExternalContext.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ +#define __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ + +#include <util/ConfigSource.h> +#include <ruy/context.h> + +namespace +{ +const int kDefaultNumThreadpoolThreads = 4; +} + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class ExternalContext +{ +public: + ExternalContext() : _ruy_context(new ::ruy::Context) + { + setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS)); + } + + void setMaxNumThreads(int max_num_threads) + { + const int target_num_threads = + max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads; + _ruy_context->set_max_num_threads(target_num_threads); + } + + ::ruy::Context *ruy_context() const { return _ruy_context.get(); } + +private: + const std::unique_ptr<::ruy::Context> _ruy_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc new file mode 100644 index 000000000..cd2825068 --- /dev/null +++ b/runtime/onert/backend/ruy/KernelGenerator.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "KernelGenerator.h" + +#include "ops/ConvolutionLayer.h" +#include "ops/FullyConnectedLayer.h" + +#include <backend/Backend.h> +#include <backend/IConfig.h> +#include <memory> +#include <util/Utils.h> +#include <util/logging.h> +#include <exec/DynamicShapeInferer.h> + +#include <stdexcept> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context) + : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), + _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + assert(!_return_fn_seq); + assert(_tensor_builder->dynamicTensorManager()); + assert(_tensor_reg); + + auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); + + _return_fn_seq = std::make_unique<exec::FunctionSequence>(); + + // Prepare to handle dynamic tensors later + auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); + { + dyn_ctx->op_seq = &op_seq; + dyn_ctx->operations = &_operations_ctx; + dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); + dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager(); + + _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); + } + + _current_layout = op_seq.getLayout(); + for (const auto &operation_idx : op_seq.operations()) + { + const auto &node = _operations_ctx.at(operation_idx); + node.accept(*this); + _return_fn_seq->append(releaseFunction()); + + for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) + { + auto portable_tensor = _tensor_reg->getPortableTensor(ind); + if (portable_tensor) + { + assert(portable_tensor->layout() == ir::Layout::NHWC); + } + + auto tensor = _tensor_reg->getNativeTensor(ind); + if (tensor) + { + tensor->increase_ref(); + } + } + } +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index); + + const auto stride = node.param().stride; + const auto activation = node.param().activation; + const auto param_padding = node.param().padding; + const auto dilation = node.param().dilation; + auto fn = std::make_unique<ops::ConvolutionLayer>(); + + if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic()) + { + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left, + param_padding.param.right, param_padding.param.top, param_padding.param.bottom, + stride.horizontal, 
stride.vertical, dilation.width_factor, dilation.height_factor, + activation, ofm_tensor, _external_context); + + _return_fn = std::move(fn); + return; + } + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto padding = + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + dilation.width_factor, dilation.height_factor); + + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + dilation.width_factor, dilation.height_factor, activation, ofm_tensor, + _external_context); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + const auto activation = node.param().activation; + const auto weights_format = node.param().weights_format; + + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto weight_tensor = _tensor_reg->getPortableTensor(weight_index); + auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique<ops::FullyConnectedLayer>(); + + fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor, + _external_context); + + _return_fn = std::move(fn); +} + +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/KernelGenerator.h b/runtime/onert/backend/ruy/KernelGenerator.h new file mode 100644 index 000000000..0f6bd590a --- /dev/null +++ b/runtime/onert/backend/ruy/KernelGenerator.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ + +#include "ExternalContext.h" +#include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "Tensor.h" + +#include <backend/CustomKernelBuilder.h> +#include <backend/cpu_common/KernelGeneratorBase.h> +#include <ir/Operands.h> +#include <ir/Operations.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class KernelGenerator : public cpu_common::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context); + + void visit(const ir::OpSequence &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + +private: + const ir::Operands &_ctx; + const ir::Operations &_operations_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; + ir::Layout _current_layout; + const std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/backend/ruy/StaticTensorManager.h b/runtime/onert/backend/ruy/StaticTensorManager.h new file mode 100644 index 000000000..af2d25241 --- /dev/null +++ b/runtime/onert/backend/ruy/StaticTensorManager.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ + +#include "backend/cpu_common/StaticTensorManager.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using StaticTensorManager = cpu_common::StaticTensorManager; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/ruy/Tensor.h b/runtime/onert/backend/ruy/Tensor.h new file mode 100644 index 000000000..60d0fbf77 --- /dev/null +++ b/runtime/onert/backend/ruy/Tensor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_H__
+#define __ONERT_BACKEND_RUY_TENSOR_H__
+
+#include <backend/cpu_common/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using Tensor = cpu_common::Tensor;
+using ExternalTensor = cpu_common::ExternalTensor;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_H__
diff --git a/runtime/onert/backend/ruy/TensorBuilder.cc b/runtime/onert/backend/ruy/TensorBuilder.cc
new file mode 100644
index 000000000..c77defc30
--- /dev/null
+++ b/runtime/onert/backend/ruy/TensorBuilder.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+  : _tensor_reg{tensor_reg},
+    _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+    _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+  /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                                       ir::Layout layout)
+{
+  _tensor_info_map.emplace(ind, info);
+
+  // The ruy backend supports only the NHWC layout
+  assert(layout == ir::Layout::NHWC);
+  if (info.isDynamic())
+  {
+    _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+  }
+  else
+  {
+    _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+  }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+  assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+  const auto tensor_info = _tensor_info_map.at(ind);
+
+  if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+  {
+    const auto size = tensor_info.total_size();
+    _static_tensor_mgr->claimPlan(ind, size);
+  }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+  if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+  {
+    _static_tensor_mgr->releasePlan(ind);
+  }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+  return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+void TensorBuilder::allocate()
+{
+  // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
+  // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
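+  // In practice the static tensors have already been allocated by prepare() above,
+  // which calls allocateNonconsts() on the static tensor manager.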
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/TensorBuilder.h b/runtime/onert/backend/ruy/TensorBuilder.h
new file mode 100644
index 000000000..91c07bd82
--- /dev/null
+++ b/runtime/onert/backend/ruy/TensorBuilder.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+
+#include <backend/cpu_common/DynamicTensorManager.h>
+#include <backend/cpu_common/TensorRegistry.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class TensorBuilder
+{
+public:
+  TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
+
+  /**
+   * @brief Register tensor information to allocate on ruy backend
+   * @param[in] ind Operand index
+   * @param[in] info Operand information
+   * @param[in] layout Operand data layout
+   */
+  void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                          ir::Layout backend_layout);
+
+  void notifyFirstUse(const ir::OperandIndex &);
+  void notifyLastUse(const ir::OperandIndex &);
+
+  bool isRegistered(const ir::OperandIndex &) const;
+
+  void prepare(void);
+  void allocate();
+  void postFunctionPrepare() { /* DO NOTHING */ }
+
+  IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
+
+private:
+  const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+  std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+  std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
+  ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc
new file mode 100644
index 000000000..d249b2ce3
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "../Tensor.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer()
+  : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+    _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+    _dilationHeightFactor(1), _activation(ir::Activation::NONE),
+    _conv_kernel(new nnfw::ruy::Conv()), _prepare(false)
+{
+  // DO NOTHING
+}
+
+ConvolutionLayer::~ConvolutionLayer() = default;
+
+void ConvolutionLayer::convFloat32()
+{
+  float output_activation_min = 0, output_activation_max = 0;
+  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+  nnfw::ruy::ConvParams op_params;
+  op_params.padding_type = getPaddingType(_paddingType);
+  op_params.padding_values.width = _paddingLeft;
+  op_params.padding_values.height = _paddingTop;
+  op_params.stride_width = _strideWidth;
+  op_params.stride_height = _strideHeight;
+  op_params.dilation_width_factor = _dilationWidthFactor;
+  op_params.dilation_height_factor = _dilationHeightFactor;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  nnfw::ruy::Conv &kernel = *_conv_kernel;
+  kernel(op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+         getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
+         getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
+         getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+         _external_context->ruy_context());
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+                                 const IPortableTensor *bias, const ir::PaddingType paddingType,
+                                 const uint32_t paddingLeft, const uint32_t paddingRight,
+                                 const uint32_t paddingTop, const uint32_t paddingBottom,
+                                 const uint32_t strideWidth, const uint32_t strideHeight,
+                                 const uint32_t dilationWidthFactor,
+                                 const uint32_t dilationHeightFactor,
+                                 const ir::Activation activation, IPortableTensor *output,
+                                 const std::shared_ptr<ExternalContext> &external_context)
+{
+  _input = input;
+  _kernel = kernel;
+  _bias = bias;
+  _paddingType = paddingType;
+  _paddingLeft = paddingLeft;
+  _paddingRight = paddingRight;
+  _paddingTop = paddingTop;
+  _paddingBottom = paddingBottom;
+  _strideWidth = strideWidth;
+  _strideHeight = strideHeight;
+  _dilationWidthFactor = dilationWidthFactor;
+  _dilationHeightFactor = dilationHeightFactor;
+  _activation = activation;
+  _output = output;
+  _external_context = external_context;
+}
+
+void ConvolutionLayer::run()
+{
+  prepare();
+
+  if (_input->is_dynamic() || _kernel->is_dynamic())
+  {
+    const auto ifm_shape = _input->getShape().asFeature(_input->layout());
+    const auto ofm_shape = _output->getShape().asFeature(_input->layout());
+    // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
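+    // The shapes of dynamic tensors can change between runs, so the explicit
+    // padding values are recomputed from the current shapes on every run.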
+    const auto ker_shape = _kernel->getShape();
+    const auto ker_height = ker_shape.dim(1);
+    const auto ker_width = ker_shape.dim(2);
+
+    ir::Stride stride;
+    stride.vertical = _strideHeight;
+    stride.horizontal = _strideWidth;
+
+    ir::Padding param_padding;
+    param_padding.type = _paddingType;
+    param_padding.param.left = _paddingLeft;
+    param_padding.param.right = _paddingRight;
+    param_padding.param.top = _paddingTop;
+    param_padding.param.bottom = _paddingBottom;
+
+    const auto padding =
+      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                           _dilationWidthFactor, _dilationHeightFactor);
+
+    _paddingLeft = padding.left;
+    _paddingRight = padding.right;
+    _paddingTop = padding.top;
+    _paddingBottom = padding.bottom;
+  }
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    convFloat32();
+  }
+  else
+  {
+    throw std::runtime_error{"Conv: unsupported data type"};
+  }
+}
+
+void ConvolutionLayer::prepare()
+{
+  if (_prepare)
+    return;
+
+  nnfw::ruy::Conv &kernel = *_conv_kernel;
+  if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
+  {
+    kernel.prepare(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output),
+                   _strideWidth, _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+  }
+  _prepare = true;
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.h b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h
new file mode 100644
index 000000000..a55387b93
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ +#define __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ + +#include <backend/IPortableTensor.h> +#include "../ExternalContext.h" +#include "OperationUtils.h" + +#include <ruy/operation/Conv.h> +#include <exec/IFunction.h> +#include <functional> +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +class ConvolutionLayer : public ::onert::exec::IFunction +{ +public: + ConvolutionLayer(); + ~ConvolutionLayer(); + +public: + void convFloat32(); + + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType _paddingType, + const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, const ir::Activation activation, + IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context); + + void run() override; + + void prepare() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _paddingType; + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + uint32_t _dilationWidthFactor; + uint32_t _dilationHeightFactor; + + ir::Activation _activation; + + std::unique_ptr<nnfw::ruy::Conv> _conv_kernel; + + bool _prepare; + + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc new file mode 100644 index 000000000..af693e3b4 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "FullyConnectedLayer.h" + +#include "../Tensor.h" +#include <ruy/operation/FullyConnected.h> +#include <ruy/TensorUtils.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +FullyConnectedLayer::FullyConnectedLayer() + : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr), + _activation(ir::Activation::NONE), _external_context(nullptr) +{ + // DO NOTHING +} + +FullyConnectedLayer::~FullyConnectedLayer() = default; + +void FullyConnectedLayer::fullyConnectedFloat32() +{ + float output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + nnfw::ruy::FullyConnectedParams op_params; + + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + op_params.activation = convertActivationType(_activation); + op_params.lhs_cacheable = _weights->is_constant(); + op_params.rhs_cacheable = _input->is_constant(); + + nnfw::ruy::FullyConnected( + op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()), + getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + _external_context->ruy_context()); +} + +void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + ir::FullyConnectedWeightsFormat weights_format, + IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context) +{ + UNUSED_RELEASE(weights_format); + _input = input; + _weights = weights; + _bias = bias; + _activation = activation; + _output = output; + _external_context = external_context; +} + +void FullyConnectedLayer::run() +{ + if (_input->data_type() == OperandType::FLOAT32) + { + fullyConnectedFloat32(); + } + else + { + throw std::runtime_error{"FullyConnected: unsupported data type"}; + } +} + +void FullyConnectedLayer::prepare() +{ + if (_bias && _bias->is_constant()) + { + const int bias_size = getTensorShape(_bias).FlatSize(); + if (nnfw::ruy::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size)) + { + _bias = nullptr; + } + } +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h new file mode 100644 index 000000000..33d560f0b --- /dev/null +++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ +#define __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ + +#include <backend/IPortableTensor.h> +#include "../ExternalContext.h" +#include "OperationUtils.h" + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +class FullyConnectedLayer : public ::onert::exec::IFunction +{ +public: + FullyConnectedLayer(); + ~FullyConnectedLayer(); + +public: + void fullyConnectedFloat32(); + + void configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + ir::FullyConnectedWeightsFormat weights_format, IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context); + + void run() override; + + void prepare() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_weights; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::Activation _activation; + + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.cc b/runtime/onert/backend/ruy/ops/OperationUtils.cc new file mode 100644 index 000000000..929107b1a --- /dev/null +++ b/runtime/onert/backend/ruy/ops/OperationUtils.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OperationUtils.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type) +{ + switch (ir_padding_type) + { + case ir::PaddingType::EXPLICIT: + return nnfw::ruy::PaddingType::kNone; + case ir::PaddingType::SAME: + return nnfw::ruy::PaddingType::kSame; + case ir::PaddingType::VALID: + return nnfw::ruy::PaddingType::kValid; + default: + throw std::runtime_error("Wrong padding type."); + break; + } +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h new file mode 100644 index 000000000..5dfdc7ec5 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/OperationUtils.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ +#define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ + +#include <backend/IPortableTensor.h> + +#include <ruy/Shape.h> +#include <ruy/Types.h> +#include <iostream> +#include <ir/DataType.h> +#include <ir/InternalType.h> +#include <ir/Padding.h> + +#include <limits> + +using OperandType = onert::ir::DataType; + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +inline nnfw::ruy::Shape getTensorShape(const IPortableTensor *tensor) +{ + if (tensor == nullptr) + return nnfw::ruy::Shape(); + + const ir::Shape &shape = tensor->get_info().shape(); + + assert(tensor->layout() == ir::Layout::NHWC); + + auto rank = shape.rank(); + nnfw::ruy::Shape ret(rank); + auto data = ret.DimsData(); + for (int i = 0; i < rank; ++i) + { + data[i] = shape.dim(i); + } + return ret; +} + +inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Activation activation) +{ + switch (activation) + { + case ir::Activation::NONE: + return nnfw::ruy::FusedActivationFunctionType::kNone; + case ir::Activation::RELU: + return nnfw::ruy::FusedActivationFunctionType::kRelu; + case ir::Activation::RELU1: + return nnfw::ruy::FusedActivationFunctionType::kRelu1; + case ir::Activation::RELU6: + return nnfw::ruy::FusedActivationFunctionType::kRelu6; + case ir::Activation::TANH: + return nnfw::ruy::FusedActivationFunctionType::kTanh; + case ir::Activation::SIGMOID: + return nnfw::ruy::FusedActivationFunctionType::kSigmoid; + default: + throw std::runtime_error{"RUY backend: Cannot convert activation type"}; + } +} + +template <typename T> +void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::SIGMOID) + { + *activation_min = 0; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + } + else + { + std::cout << "Unsupported fused activation function." << std::endl; + } +} + +nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type); + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ diff --git a/runtime/onert/backend/ruy/ruy.cc b/runtime/onert/backend/ruy/ruy.cc new file mode 100644 index 000000000..4f33590e9 --- /dev/null +++ b/runtime/onert/backend/ruy/ruy.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Backend.h" + +extern "C" { + +onert::backend::Backend *onert_backend_create() { return new onert::backend::ruy::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } +} diff --git a/runtime/onert/backend/xnnpack/Backend.h b/runtime/onert/backend/xnnpack/Backend.h new file mode 100644 index 000000000..b7aef1625 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Backend.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_H__ +#define __ONERT_BACKEND_XNNPACK_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +#include <backend/Backend.h> + +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<onert::backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb, + bool) const override + { + const auto &operands = graph.operands(); + const auto &operations = graph.operations(); + auto context = std::make_unique<BackendContext>(this, &graph); + auto tr = std::make_shared<cpu_common::TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, + context->external_context()); + return context; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_BACKEND_H__ diff --git a/runtime/onert/backend/xnnpack/BackendContext.cc b/runtime/onert/backend/xnnpack/BackendContext.cc new file mode 100644 index 000000000..503d088aa --- /dev/null +++ b/runtime/onert/backend/xnnpack/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+void BackendContext::initConsts()
+{
+  for (auto &op : operation_list())
+  {
+    constant_initializer->setLayout(op.layout);
+    graph()->operations().at(op.index).accept(*constant_initializer);
+  }
+
+  for (auto ind : operand_list())
+  {
+    const auto &obj = graph()->operands().at(ind);
+    if (obj.isConstant() && !constant_initializer->exist(ind))
+    {
+      constant_initializer->registerDefaultInitializer(ind, obj);
+    }
+  }
+
+  constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                            const ir::OpSequences &op_seqs,
+                                            const ir::LowerInfoMap &lower_info)
+{
+  auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+                  ir::Remove::DUPLICATED;
+  for (auto index : operand_list())
+  {
+    if (model_io.contains(index))
+      continue;
+    const auto &obj = graph()->operands().at(index);
+    const auto frontend_layout = [&]() {
+      if (obj.getUses().size() == 0)
+        return ir::Layout::UNKNOWN;
+      auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+      for (auto &operation_info : operation_list())
+      {
+        if (operation_info.index == use_op_ind)
+          return operation_info.layout;
+      }
+      return ir::Layout::UNKNOWN;
+    }();
+    const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+    if (permute_factor.backend() != backend())
+      continue;
+    const auto backend_layout = permute_factor.layout();
+    ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+                                 obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+    tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+  }
+
+  // TODO Get compiler options from compiler, and use it rather than getting it from Env
+  if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+  {
+    cpu_common::planTensors(*this, order, op_seqs, lower_info);
+  }
+  else
+  {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+    for (auto ind : operand_list())
+    {
+      if (tensor_builder->isRegistered(ind))
+        tensor_builder->notifyFirstUse(ind);
+    }
+  }
+
+  tensor_builder->prepare();
+
+  return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                       const ir::OpSequences &op_seqs)
+{
+  FunctionMap ret;
+
+  for (auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    bool assigned = [&]() {
+      for (auto op_info : operation_list())
+        if (op_seq.exist(op_info.index))
+          return true;
+      return false;
+    }();
+    if (!assigned)
+      continue;
+    auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+    ret.emplace_back(op_seq_ind, std::move(fn_seq));
+  }
+
+  initConsts();
+
+  // NOTE For memory optimization, we want to free some operand data
+  for (auto ind : operand_list())
+  {
+    // TODO Remove const_cast
+    auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+    obj.releaseData();
+  }
+
+  for (auto &it : ret)
+  {
+    auto &fn_seq = it.second;
+    fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+  }
+
+  return ret;
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/BackendContext.h b/runtime/onert/backend/xnnpack/BackendContext.h
new file mode 100644
index 000000000..f81175b9e
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/BackendContext.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <util/ConfigSource.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 1;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+  BackendContext(const Backend *backend, const ir::Graph *graph,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+                 std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+                 std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+                 std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+    : onert::backend::BackendContext(backend, graph, tensor_registry),
+      tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+      kernel_gen{kernel_gen}, _external_context(nullptr)
+  {
+    int num_threads = util::getConfigInt(util::config::XNNPACK_THREADS);
+    if (num_threads < 1)
+      num_threads = kDefaultNumThreadpoolThreads; // default num of threads
+    _external_context.reset(new ExternalContext(static_cast<size_t>(num_threads)));
+  }
+
+  ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                              const ir::OpSequences &op_seqs,
+                              const ir::LowerInfoMap &lower_info) override;
+
+  FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
+                         const ir::OpSequences &op_seqs) override;
+
+  std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+  void initConsts();
+  void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                   const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+  // TODO Make it private
+  std::shared_ptr<TensorBuilder> tensor_builder;
+  std::shared_ptr<ConstantInitializer> constant_initializer;
+  std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+  std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/xnnpack/CMakeLists.txt b/runtime/onert/backend/xnnpack/CMakeLists.txt
new file mode 100644
index 000000000..e3de31e6f
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(LIB_ONERT_BACKEND_XNNPACK onert_backend_xnnpack)
+
+# Unsupported architecture
+nnfw_find_package(Xnnpack QUIET)
+if(NOT Xnnpack_FOUND)
+  return()
+endif(NOT Xnnpack_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_XNNPACK} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE pthreadpool)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE XNNPACK)
+
+set_target_properties(${LIB_ONERT_BACKEND_XNNPACK} PROPERTIES OUTPUT_NAME backend_xnnpack)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+  add_custom_command(TARGET ${LIB_ONERT_BACKEND_XNNPACK} POST_BUILD
+                     COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_XNNPACK}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_XNNPACK} DESTINATION lib)
diff --git a/runtime/onert/core/include/backend/IOptimizer.h b/runtime/onert/backend/xnnpack/Config.cc
index 4844d21b9..4d42a3f18 100644
--- a/runtime/onert/core/include/backend/IOptimizer.h
+++ b/runtime/onert/backend/xnnpack/Config.cc
@@ -14,38 +14,31 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_I_OPTIMIZER_H__
-#define __ONERT_BACKEND_I_OPTIMIZER_H__
+#include "Config.h"
 
-namespace onert
-{
-namespace ir
-{
-class LoweredGraph;
-}
-} // namespace onert
+#include <xnnpack.h>
 
 namespace onert
 {
 namespace backend
 {
+namespace xnnpack
+{
 
-/**
- * @brief Class for backend optimizations. This is an optional class so not all backends must have
- *        it.
- *
- */
-struct IOptimizer
+Config::~Config() { xnn_deinitialize(); }
+
+bool Config::initialize()
 {
-  virtual ~IOptimizer() = default;
-  /**
-   * @brief Run optimization
-   *
-   */
-  virtual void optimize() = 0;
-};
+  xnn_status status = xnn_initialize(nullptr /* allocator */);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to initialize XNNPACK"};
+  }
+  return true;
+}
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
+} // namespace xnnpack
 } // namespace backend
 } // namespace onert
-
-#endif // __ONERT_BACKEND_I_OPTIMIZER_H__
diff --git a/runtime/onert/backend/xnnpack/Config.h b/runtime/onert/backend/xnnpack/Config.h
new file mode 100644
index 000000000..2cf7406e5
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Config.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_CONFIG_H__
+#define __ONERT_BACKEND_XNNPACK_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class Config : public IConfig
+{
+public:
+  virtual ~Config();
+
+public:
+  std::string id() override { return "xnnpack"; }
+  bool initialize() override;
+  ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+  bool supportPermutation() override { return true; }
+  bool supportDynamicTensor() override { return true; }
+  bool supportFP16() override { return false; }
+
+  std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_CONFIG_H__
diff --git a/runtime/onert/backend/xnnpack/ConstantInitializer.h b/runtime/onert/backend/xnnpack/ConstantInitializer.h
new file mode 100644
index 000000000..45cdd8cd9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ConstantInitializer.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
+
+#include <backend/cpu_common/ConstantInitializer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using ConstantInitializer = cpu_common::ConstantInitializer;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/include/backend/IExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.cc
index 88ffb502c..3a9fe1b55 100644
--- a/runtime/onert/core/include/backend/IExternalContext.h
+++ b/runtime/onert/backend/xnnpack/ExternalContext.cc
@@ -14,21 +14,23 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
-#define __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
+#include "ExternalContext.h"
+
+#include <cassert>
 
 namespace onert
 {
 namespace backend
 {
+namespace xnnpack
+{
 
-struct IExternalContext
+ExternalContext::ExternalContext(size_t num_threads)
+  : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy)
 {
-  virtual ~IExternalContext() = default;
-  virtual void setMaxNumThreads(int) = 0;
-};
+  assert(_threadpool);
+}
 
+} // namespace xnnpack
 } // namespace backend
 } // namespace onert
-
-#endif // __ONERT_BACKEND_IEXTERNAL_CONTEXT__
diff --git a/runtime/onert/backend/xnnpack/ExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.h
new file mode 100644
index 000000000..682fd2e4e
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ExternalContext.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+
+#include <memory>
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class ExternalContext
+{
+public:
+  ExternalContext(size_t num_threads);
+
+public:
+  pthreadpool *getThreadPool() { return _threadpool.get(); }
+
+private:
+  std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> _threadpool;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc
new file mode 100644
index 000000000..b7d3f60fb
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/ConvolutionLayer.h"
+#include "ops/DepthwiseConvolutionLayer.h"
+#include "ops/FullyConnectedLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+KernelGenerator::KernelGenerator(
+  const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+  const std::shared_ptr<TensorBuilder> &tensor_builder,
+  const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+  const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+  const std::shared_ptr<ExternalContext> &external_context)
+  : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
+    _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+    _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+{
+  // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+  assert(!_return_fn_seq);
+  assert(_tensor_builder->dynamicTensorManager());
+  assert(_tensor_reg);
+
+  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
+
+  _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+
+  // Prepare to handle dynamic tensors later
+  auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+  {
+    dyn_ctx->op_seq = &op_seq;
+    dyn_ctx->operations = &_operations_ctx;
+    dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
+    dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+
+    _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+  }
+
+  _current_layout = op_seq.getLayout();
+  for (const auto &operation_idx : op_seq.operations())
+  {
+    const auto &node = _operations_ctx.at(operation_idx);
+    node.accept(*this);
+    _return_fn_seq->append(releaseFunction());
+
+    for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+    {
+      auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+      if (portable_tensor)
+      {
+        assert(portable_tensor->layout() == ir::Layout::NHWC);
+      }
+
+      auto tensor = _tensor_reg->getNativeTensor(ind);
+      if (tensor)
+      {
+        tensor->increase_ref();
+      }
+    }
+  }
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+  using ir::operation::Conv2D;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+  const auto stride = node.param().stride;
+  const auto activation = node.param().activation;
+  const auto param_padding = node.param().padding;
+  const auto dilation = node.param().dilation;
+  auto fn = std::make_unique<ops::ConvolutionLayer>(_external_context);
+
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
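+  // SAME/VALID padding from the IR is resolved into explicit padding values
+  // here, before the kernel is configured.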
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+
+  const auto padding =
+    ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                         dilation.width_factor, dilation.height_factor);
+
+  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+                dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+  using ir::operation::DepthwiseConv2D;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+  const auto stride = node.param().stride;
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  // Kernel format is [1, kernel_height, kernel_width, depth_out].
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+  const auto dilation_width = node.param().dilation.width_factor;
+  const auto dilation_height = node.param().dilation.height_factor;
+  const auto param_padding = node.param().padding;
+  const auto padding = ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width,
+                                            ker_height, dilation_width, dilation_height);
+  const auto multiplier = node.param().multiplier;
+  const auto activation = node.param().activation;
+
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+  auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(_external_context);
+
+  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+                multiplier, dilation_width, dilation_height, activation, ofm_tensor);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+  using ir::operation::FullyConnected;
+
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+  const auto activation = node.param().activation;
+
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
+  auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
+
+  auto fn = std::make_unique<ops::FullyConnectedLayer>(_external_context);
+
+  fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor);
+
+  _return_fn = std::move(fn);
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.h b/runtime/onert/backend/xnnpack/KernelGenerator.h
new file mode 100644
index 000000000..265824204
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+
+#include "ExternalContext.h"
+#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class KernelGenerator : public cpu_common::KernelGeneratorBase
+{
+public:
+  KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+                  const std::shared_ptr<TensorBuilder> &tensor_builder,
+                  const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+                  const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+                  const std::shared_ptr<ExternalContext> &external_context);
+
+  void visit(const ir::OpSequence &) override;
+  void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::DepthwiseConv2D &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+
+private:
+  const ir::Operands &_ctx;
+  const ir::Operations &_operations_ctx;
+  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+  std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+  ir::Layout _current_layout;
+  const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/xnnpack/StaticTensorManager.h b/runtime/onert/backend/xnnpack/StaticTensorManager.h
new file mode 100644
index 000000000..f7344e8d8
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/StaticTensorManager.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ + +#include "backend/cpu_common/StaticTensorManager.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using StaticTensorManager = cpu_common::StaticTensorManager; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/xnnpack/Tensor.h b/runtime/onert/backend/xnnpack/Tensor.h new file mode 100644 index 000000000..b39cbd266 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Tensor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_H__ +#define __ONERT_BACKEND_XNNPACK_TENSOR_H__ + +#include <backend/cpu_common/Tensor.h> +#include <ir/Data.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using Tensor = cpu_common::Tensor; +using ExternalTensor = cpu_common::ExternalTensor; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_TENSOR_H__ diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.cc b/runtime/onert/backend/xnnpack/TensorBuilder.cc new file mode 100644 index 000000000..b570144ce --- /dev/null +++ b/runtime/onert/backend/xnnpack/TensorBuilder.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+  : _tensor_reg{tensor_reg},
+    _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+    _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+  /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                                       ir::Layout layout)
+{
+  _tensor_info_map.emplace(ind, info);
+
+  // The XNNPACK backend supports only the NHWC layout
+  assert(layout == ir::Layout::NHWC);
+  if (info.isDynamic())
+  {
+    _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+  }
+  else
+  {
+    _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+  }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+  assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+  const auto tensor_info = _tensor_info_map.at(ind);
+
+  if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+  {
+    const auto size = tensor_info.total_size();
+    _static_tensor_mgr->claimPlan(ind, size);
+  }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+  if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+  {
+    _static_tensor_mgr->releasePlan(ind);
+  }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+  return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+void TensorBuilder::allocate()
+{
+  // NOTE For now there is nothing to do here. Allocation is done in the prepare stage,
+  // which is not ideal: CPU kernels require `ITensor`s to be allocated before kernel generation.
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.h b/runtime/onert/backend/xnnpack/TensorBuilder.h
new file mode 100644
index 000000000..dddfedbf9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/TensorBuilder.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
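The TensorBuilder above routes dynamic tensors to the dynamic tensor manager and static ones to the claim/release planner, so peak memory is decided entirely by the order of notifyFirstUse/notifyLastUse calls. A toy stand-in for that protocol (not onert code; the naive bump allocator and operand indices are assumptions for illustration, and a real planner would also recycle released regions):

#include <cstdint>
#include <iostream>
#include <map>

int main()
{
  std::map<int, std::pair<uint32_t, uint32_t>> plan; // operand index -> (offset, size)
  uint32_t top = 0;
  auto claim = [&](int ind, uint32_t size) { plan[ind] = {top, size}; top += size; }; // firstUse
  auto release = [&](int ind) { (void)ind; /* a real planner would recycle the region */ };

  claim(0, 64);  // notifyFirstUse(#0): lifetime begins, offset reserved
  claim(1, 128); // notifyFirstUse(#1): lifetimes of #0 and #1 overlap
  release(0);    // notifyLastUse(#0): from here on #0's region could be reused
  claim(2, 32);  // notifyFirstUse(#2)
  release(1);
  release(2);

  std::cout << "planned bytes (naive): " << top << "\n"; // 224 with this bump allocator
  return 0;
}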
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ +#define __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ + +#include <backend/cpu_common/DynamicTensorManager.h> +#include <backend/cpu_common/TensorRegistry.h> + +#include <ir/OperandIndexMap.h> + +#include "StaticTensorManager.h" +#include "Tensor.h" + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class TensorBuilder +{ +public: + TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); + + /** + * @brief Register tensor information to allocate on XNNPACK backend + * @param[in] ind Operand index + * @param[in] info Operand information + * @param[in] layout Operand data layout + */ + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout); + + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); + + bool isRegistered(const ir::OperandIndex &) const; + + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} + + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } + +private: + const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr; + std::unique_ptr<StaticTensorManager> _static_tensor_mgr; + ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc new file mode 100644 index 000000000..0612995c2 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context)
+  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+    _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0),
+    _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+  // DO NOTHING
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+                                 const IPortableTensor *bias, ir::PaddingType padding_type,
+                                 const uint32_t padding_left, const uint32_t padding_right,
+                                 const uint32_t padding_top, const uint32_t padding_bottom,
+                                 const uint32_t stride_width, const uint32_t stride_height,
+                                 const uint32_t dilation_width_factor,
+                                 const uint32_t dilation_height_factor,
+                                 const ir::Activation activation, IPortableTensor *output)
+{
+  _input = input;
+  _kernel = kernel;
+  _bias = bias;
+  _padding_type = padding_type;
+  _padding_left = padding_left;
+  _padding_right = padding_right;
+  _padding_top = padding_top;
+  _padding_bottom = padding_bottom;
+  _stride_width = stride_width;
+  _stride_height = stride_height;
+  _dilation_width_factor = dilation_width_factor;
+  _dilation_height_factor = dilation_height_factor;
+  _activation = activation;
+  _output = output;
+
+  // TODO Support layouts other than NHWC
+  assert(_input->layout() == ir::Layout::NHWC);
+
+  assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+         _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void ConvolutionLayer::run()
+{
+  assert(_external_context && _external_context->getThreadPool());
+  if (!_setup)
+  {
+    _setup = setup();
+    assert(_setup);
+  }
+
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+    if (status != xnn_status_success)
+    {
+      throw std::runtime_error{"failed to run FP32 Convolution operator"};
+    }
+  }
+  else
+  {
+    throw std::runtime_error{"XNNPACK Conv: unsupported data type"};
+  }
+}
+
+bool ConvolutionLayer::create()
+{
+  float output_activation_min = 0.f, output_activation_max = 0.f;
+  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+  // NHWC
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
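+  // Illustrative example (values assumed, not from any model): a 3x3 convolution
+  // taking 8 input channels to 16 output channels has kernel_shape [16, 3, 3, 8]
+  // in this OHWI order, i.e. kernel_height = 3, kernel_width = 3,
+  // output_channels = 16 and input_channels = 8.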
+  const auto &kernel_shape = _kernel->getShape();
+  uint32_t kernel_height = kernel_shape.dim(1);
+  uint32_t kernel_width = kernel_shape.dim(2);
+  uint32_t output_channels = kernel_shape.dim(0);
+  uint32_t input_channels = kernel_shape.dim(3);
+  assert(static_cast<uint32_t>(_input->getShape().dim(3)) == input_channels);
+  assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+
+  enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+    _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+    _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+    1 /* groups */, input_channels /* group_input_channels */,
+    output_channels /* group_output_channels */, input_channels /* input_channel_stride */,
+    output_channels /* output_channel_stride */,
+    reinterpret_cast<const float *>(_kernel->buffer()),
+    reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+    output_activation_max, 0, &_kernel_op);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to create FP32 Convolution operator"};
+  }
+  assert(_kernel_op != nullptr);
+  return true;
+}
+
+bool ConvolutionLayer::setup()
+{
+  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+  {
+    // it could be the model's input or output
+    return false;
+  }
+
+  uint32_t input_width = _input->getShape().dim(2);
+  uint32_t input_height = _input->getShape().dim(1);
+  uint32_t batch_size = _input->getShape().dim(0);
+  enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+    _kernel_op, batch_size, input_height, input_width,
+    reinterpret_cast<const float *>(_input->buffer()),
+    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to setup FP32 Convolution operator"};
+  }
+  return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h
new file mode 100644
index 000000000..6cbaa9f3a
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
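The layer splits XNNPACK's operator API in two: create() bakes weights, bias and hyper-parameters into an xnn_operator_t exactly once, while setup() binds the input/output pointers late and is retried from run(), because model I/O buffers may not exist yet at prepare time. The same call sequence can be exercised standalone; the sketch below reuses the XNNPACK entry points shown above, with toy shapes and a 1x1 identity kernel as assumptions (error handling elided):

#include <xnnpack.h>
#include <pthreadpool.h>
#include <cmath>
#include <vector>

int main()
{
  xnn_initialize(/*allocator=*/nullptr);
  pthreadpool_t pool = pthreadpool_create(/*threads_count=*/0); // 0 = use all cores

  // Assumed toy problem: NHWC input 1x4x4x1, 1x1 identity kernel, no padding.
  std::vector<float> kernel{1.0f}, bias{0.0f}, in(16, 2.0f), out(16, 0.0f);

  xnn_operator_t conv = nullptr;
  // "create": constants are captured once, as in ConvolutionLayer::create()
  xnn_create_convolution2d_nhwc_f32(
    /*padding top,right,bottom,left=*/0, 0, 0, 0, /*kernel h,w=*/1, 1, /*stride h,w=*/1, 1,
    /*dilation h,w=*/1, 1, /*groups=*/1, /*group_input_channels=*/1,
    /*group_output_channels=*/1, /*input_channel_stride=*/1, /*output_channel_stride=*/1,
    kernel.data(), bias.data(), /*output_min=*/-INFINITY, /*output_max=*/INFINITY,
    /*flags=*/0, &conv);

  // "setup" + "run": buffer binding is deferred, as in setup()/run()
  xnn_setup_convolution2d_nhwc_f32(conv, /*batch=*/1, /*height=*/4, /*width=*/4,
                                   in.data(), out.data(), pool);
  xnn_run_operator(conv, pool);

  xnn_delete_operator(conv);
  pthreadpool_destroy(pool);
  return 0;
}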
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ + +#include "Layer.h" + +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class ConvolutionLayer : public Layer +{ +public: + ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context); + +public: + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t dilation_width_factor, const uint32_t dilation_height_factor, + const ir::Activation activation, IPortableTensor *output); + + void run() override; + + bool create() override; + bool setup() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _padding_type; + uint32_t _padding_left; + uint32_t _padding_top; + uint32_t _padding_right; + uint32_t _padding_bottom; + + uint32_t _stride_width; + uint32_t _stride_height; + uint32_t _dilation_width_factor; + uint32_t _dilation_height_factor; + + ir::Activation _activation; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc new file mode 100644 index 000000000..947f04194 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "DepthwiseConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+DepthwiseConvolutionLayer::DepthwiseConvolutionLayer(
+  const std::shared_ptr<ExternalContext> external_context)
+  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+    _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
+    _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+  // DO NOTHING
+}
+
+void DepthwiseConvolutionLayer::configure(
+  const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+  ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
+  const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
+  const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
+  const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
+{
+  _input = input;
+  _kernel = kernel;
+  _bias = bias;
+  _padding_type = padding_type;
+  _padding_left = padding_left;
+  _padding_right = padding_right;
+  _padding_top = padding_top;
+  _padding_bottom = padding_bottom;
+  _stride_width = stride_width;
+  _stride_height = stride_height;
+  _multiplier = multiplier;
+  _dilation_width_factor = dilation_width_factor;
+  _dilation_height_factor = dilation_height_factor;
+  _activation = activation;
+  _output = output;
+
+  // TODO Support layouts other than NHWC
+  assert(_input->layout() == ir::Layout::NHWC);
+
+  assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+         _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void DepthwiseConvolutionLayer::run()
+{
+  assert(_external_context && _external_context->getThreadPool());
+  if (!_setup)
+  {
+    _setup = setup();
+    assert(_setup);
+  }
+
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+    if (status != xnn_status_success)
+    {
+      throw std::runtime_error{"failed to run FP32 DepthwiseConvolution operator"};
+    }
+  }
+  else
+  {
+    throw std::runtime_error{"XNNPACK DepthwiseConv: unsupported data type"};
+  }
+}
+
+bool DepthwiseConvolutionLayer::create()
+{
+  float output_activation_min = 0.f, output_activation_max = 0.f;
+  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+  // NHWC
+  // Kernel format is [1, kernel_height, kernel_width, depth_out].
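+  // Illustrative mapping (values assumed): depthwise convolution is expressed to
+  // XNNPACK as a grouped convolution with groups = input_channels. With 8 input
+  // channels and multiplier 2 this becomes groups = 8, group_input_channels = 1,
+  // group_output_channels = 2, so output_channels = 16 and the kernel shape is
+  // [1, kernel_height, kernel_width, 16].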
+  const auto &kernel_shape = _kernel->getShape();
+  uint32_t kernel_height = kernel_shape.dim(1);
+  uint32_t kernel_width = kernel_shape.dim(2);
+  uint32_t output_channels = kernel_shape.dim(3);
+  uint32_t input_channels = _input->getShape().dim(3);
+  assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+  assert(output_channels == input_channels * _multiplier);
+
+  enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+    _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+    _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+    input_channels /* groups */, 1 /* group_input_channels */,
+    _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
+    output_channels /* output_channel_stride */,
+    reinterpret_cast<const float *>(_kernel->buffer()),
+    reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+    output_activation_max, XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
+  }
+  assert(_kernel_op != nullptr);
+  return true;
+}
+
+bool DepthwiseConvolutionLayer::setup()
+{
+  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+  {
+    // it could be the model's input or output
+    return false;
+  }
+
+  uint32_t input_width = _input->getShape().dim(2);
+  uint32_t input_height = _input->getShape().dim(1);
+  uint32_t batch_size = _input->getShape().dim(0);
+  enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+    _kernel_op, batch_size, input_height, input_width,
+    reinterpret_cast<const float *>(_input->buffer()),
+    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to setup FP32 DepthwiseConvolution operator"};
+  }
+  return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
new file mode 100644
index 000000000..10f840ae7
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ + +#include "Layer.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class DepthwiseConvolutionLayer : public Layer +{ +public: + DepthwiseConvolutionLayer(const std::shared_ptr<ExternalContext> external_context); + +public: + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t multiplier, const uint32_t dilation_width_factor, + const uint32_t dilation_height_factor, const ir::Activation activation, + IPortableTensor *output); + + void run() override; + + bool create() override; + bool setup() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _padding_type; + uint32_t _padding_left; + uint32_t _padding_top; + uint32_t _padding_right; + uint32_t _padding_bottom; + + uint32_t _stride_width; + uint32_t _stride_height; + uint32_t _multiplier; + uint32_t _dilation_width_factor; + uint32_t _dilation_height_factor; + + ir::Activation _activation; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc new file mode 100644 index 000000000..d595fda36 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
+  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _activation(ir::Activation::NONE)
+{
+  // DO NOTHING
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+                                    const IPortableTensor *bias, ir::Activation activation,
+                                    IPortableTensor *output)
+{
+  _input = input;
+  _kernel = weights;
+  _bias = bias;
+  _activation = activation;
+  _output = output;
+
+  // TODO Support layouts other than NHWC
+  assert(_input->layout() == ir::Layout::NHWC);
+
+  assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+         _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void FullyConnectedLayer::run()
+{
+  assert(_external_context && _external_context->getThreadPool());
+  if (!_setup)
+  {
+    _setup = setup();
+    assert(_setup);
+  }
+
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+    if (status != xnn_status_success)
+    {
+      throw std::runtime_error{"failed to run FP32 FullyConnected operator"};
+    }
+  }
+  else
+  {
+    throw std::runtime_error{"XNNPACK FC: unsupported data type"};
+  }
+}
+
+bool FullyConnectedLayer::create()
+{
+  float output_activation_min = 0.f, output_activation_max = 0.f;
+  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+  const auto &kernel_shape = _kernel->getShape();
+  assert(kernel_shape.rank() == 2);
+  uint32_t output_channels = kernel_shape.dim(0);
+  uint32_t input_channels = kernel_shape.dim(1);
+
+  const auto &input_shape = _input->getShape();
+  const auto &output_shape = _output->getShape();
+  uint32_t flag = 0;
+  if (input_shape.rank() != output_shape.rank())
+  {
+    flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D;
+    assert(input_shape.num_elements() % input_channels == 0);
+  }
+  else
+  {
+    assert(static_cast<uint32_t>(input_shape.dim(input_shape.rank() - 1)) == input_channels);
+  }
+
+  assert(_kernel && _kernel->buffer());
+  const float *kernel_buffer = reinterpret_cast<const float *>(_kernel->buffer());
+  const float *bias_buffer = (_bias) ? reinterpret_cast<const float *>(_bias->buffer()) : nullptr;
+
+  enum xnn_status status = xnn_create_fully_connected_nc_f32(
+    input_channels, output_channels, input_channels /* input stride */,
+    output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
+    output_activation_max, flag, &_kernel_op);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
+  }
+  assert(_kernel_op != nullptr);
+  return true;
+}
+
+bool FullyConnectedLayer::setup()
+{
+  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+  {
+    // it could be the model's input or output
+    return false;
+  }
+
+  uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
+  enum xnn_status status = xnn_setup_fully_connected_nc_f32(
+    _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
+    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to setup FP32 FullyConnected operator"};
+  }
+  return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
new file mode 100644
index 000000000..883607ef9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public Layer
+{
+public:
+  FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+  void configure(const IPortableTensor *input, const IPortableTensor *_kernel,
+                 const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);
+
+  void run() override;
+
+  bool create() override;
+  bool setup() override;
+
+private:
+  const IPortableTensor *_input;
+  const IPortableTensor *_kernel;
+  const IPortableTensor *_bias;
+  IPortableTensor *_output;
+
+  ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/Layer.h b/runtime/onert/backend/xnnpack/ops/Layer.h
new file mode 100644
index 000000000..68b610f33
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/Layer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ + +#include <exec/IFunction.h> +#include <backend/IPortableTensor.h> +#include "OperationUtils.h" +#include "../ExternalContext.h" +#include "../Tensor.h" + +#include <cassert> +#include <memory> + +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class Layer : public ::onert::exec::IFunction +{ +public: + Layer(const std::shared_ptr<ExternalContext> external_context) + : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context} + { + // DO NOTHING + } + + ~Layer() + { + if (_kernel_op) + xnn_delete_operator(_kernel_op); + } + +public: + void prepare() override + { + if (_create) + return; + + _create = create(); + assert(_create); + + _setup = setup(); + } + virtual bool create() = 0; + virtual bool setup() = 0; + +protected: + xnn_operator_t _kernel_op; + bool _create; + bool _setup; + const std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/OperationUtils.h b/runtime/onert/backend/xnnpack/ops/OperationUtils.h new file mode 100644 index 000000000..5102e32dd --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/OperationUtils.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
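The prepare()/run() pair above implements a small state machine: create() runs exactly once, the eager setup() that follows it is allowed to fail while I/O buffers are still unbound, and run() retries setup() before executing. A minimal stand-alone analogue (names and the bind_io() hook are hypothetical; no XNNPACK types involved):

#include <cassert>

// Hypothetical, simplified analogue of ops::Layer's two-phase protocol.
class ToyLayer
{
public:
  void prepare()
  {
    if (_created)
      return;
    _created = create(); // one-time: bakes constant weights into the operator
    assert(_created);
    _ready = setup();    // may legitimately fail: I/O buffers not bound yet
  }

  void run()
  {
    if (!_ready)
      _ready = setup();  // retried once the runtime has bound the buffers
    assert(_ready);
    // kernel invocation (e.g. xnn_run_operator) would go here
  }

  void bind_io() { _io_bound = true; } // stands in for buffer allocation

private:
  bool create() { return true; }
  bool setup() { return _io_bound; }

  bool _created = false, _ready = false, _io_bound = false;
};

int main()
{
  ToyLayer l;
  l.prepare(); // create() succeeds; setup() fails quietly (no I/O buffers yet)
  l.bind_io(); // runtime attaches model input/output buffers
  l.run();     // setup() retried and succeeds, then the kernel runs
  return 0;
}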
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__ + +// duplicated from cpu/ops/OperationUtils.h +#include <ir/InternalType.h> +#include <ir/Padding.h> +#include <ir/DataType.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +using OperandType = ir::DataType; + +template <typename T> +void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::SIGMOID) + { + *activation_min = 0; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + } + else + { + throw std::runtime_error{"Unsupported fused activation function"}; + } +} + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__ diff --git a/runtime/onert/backend/xnnpack/xnnpack.cc b/runtime/onert/backend/xnnpack/xnnpack.cc new file mode 100644 index 000000000..38a6c5572 --- /dev/null +++ b/runtime/onert/backend/xnnpack/xnnpack.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
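CalculateActivationRange above is how fused activations disappear into the operators: the pair it produces is passed as output_min/output_max to the xnn_create_* calls, which clamp results in place. Restating two of its branches standalone for illustration (float case only; not the onert header):

#include <cassert>
#include <limits>

// Standalone restatement of two CalculateActivationRange branches (illustrative).
void relu6_range(float *min, float *max) { *min = 0.f; *max = 6.f; }
void none_range(float *min, float *max)
{
  *min = std::numeric_limits<float>::lowest(); // no clamping below
  *max = std::numeric_limits<float>::max();    // no clamping above
}

int main()
{
  float lo = 0.f, hi = 0.f;
  relu6_range(&lo, &hi); // fused RELU6 becomes the clamp [0, 6]
  assert(lo == 0.f && hi == 6.f);
  none_range(&lo, &hi);  // NONE leaves the full float range, i.e. a no-op clamp
  assert(hi == std::numeric_limits<float>::max());
  return 0;
}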
+ */
+
+#include "Backend.h"
+
+#include <util/logging.h>
+
+extern "C" {
+onert::backend::Backend *onert_backend_create()
+{
+  VERBOSE(onert_backend_create) << "'xnnpack' loaded\n";
+  return new onert::backend::xnnpack::Backend;
+}
+
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+  VERBOSE(onert_backend_destroy) << "'xnnpack' unloaded\n";
+  delete backend;
+}
+}
diff --git a/runtime/onert/core/include/backend/BackendContext.h b/runtime/onert/core/include/backend/BackendContext.h
index 1eba29550..4d212156a 100644
--- a/runtime/onert/core/include/backend/BackendContext.h
+++ b/runtime/onert/core/include/backend/BackendContext.h
@@ -19,6 +19,8 @@
 #include <memory>
 #include "ir/Graph.h"
+#include "ir/LowerInfoMap.h"
+#include "exec/FunctionSequence.h"
 
 namespace onert
 {
@@ -26,12 +28,10 @@ namespace backend
 {
 
 class Backend;
-class IConstantInitializer;
-class IKernelGenerator;
-class ITensorRegister;
 struct ITensorRegistry;
-struct ITensorBuilder;
-struct IOptimizer;
+
+using FunctionMap =
+  std::vector<std::pair<ir::OpSequenceIndex, std::unique_ptr<exec::FunctionSequence>>>;
 
 class BackendContext
 {
@@ -46,15 +46,8 @@ public:
 
 public:
   BackendContext(const Backend *backend, const ir::Graph *graph,
-                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
-                 std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
-                 std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
-                 std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
-                 std::shared_ptr<ITensorRegister> tensor_register = nullptr,
-                 std::shared_ptr<IOptimizer> optimizer = nullptr)
-    : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry},
-      tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
-      kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer}
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr)
+    : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry}
   {
   }
 
@@ -66,8 +59,19 @@ public:
   const Backend *backend() const { return _backend; }
   const ir::Graph *graph() const { return _graph; }
 
-  const std::vector<OperationInfo> &operation_list() { return _operation_list; }
-  const std::vector<ir::OperandIndex> &operand_list() { return _operand_list; }
+  const std::vector<OperationInfo> &operation_list() const { return _operation_list; }
+  const std::vector<ir::OperandIndex> &operand_list() const { return _operand_list; }
+
+  virtual ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &,
+                                      const ir::OpSequences &, const ir::LowerInfoMap &)
+  {
+    return nullptr;
+  }
+  virtual FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &,
+                                 const ir::OpSequences &)
+  {
+    return {};
+  }
 
 private:
   const Backend *_backend{nullptr};
@@ -77,11 +81,6 @@ private:
 
 public:
   std::shared_ptr<ITensorRegistry> tensor_registry;
-  std::shared_ptr<ITensorBuilder> tensor_builder;
-  std::shared_ptr<IConstantInitializer> constant_initializer;
-  std::shared_ptr<IKernelGenerator> kernel_gen;
-  std::shared_ptr<ITensorRegister> tensor_register;
-  std::shared_ptr<IOptimizer> optimizer;
 };
 
 using BackendContexts = std::unordered_map<const Backend *, std::unique_ptr<BackendContext>>;
diff --git a/runtime/onert/core/include/backend/ITensorBuilder.h b/runtime/onert/core/include/backend/ITensorBuilder.h
deleted file mode 100644
index 97721cf19..000000000
--- a/runtime/onert/core/include/backend/ITensorBuilder.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd.
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_ITENSOR_BUILDER_H__ -#define __ONERT_BACKEND_ITENSOR_BUILDER_H__ - -#include <map> - -#include "ir/Index.h" -#include "ir/OperandInfo.h" -#include "ir/Operation.h" -#include "ir/Layout.h" -#include "ITensor.h" -#include "ITensorManager.h" -#include "ITensorRegistry.h" -#include "IDynamicTensorManager.h" - -namespace onert -{ -namespace backend -{ - -struct ITensorBuilder -{ - using IterateFunction = std::function<void(const ir::OperandIndex &)>; - - virtual ~ITensorBuilder(void) = default; - - /** - * @brief Register tensor information to allocate on backend - * - * @param ind Index - * @param info Info - * @param backend_layout Backend layout - * @param as_const Whether this tensor is constant - */ - virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) = 0; - - /** - * @brief Check if the tensor has been registered with @c registerTensorInfo - * - * @return true If the tensor has been registered - * @return false Otherwise - */ - virtual bool isRegistered(const ir::OperandIndex &) const = 0; - -public: // methods for static tensor allocation - /** - * @brief Let the tensor builder know first use(start of lifetime) of a tensor - * Must be called before calling @c prepare - * Must be run up to once for each tensor before calling @c notifyLastUse - * NOTE: Useful only for static models - */ - virtual void notifyFirstUse(const ir::OperandIndex &) = 0; - /** - * @brief Let the tensor builder know last use(end of lifetime) of a tensor - * Must be run up to once for each tensor after calling @c notifyFirstUse - * NOTE: Useful only for static models - */ - virtual void notifyLastUse(const ir::OperandIndex &) = 0; - /** - * @brief Prepare the tensors - * Before calling this, all the tensors must be registered - */ - virtual void prepare(void) = 0; - /** - * @brief Allocate the tensors - * Before calling this, @c prepare must be called - */ - virtual void allocate() = 0; - /** - * @brief Some actions after functions' @c IFunction::prepare method. - * This is called right after each function's @c IFunction::prepare function has been - * called. - */ - virtual void postFunctionPrepare() = 0; - -public: // methods for dynamic tensor allocation - /** - * @brief Get dynamicTensorManager. If a backend does not support dynamic tensor, exception - * will be thrown. 
- * - * @return pointer of IDynamicTensorManager object - * - * @note Since it is a pointer, its life time is from the cration of TensorBuilder - * to the end of execution - */ - virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; } -}; - -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ITENSOR_BUILDER_H__ diff --git a/runtime/onert/core/include/backend/ITensorRegister.h b/runtime/onert/core/include/backend/ITensorRegister.h deleted file mode 100644 index b8e521ce3..000000000 --- a/runtime/onert/core/include/backend/ITensorRegister.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_ITENSOR_REGISTER_H__ -#define __ONERT_BACKEND_ITENSOR_REGISTER_H__ - -#include "ir/LowerInfoMap.h" -#include "ITensorBuilder.h" -#include "ir/Layout.h" -#include "ir/OperandIndexSequence.h" -#include "ir/OperandInfo.h" -#include "ir/Operands.h" -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace backend -{ - -class ITensorRegister : public ir::OperationVisitor -{ -public: - virtual ~ITensorRegister() = default; - -public: - void registerTensors(const ir::OpSequence &op_seq, const ir::LowerInfoMap *lower_info_map) - { - _current_op_seq_layout = op_seq.getLayout(); - _lower_info_map = lower_info_map; - assert(_lower_info_map != nullptr); - assert(tensor_builder().get() != nullptr); - op_seq.accept(*this); - } - -protected: - virtual const ir::Operands &operands() const = 0; - virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0; - -protected: -#define OP(InternalName) \ - void visit(const ir::operation::InternalName &node) override \ - { \ - for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) \ - { \ - defaultRegisterTensorInfo(ind); \ - } \ - } -#include "ir/Operations.lst" -#undef OP - -protected: - void defaultRegisterTensorInfo(const ir::OperandIndex &index) const - { - if (tensor_builder()->isRegistered(index)) - { - return; - } - - const auto &obj = operands().at(index); - const auto frontend_layout = frontendLayout(); - const auto backend_layout = backendLayout(index); - ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), - obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; - tensor_builder()->registerTensorInfo(index, backend_info, backend_layout); - } - -protected: - ir::Layout frontendLayout() const { return _current_op_seq_layout; } - ir::Layout backendLayout(const ir::OperandIndex &index) const - { - assert(_lower_info_map != nullptr); - const auto lower_info = _lower_info_map->operand.at(index).get(); - return lower_info->def_factors().getOnlyElement().layout(); - } - -private: - ir::Layout _current_op_seq_layout; - const ir::LowerInfoMap *_lower_info_map{nullptr}; -}; - -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ITENSOR_REGISTER_H__ diff --git 
a/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h b/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h
new file mode 100644
index 000000000..19e7b7c99
--- /dev/null
+++ b/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
+#define __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
+
+#include <vector>
+
+#include "ir/Index.h"
+#include "ir/OpSequences.h"
+#include "ir/LowerInfoMap.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// TODO Remove the template param BackendContext once unification of cpu backend context is done
+template <typename T_BackendContext>
+void planTensors(const T_BackendContext &ctx, const std::vector<onert::ir::OpSequenceIndex> &order,
+                 const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+  auto graph = ctx.graph();
+  auto tensor_builder = ctx.tensor_builder;
+
+  ir::OperandIndexMap<uint32_t> uses_map;
+  ir::OperandIndexMap<uint32_t> def_map;
+  ir::OperandIndexSequence constants;
+
+  auto model_io =
+    (graph->getInputs() + graph->getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+  // Prepare scanning
+  for (auto ind : ctx.operand_list())
+  {
+    if (model_io.contains(ind))
+      continue;
+    const auto &obj = graph->operands().at(ind);
+    const auto &li = lower_info.operand.at(ind);
+    if (li->def_factors().getOnlyElement().backend() != ctx.backend())
+      continue;
+
+    // Ignore unused tensor
+    if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+    {
+      VERBOSE_F() << "Operand #" << ind.value() << " will not be used. no more process."
+                  << std::endl;
+      continue;
+    }
+
+    uses_map[ind] = obj.getUses().size();
+    def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+    if (obj.isConstant())
+      constants.append(ind);
+
+    auto factor = li->def_factors().getOnlyElement();
+    if (!tensor_builder->isRegistered(ind))
+    {
+      // These tensors do not exist in any op_seq (No use and def)
+      const auto info = obj.info();
+      const auto backend_layout = factor.layout();
+      // TODO Change tensor info to have permuted shape
+      tensor_builder->registerTensorInfo(ind, info, backend_layout);
+    }
+  }
+
+  // Start scanning to do notify{First|Last}Use for each tensor
+
+  // If a tensor is a constant, increase its use count and allocate it first.
+  // Increasing the use count here means the tensor is never deallocated during scanning,
+  // i.e. constants are deallocated last.
+  for (const auto &ind : constants)
+  {
+    uses_map[ind]++;
+    tensor_builder->notifyFirstUse(ind);
+  }
+
+  // At each operation,
+  // 1. Scan DEF of outputs. If the tensor is defined here, allocate it
+  // 2. Scan DEF of inputs. If it is a variable tensor, allocate it
+  // 3. Scan USE of inputs. Decrease the USE count and deallocate if the USE count reaches 0
+  for (const auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    for (const auto &op_idx : op_seq.operations())
+    {
+      auto op_inputs = graph->operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
+                       ir::Remove::UNDEFINED;
+      auto op_outputs = graph->operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED |
+                        ir::Remove::UNDEFINED;
+
+      // Define outputs
+      for (const auto &ind : op_outputs)
+      {
+        if (model_io.contains(ind))
+          continue;
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        assert(def_map.find(ind) != def_map.end());
+        if (def_map[ind])
+        {
+          def_map[ind] = 0;
+          tensor_builder->notifyFirstUse(ind);
+        }
+      }
+
+      // Scan variable tensors
+      // These tensors have constant-like features, but OperandInfo and LowerInfo treat them as
+      // non-constant so that memory planning here can reduce memory usage
+      for (const auto &ind : op_inputs)
+      {
+        if (model_io.contains(ind))
+          continue;
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        const auto &operand = graph->operands().at(ind);
+        if (operand.info().isVariable())
+        {
+          // The variable tensor with buffer is not supported yet
+          assert(operand.data() == nullptr);
+          assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+          assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+                 lower_info.operand.at(ind)->use_factors().size() == 1);
+          assert(uses_map[ind] == 1 && def_map[ind] == 0);
+          tensor_builder->notifyFirstUse(ind);
+        }
+      }
+
+      for (const auto &ind : op_inputs)
+      {
+        if (model_io.contains(ind))
+          continue;
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        assert(uses_map.find(ind) != uses_map.end());
+        assert(uses_map[ind] > 0);
+        uses_map[ind]--;
+        if (uses_map[ind] == 0)
+        {
+          // plan for deallocation of static tensor node
+          tensor_builder->notifyLastUse(ind);
+
+          // plan for deallocation of dynamic tensor
+          auto dyn_tensor_manager = tensor_builder->dynamicTensorManager();
+          auto *tensor = ctx.tensor_registry->getITensor(ind);
+          assert(tensor);
+          dyn_tensor_manager->planDealloc(op_idx, tensor);
+        }
+      }
+    }
+  }
+
+  // Dispose and validate
+  for (const auto &ind : constants)
+  {
+    --uses_map[ind];
+    if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+    {
+      tensor_builder->notifyLastUse(ind);
+    }
+  }
+
+  assert(
+    std::all_of(uses_map.begin(), uses_map.end(),
+                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+  assert(
+    std::all_of(def_map.begin(), def_map.end(),
+                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
diff --git a/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h b/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h
new file mode 100644
index 000000000..679355599
--- /dev/null
+++ b/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
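planTensors reduces lifetime planning to two counters per operand: def_map flips to zero when the producing operation is reached (triggering notifyFirstUse), uses_map counts down as consumers execute until zero triggers notifyLastUse, and constants receive one artificial extra use so they are released only in the final sweep. A toy trace for an assumed two-operation chain (operand indices hypothetical; model-I/O skipping omitted):

#include <iostream>
#include <map>

// Toy re-enactment of the use/def counting in planTensors (illustrative only).
int main()
{
  // Assumed graph: #0 constant weight, #1 = op1(model input), #2 = op2(#1, #0).
  std::map<int, int> uses{{0, 1}, {1, 1}, {2, 0}};

  auto first_use = [](int i) { std::cout << "notifyFirstUse(#" << i << ")\n"; };
  auto use = [&](int i) {
    if (--uses[i] == 0)
      std::cout << "notifyLastUse(#" << i << ")\n";
  };

  uses[0]++;    // constants get an extra use up front...
  first_use(0); // ...and are allocated before anything else

  first_use(1); // op1 defines #1
  first_use(2); // op2 defines #2 ...
  use(1);       // ... and consumes #1 -> notifyLastUse(#1)
  use(0);       // ... and consumes #0 (count 2 -> 1, still alive)

  use(0);       // final sweep drops the artificial use -> notifyLastUse(#0)
  return 0;
}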
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
+
+#include "TensorRegistry.h"
+
+#include "ConstantInitializerBase.h"
+#include <ir/Operands.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+class ConstantInitializer : public ConstantInitializerBase
+{
+public:
+  ConstantInitializer(const ir::Operands &operands,
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg);
+
+public:
+  void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
+
+  // TODO: For now only the cpu backend supports constant tensors that use external data.
+  // If other backends come to support this (ExternalTensor would probably need to be
+  // abstracted, e.g. behind an IExternal interface), this could become an interface of
+  // cpu_common::ConstantInitializerBase
+  void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
+
+private:
+  std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
+
+private:
+  std::shared_ptr<ITensorRegistry> _tensor_reg;
+};
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/include/backend/IConstantInitializer.h b/runtime/onert/core/include/backend/cpu_common/ConstantInitializerBase.h
index 149acecb4..d4c65de38 100644
--- a/runtime/onert/core/include/backend/IConstantInitializer.h
+++ b/runtime/onert/core/include/backend/cpu_common/ConstantInitializerBase.h
@@ -14,20 +14,21 @@
  * limitations under the License.
*/ -#ifndef __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__ -#define __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__ +#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__ #include <unordered_map> #include <functional> -#include "ITensorBuilder.h" #include "ir/Coordinates.h" #include "ir/Layout.h" #include "ir/Operand.h" #include "ir/Operands.h" #include "ir/OperationVisitor.h" #include "ir/OpSequence.h" +#include "backend/ITensorRegistry.h" #include "util/logging.h" +#include "backend/ITensorRegistry.h" namespace { @@ -153,11 +154,13 @@ namespace onert { namespace backend { +namespace cpu_common +{ -class IConstantInitializer : public ir::OperationVisitor +class ConstantInitializerBase : public ir::OperationVisitor { public: - virtual ~IConstantInitializer() = default; + virtual ~ConstantInitializerBase() = default; public: void run() @@ -178,15 +181,15 @@ public: } public: - IConstantInitializer(const ir::Operands &operands) - : _operands{operands}, _current_op_seq_layout{ir::Layout::UNKNOWN} + ConstantInitializerBase(const ir::Operands &operands) + : _operands{operands}, _current_layout{ir::Layout::UNKNOWN} { } public: using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>; - void setLayout(ir::Layout layout) { _current_op_seq_layout = layout; } + void setLayout(ir::Layout layout) { _current_layout = layout; } protected: virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0; @@ -221,10 +224,11 @@ public: protected: const ir::Operands &_operands; std::unordered_map<ir::OperandIndex, Initializer> _init_map; - ir::Layout _current_op_seq_layout; // TODO Rename this to _current_layout + ir::Layout _current_layout; }; +} // namespace cpu_common } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__ diff --git a/runtime/onert/core/include/backend/IKernelGenerator.h b/runtime/onert/core/include/backend/cpu_common/KernelGeneratorBase.h index afc34ec21..49a589768 100644 --- a/runtime/onert/core/include/backend/IKernelGenerator.h +++ b/runtime/onert/core/include/backend/cpu_common/KernelGeneratorBase.h @@ -14,28 +14,30 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_IKERNEL_GENERATOR_H__ -#define __ONERT_BACKEND_IKERNEL_GENERATOR_H__ +#ifndef __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__ +#define __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__ #include <assert.h> #include <memory> #include <functional> -#include "ITensorBuilder.h" #include "ir/OperationVisitor.h" #include "ir/OpSequence.h" #include <memory> #include "exec/FunctionSequence.h" +#include "backend/ITensorRegistry.h" namespace onert { namespace backend { +namespace cpu_common +{ -class IKernelGenerator : public ir::OperationVisitor +class KernelGeneratorBase : public ir::OperationVisitor { public: - virtual ~IKernelGenerator() = default; + virtual ~KernelGeneratorBase() = default; std::unique_ptr<exec::IFunction> releaseFunction() { @@ -70,7 +72,8 @@ protected: std::unique_ptr<exec::FunctionSequence> _return_fn_seq; // TODO Extract this out }; +} // namespace cpu_common } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_IKERNEL_GENERATOR_H__ +#endif // __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__ diff --git a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h index fa50b551e..850bcf2f2 100644 --- a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h @@ -17,9 +17,11 @@ #ifndef __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__ #define __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__ -#include "MemoryManager.h" - #include "backend/IStaticTensorManager.h" +#include "backend/cpu_common/DynamicTensorManager.h" +#include "backend/cpu_common/MemoryManager.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "backend/ITensorManager.h" #include "ir/OperandIndexMap.h" #include "ir/OperandInfo.h" #include "TensorRegistry.h" @@ -37,12 +39,10 @@ class StaticTensorManager : public backend::IStaticTensorManager { public: StaticTensorManager(const std::shared_ptr<TensorRegistry> ®, - DynamicMemoryManager *dynamic_mem_mgr); + DynamicTensorManager *dynamic_tensor_manager); virtual ~StaticTensorManager() = default; - void allocateConsts(void); void allocateNonconsts(void); - void deallocateConsts(void); void deallocateNonconsts(void); void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, @@ -54,11 +54,10 @@ public: void iterate(const std::function<void(const ir::OperandIndex &)> &fn); private: - std::unique_ptr<DynamicMemoryManager> _const_mgr; std::unique_ptr<MemoryManager> _nonconst_mgr; const std::shared_ptr<TensorRegistry> _tensors; ir::OperandIndexMap<bool> _as_constants; - DynamicMemoryManager *_dynamic_mem_mgr; + DynamicTensorManager *_dynamic_tensor_manager; }; } // namespace cpu_common diff --git a/runtime/onert/core/include/backend/cpu_common/Tensor.h b/runtime/onert/core/include/backend/cpu_common/Tensor.h index 5fa20e15d..5fbf4e729 100644 --- a/runtime/onert/core/include/backend/cpu_common/Tensor.h +++ b/runtime/onert/core/include/backend/cpu_common/Tensor.h @@ -21,6 +21,7 @@ #include <backend/IPortableTensor.h> #include <ir/OperandInfo.h> +#include <ir/Data.h> namespace onert { @@ -177,6 +178,91 @@ private: std::shared_ptr<Allocator> _allocator; }; +/** + * @brief Class that uses data from external memory that is not managed by a backend + * instead of allocating and copying the data. ExternalTensor's data pointer points to + * an address of memory such as where memory is already allocated, or mmapped area. 
+ * This means that ExternalTensor can take ir::Data of any type.
+ * To support this, the following are assumed: no padding, always NHWC layout,
+ * constant tensor, and not dynamic.
+ */
+class ExternalTensor : public Tensor
+{
+public:
+  ExternalTensor() = delete;
+  virtual ~ExternalTensor();
+
+public:
+  ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
+    : Tensor(info, layout, nullptr)
+  {
+    assert(_layout == ir::Layout::NHWC);
+    assert(_info.isConstant());
+    assert(_info.isDynamic() == false);
+  }
+
+public:
+  /**
+   * @brief set Data to be shared from external memory so that this ExternalTensor is not
+   *        allocated on the CPU backend
+   * @param[in] data data of Operand to be set
+   */
+  void setData(const std::shared_ptr<ir::Data> data)
+  {
+    assert(data != nullptr);
+    _data = data;
+    // Note: some ops such as cker::Conv may receive the buffer as nullptr;
+    // that's why _buffer is kept in sync as well
+    _buffer = const_cast<uint8_t *>(_data->base());
+  }
+
+public:
+  uint8_t *buffer() const override { return _buffer; }
+
+  bool is_constant() const override { return true; }
+  bool is_dynamic() const override { return false; }
+  void set_dynamic() override
+  {
+    throw std::runtime_error("This tensor does not support changing dynamic");
+  }
+
+  void setShape(const ir::Shape &) override
+  {
+    throw std::runtime_error("This tensor does not support changing shape");
+  }
+
+  void increase_ref() override { ++_num_references; }
+
+  void decrease_ref() override
+  {
+    assert(_data != nullptr);
+    assert(_num_references > 0);
+    --_num_references;
+    if (_num_references == 0)
+    {
+      _data.reset();
+      _buffer = nullptr;
+    }
+  }
+
+  /**
+   * @brief Reset reference count to zero and release data
+   */
+  void reset_ref() override
+  {
+    assert(_data != nullptr);
+    assert(_num_references > 0);
+    _num_references = 0;
+
+    _data.reset();
+    _buffer = nullptr;
+  }
+
+  int32_t num_references() override { return _num_references; }
+
+private:
+  std::shared_ptr<const ir::Data> _data;
+};
 } // namespace cpu_common
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h
index af13d13f7..7850e21eb 100644
--- a/runtime/onert/core/include/compiler/BackendManager.h
+++ b/runtime/onert/core/include/compiler/BackendManager.h
@@ -34,7 +34,7 @@ class BackendManager
 public:
   using backend_create_t = backend::Backend *(*)();
   using backend_destroy_t = void (*)(backend::Backend *);
-  using dlhandle_destroy_t = void (*)(void *);
+  using dlhandle_destroy_t = std::function<void(void *)>;
 
   static BackendManager &get();
 
diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h
index 3098be7ba..68b862d58 100644
--- a/runtime/onert/core/include/compiler/Compiler.h
+++ b/runtime/onert/core/include/compiler/Compiler.h
@@ -24,6 +24,7 @@
 
 #include "ir/Graph.h"
 #include "exec/IExecutor.h"
+#include "util/TracingCtx.h"
 
 namespace onert
 {
@@ -48,7 +49,6 @@ struct CompilerOptions
 {
   // GENERAL OPTIONS
   std::vector<std::string> backend_list;
-  bool is_primary_subgraph; // TODO Remove this out of this struct as it is not user-given option
 
   // OPTIONS ONLY FOR DEBUGGING/PROFILING
   std::string trace_filepath; //< File path to save trace records
@@ -60,6 +60,8 @@ struct CompilerOptions
   bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
   bool disable_compile;   //< Run with Interpreter if true, try compilation otherwise
   bool fp16_enable;       //< Whether fp16 mode ON/OFF
+
+ 
diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h
index af13d13f7..7850e21eb 100644
--- a/runtime/onert/core/include/compiler/BackendManager.h
+++ b/runtime/onert/core/include/compiler/BackendManager.h
@@ -34,7 +34,7 @@ class BackendManager
 public:
   using backend_create_t = backend::Backend *(*)();
   using backend_destroy_t = void (*)(backend::Backend *);
-  using dlhandle_destroy_t = void (*)(void *);
+  using dlhandle_destroy_t = std::function<void(void *)>;
 
   static BackendManager &get();
 
diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h
index 3098be7ba..68b862d58 100644
--- a/runtime/onert/core/include/compiler/Compiler.h
+++ b/runtime/onert/core/include/compiler/Compiler.h
@@ -24,6 +24,7 @@
 
 #include "ir/Graph.h"
 #include "exec/IExecutor.h"
+#include "util/TracingCtx.h"
 
 namespace onert
 {
@@ -48,7 +49,6 @@ struct CompilerOptions
 {
   // GENERAL OPTIONS
   std::vector<std::string> backend_list;
-  bool is_primary_subgraph; // TODO Remove this out of this struct as it is not user-given option
 
   // OPTIONS ONLY FOR DEBUGGING/PROFILING
   std::string trace_filepath; //< File path to save trace records
@@ -60,6 +60,8 @@ struct CompilerOptions
   bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
   bool disable_compile;   //< Run with Interpreter if true, try compilation otherwise
   bool fp16_enable;       //< Whether fp16 mode ON/OFF
+
+  util::TracingCtx *tracing_ctx; //< Profiling information
 };
 
 CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs);
 
@@ -73,8 +75,9 @@ public:
   /**
    * @brief Construct a new Compiler object
    * @param[in] subgs All subgraphs of a model
+   * @param[in] tracing_ctx Profiling information
    */
-  Compiler(const std::shared_ptr<ir::Subgraphs> &subgs);
+  Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx);
 
 public:
   /**
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
index aadba6857..f115ab9a8 100644
--- a/runtime/onert/core/include/compiler/LoweredGraph.h
+++ b/runtime/onert/core/include/compiler/LoweredGraph.h
@@ -67,8 +67,7 @@ private:
                             const compiler::BackendResolver &backend_resolver);
 
   void manipulateLowerInfo(
-    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
-    bool is_primary);
+    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info);
   void dumpLowerInfo();
   bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index,
                  ir::Layout layout, const compiler::BackendResolver &backend_resolver);
diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h
index 05f2679fc..33a2f62d9 100644
--- a/runtime/onert/core/include/compiler/StaticShapeInferer.h
+++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h
@@ -68,7 +68,7 @@ private:
 
 private:
   // TODO Define visitors for operations. List them in alphabetic order.
-  void visit(const ir::operation::ArgMax &op) override;
+  void visit(const ir::operation::ArgMinMax &op) override;
   void visit(const ir::operation::BatchMatMul &op) override;
   void visit(const ir::operation::BCQFullyConnected &op) override;
   void visit(const ir::operation::BCQGather &op) override;
diff --git a/runtime/onert/core/include/exec/DynamicShapeInferer.h b/runtime/onert/core/include/exec/DynamicShapeInferer.h
index d2eb83159..1f3a13b06 100644
--- a/runtime/onert/core/include/exec/DynamicShapeInferer.h
+++ b/runtime/onert/core/include/exec/DynamicShapeInferer.h
@@ -49,7 +49,7 @@ public:
 
 public:
   // TODO Define visitors for operations. List them in alphabetic order.
// Remove TODO when any op starting from the alphabet is added - void visit(const ir::operation::ArgMax &op) override; + void visit(const ir::operation::ArgMinMax &op) override; void visit(const ir::operation::BatchMatMul &op) override; void visit(const ir::operation::BCQFullyConnected &op) override; void visit(const ir::operation::BCQGather &op) override; diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h index 1d2831dd0..345bec8eb 100644 --- a/runtime/onert/core/include/exec/IExecutor.h +++ b/runtime/onert/core/include/exec/IExecutor.h @@ -18,17 +18,32 @@ * @file IExecutor.h * @brief This file defines interface of Executor */ -#ifndef __ONERT_EXEC_I_EXECUTOR_H_ -#define __ONERT_EXEC_I_EXECUTOR_H_ +#ifndef __ONERT_EXEC_I_EXECUTOR_H__ +#define __ONERT_EXEC_I_EXECUTOR_H__ #include "ir/Graph.h" #include "IFunction.h" #include "IODescription.h" +#include "ir/Index.h" #include "ir/OperationIndexMap.h" -#include "backend/IDynamicTensorManager.h" + +#include <cstdint> +#include <memory> +#include <unordered_map> namespace onert { +namespace backend +{ +class IPortableTensor; +namespace controlflow +{ +class IOTensor; +} +} +} +namespace onert +{ namespace exec { class IExecutionObserver; @@ -60,11 +75,29 @@ struct IExecutor virtual void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) = 0; /** - * @brief Start execution + * @brief Execute with user-given input/output description (for primary subgraph) * @param[in] desc Input and output description * @note This method should be thread-safe */ virtual void execute(const IODescription &desc) = 0; + + /** + * @brief Execute with given input/output tensors + * + * For non-primary subgraphs, input and output tensors must be given. + * + * @param[in] inputs tensors that are passed as inputs + * @param[in] outputs tensors that are passed as outputs + */ + virtual void execute(const std::vector<backend::IPortableTensor *> &inputs, + const std::vector<backend::IPortableTensor *> &outputs) = 0; + + /** + * @brief Get output tensor objects + * + * @return Vector of @c IOTensor + */ + virtual const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const = 0; }; using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>; @@ -72,4 +105,4 @@ using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecu } // namespace exec } // namespace onert -#endif // __ONERT_EXEC_I_EXECUTOR_H_ +#endif // __ONERT_EXEC_I_EXECUTOR_H__ diff --git a/runtime/onert/core/include/ir/DataType.h b/runtime/onert/core/include/ir/DataType.h index 9f09de3fb..e77c308ea 100644 --- a/runtime/onert/core/include/ir/DataType.h +++ b/runtime/onert/core/include/ir/DataType.h @@ -37,6 +37,7 @@ enum class DataType INT64 = 8, QUANT_INT8_ASYMM = 9, QUANT_INT16_ASYMM = 10, + QUANT_INT8_SYMM_PER_CHANNEL = 11, }; size_t sizeOfDataType(DataType data_type); diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h index 1f20ee665..45fadc474 100644 --- a/runtime/onert/core/include/ir/Operations.Include.h +++ b/runtime/onert/core/include/ir/Operations.Include.h @@ -17,69 +17,69 @@ // This file has no ifdef guard intentionally #include "ir/operation/AddN.h" +#include "ir/operation/ArgMinMax.h" +#include "ir/operation/BatchMatMul.h" #include "ir/operation/BatchToSpaceND.h" +#include "ir/operation/BCQFullyConnected.h" +#include "ir/operation/BCQGather.h" #include "ir/operation/BinaryArithmetic.h" #include 
"ir/operation/BroadcastTo.h" -#include "ir/operation/Conv2D.h" -#include "ir/operation/Pool2D.h" +#include "ir/operation/Comparison.h" #include "ir/operation/Concat.h" -#include "ir/operation/Reshape.h" -#include "ir/operation/Fill.h" -#include "ir/operation/FullyConnected.h" -#include "ir/operation/Softmax.h" -#include "ir/operation/Transpose.h" -#include "ir/operation/Permute.h" -#include "ir/operation/Reduce.h" +#include "ir/operation/Conv2D.h" +#include "ir/operation/ConvertFp16ToFp32.h" +#include "ir/operation/ConvertFp32ToFp16.h" +#include "ir/operation/Custom.h" +#include "ir/operation/DepthToSpace.h" #include "ir/operation/DepthwiseConv2D.h" -#include "ir/operation/Slice.h" -#include "ir/operation/StridedSlice.h" -#include "ir/operation/Squeeze.h" +#include "ir/operation/Einsum.h" #include "ir/operation/ElementwiseActivation.h" #include "ir/operation/ElementwiseBinary.h" #include "ir/operation/ElementwiseUnary.h" +#include "ir/operation/EmbeddingLookup.h" #include "ir/operation/ExpandDims.h" -#include "ir/operation/Comparison.h" +#include "ir/operation/Fill.h" +#include "ir/operation/FullyConnected.h" +#include "ir/operation/FusedBatchNorm.h" +#include "ir/operation/Gather.h" +#include "ir/operation/HashtableLookup.h" +#include "ir/operation/If.h" +#include "ir/operation/InstanceNorm.h" +#include "ir/operation/L2Normalization.h" +#include "ir/operation/LocalResponseNormalization.h" +#include "ir/operation/LogSoftmax.h" #include "ir/operation/LSTM.h" +#include "ir/operation/MatrixBandPart.h" +#include "ir/operation/OneHot.h" +#include "ir/operation/Pack.h" +#include "ir/operation/Pad.h" +#include "ir/operation/Permute.h" +#include "ir/operation/Pool2D.h" +#include "ir/operation/Pow.h" +#include "ir/operation/PReLU.h" +#include "ir/operation/Range.h" +#include "ir/operation/Rank.h" +#include "ir/operation/Reduce.h" +#include "ir/operation/Reshape.h" #include "ir/operation/ResizeBilinear.h" #include "ir/operation/ResizeNearestNeighbor.h" #include "ir/operation/Reverse.h" #include "ir/operation/RNN.h" +#include "ir/operation/Select.h" +#include "ir/operation/Shape.h" +#include "ir/operation/Slice.h" +#include "ir/operation/Softmax.h" #include "ir/operation/SpaceToBatchND.h" #include "ir/operation/SpaceToDepth.h" -#include "ir/operation/EmbeddingLookup.h" -#include "ir/operation/L2Normalization.h" -#include "ir/operation/HashtableLookup.h" -#include "ir/operation/InstanceNorm.h" -#include "ir/operation/PReLU.h" -#include "ir/operation/TransposeConv.h" -#include "ir/operation/SquaredDifference.h" -#include "ir/operation/TopKV2.h" -#include "ir/operation/Gather.h" -#include "ir/operation/ArgMax.h" -#include "ir/operation/LocalResponseNormalization.h" -#include "ir/operation/DepthToSpace.h" -#include "ir/operation/Pack.h" -#include "ir/operation/Select.h" #include "ir/operation/Split.h" #include "ir/operation/SplitV.h" +#include "ir/operation/SquaredDifference.h" +#include "ir/operation/Squeeze.h" +#include "ir/operation/StatelessRandomUniform.h" +#include "ir/operation/StridedSlice.h" +#include "ir/operation/Tile.h" +#include "ir/operation/TopKV2.h" +#include "ir/operation/Transpose.h" +#include "ir/operation/TransposeConv.h" #include "ir/operation/Unpack.h" -#include "ir/operation/Pad.h" -#include "ir/operation/Custom.h" -#include "ir/operation/Einsum.h" -#include "ir/operation/OneHot.h" -#include "ir/operation/Shape.h" -#include "ir/operation/ConvertFp32ToFp16.h" -#include "ir/operation/ConvertFp16ToFp32.h" -#include "ir/operation/If.h" #include "ir/operation/While.h" -#include 
"ir/operation/Pow.h" -#include "ir/operation/Tile.h" -#include "ir/operation/Range.h" -#include "ir/operation/Rank.h" -#include "ir/operation/BCQFullyConnected.h" -#include "ir/operation/BCQGather.h" -#include "ir/operation/MatrixBandPart.h" -#include "ir/operation/BatchMatMul.h" -#include "ir/operation/FusedBatchNorm.h" -#include "ir/operation/LogSoftmax.h" -#include "ir/operation/StatelessRandomUniform.h" diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst index ccde4d179..7f3c40b4b 100644 --- a/runtime/onert/core/include/ir/Operations.lst +++ b/runtime/onert/core/include/ir/Operations.lst @@ -20,69 +20,69 @@ // Internal Name OP(AddN) +OP(ArgMinMax) +OP(BatchMatMul) OP(BatchToSpaceND) +OP(BCQFullyConnected) +OP(BCQGather) OP(BinaryArithmetic) OP(BroadcastTo) +OP(Comparison) +OP(Concat) OP(Conv2D) +OP(ConvertFp16ToFp32) +OP(ConvertFp32ToFp16) +OP(Custom) +OP(DepthToSpace) OP(DepthwiseConv2D) -OP(Pool2D) -OP(Concat) -OP(Fill) -OP(FullyConnected) -OP(Reduce) -OP(Reshape) -OP(Softmax) -OP(Squeeze) -OP(Slice) -OP(StridedSlice) -OP(Transpose) +OP(Einsum) OP(ElementwiseActivation) OP(ElementwiseBinary) OP(ElementwiseUnary) +OP(EmbeddingLookup) OP(ExpandDims) -OP(Comparison) +OP(Fill) +OP(FullyConnected) +OP(FusedBatchNorm) +OP(Gather) +OP(HashtableLookup) +OP(If) +OP(InstanceNorm) +OP(L2Normalization) +OP(LocalResponseNormalization) +OP(LogSoftmax) OP(LSTM) +OP(MatrixBandPart) +OP(OneHot) +OP(Pack) +OP(Pad) +OP(Permute) +OP(Pool2D) +OP(Pow) +OP(PReLU) +OP(Range) +OP(Rank) +OP(Reduce) +OP(Reshape) OP(ResizeBilinear) OP(ResizeNearestNeighbor) OP(Reverse) OP(RNN) +OP(Select) +OP(Shape) +OP(Slice) +OP(Softmax) OP(SpaceToBatchND) OP(SpaceToDepth) -OP(EmbeddingLookup) -OP(L2Normalization) -OP(HashtableLookup) -OP(InstanceNorm) -OP(PReLU) -OP(TransposeConv) -OP(SquaredDifference) -OP(TopKV2) -OP(Gather) -OP(ArgMax) -OP(Einsum) -OP(LocalResponseNormalization) -OP(DepthToSpace) -OP(Pack) -OP(Select) OP(Split) OP(SplitV) +OP(SquaredDifference) +OP(Squeeze) +OP(StatelessRandomUniform) +OP(StridedSlice) +OP(Tile) +OP(TopKV2) +OP(Transpose) +OP(TransposeConv) OP(Unpack) -OP(Pad) -OP(Custom) -OP(Permute) -OP(OneHot) -OP(Shape) -OP(ConvertFp32ToFp16) -OP(ConvertFp16ToFp32) -OP(If) OP(While) -OP(Pow) -OP(Tile) -OP(Range) -OP(Rank) -OP(BCQFullyConnected) -OP(BCQGather) -OP(MatrixBandPart) -OP(BatchMatMul) -OP(FusedBatchNorm) -OP(LogSoftmax) -OP(StatelessRandomUniform) diff --git a/runtime/onert/core/include/ir/Subgraphs.h b/runtime/onert/core/include/ir/Subgraphs.h index 7b4c33b76..6cb369447 100644 --- a/runtime/onert/core/include/ir/Subgraphs.h +++ b/runtime/onert/core/include/ir/Subgraphs.h @@ -120,7 +120,7 @@ public: * * @return count of Subgraphs */ - size_t count() { return _subgraphs.size(); } + size_t count() const { return _subgraphs.size(); } /** * @brief Return the primary subgraph diff --git a/runtime/onert/core/include/ir/operation/ArgMax.h b/runtime/onert/core/include/ir/operation/ArgMinMax.h index ea7eabb83..1c9fccd22 100644 --- a/runtime/onert/core/include/ir/operation/ArgMax.h +++ b/runtime/onert/core/include/ir/operation/ArgMinMax.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_IR_OPERATION_ARG_MAX_H__ -#define __ONERT_IR_OPERATION_ARG_MAX_H__ +#ifndef __ONERT_IR_OPERATION_ARG_MIN_MAX_H__ +#define __ONERT_IR_OPERATION_ARG_MIN_MAX_H__ #include "ir/Operation.h" @@ -26,7 +26,7 @@ namespace ir namespace operation { -class ArgMax : public Operation +class ArgMinMax : public Operation { public: enum Input @@ -38,15 +38,16 @@ public: struct Param { DataType output_type; + bool is_arg_max = true; }; public: - ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m); + ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::ArgMax; } + OpCode opcode() const final { return OpCode::ArgMinMax; } public: const Param ¶m() const { return _param; } @@ -59,4 +60,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_ARG_MAX_H__ +#endif // __ONERT_IR_OPERATION_ARG_MIN_MAX_H__ diff --git a/runtime/onert/core/include/ir/operation/ElementwiseUnary.h b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h index c40778a56..7d6cb544a 100644 --- a/runtime/onert/core/include/ir/operation/ElementwiseUnary.h +++ b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ -#define __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ +#ifndef __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__ +#define __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__ #include "ir/Operation.h" @@ -51,7 +51,7 @@ public: RSQRT, SIN, SQRT, - SQURE, + SQUARE, ZEROS_LIKE }; @@ -80,4 +80,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ +#endif // __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__ diff --git a/runtime/onert/core/include/ir/operation/Fill.h b/runtime/onert/core/include/ir/operation/Fill.h index 524e41385..b55c77ae5 100644 --- a/runtime/onert/core/include/ir/operation/Fill.h +++ b/runtime/onert/core/include/ir/operation/Fill.h @@ -31,7 +31,7 @@ class Fill : public Operation public: enum Input { - INPUT = 0, + SHAPE = 0, VALUE, }; diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst index 30f211011..5944f8344 100644 --- a/runtime/onert/core/include/util/Config.lst +++ b/runtime/onert/core/include/util/Config.lst @@ -20,7 +20,7 @@ // Name | Type | Default CONFIG(GRAPH_DOT_DUMP , int , "0") -CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;bcq") // FIXME Remove bcq +CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;bcq") // FIXME Remove bcq CONFIG(OP_BACKEND_ALLOPS , std::string , "") CONFIG(OP_BACKEND_MAP , std::string , "") CONFIG(DISABLE_COMPILE , bool , "0") @@ -35,6 +35,7 @@ CONFIG(OP_SEQ_MAX_NODE , int , "0") CONFIG(TRACE_FILEPATH , std::string , "") CONFIG(FP16_ENABLE , bool , "0") CONFIG(RUY_THREADS , int , "-1") +CONFIG(XNNPACK_THREADS , int , "-1") CONFIG(USE_MMAPED_DATA , bool , "0") // Auto-generate all operations diff --git a/runtime/onert/core/include/util/ConfigSource.h b/runtime/onert/core/include/util/ConfigSource.h index b6a8144fd..da8bc8620 100644 --- a/runtime/onert/core/include/util/ConfigSource.h +++ b/runtime/onert/core/include/util/ConfigSource.h @@ -27,6 +27,7 @@ namespace util { void config_source(std::unique_ptr<IConfigSource> &&source); +void config_source_ext(std::unique_ptr<IConfigSource> &&source); bool toBool(const std::string &val); int 
toInt(const std::string &val);
 
diff --git a/runtime/onert/core/include/util/ShapeInference.h b/runtime/onert/core/include/util/ShapeInference.h
index 701b835d2..b11da90ce 100644
--- a/runtime/onert/core/include/util/ShapeInference.h
+++ b/runtime/onert/core/include/util/ShapeInference.h
@@ -42,7 +42,7 @@ using Shapes = std::vector<ir::Shape>;
 
 // Define shape calculation for operations. List them in alphabetic order.
 
-ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank);
+ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank);
 
 ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape,
                                 const ir::operation::BatchMatMul::Param &param);
@@ -70,7 +70,7 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha
 
 ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis);
 
-ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf);
+template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf);
 
 ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape);
 
diff --git a/runtime/onert/core/include/util/TracingCtx.h b/runtime/onert/core/include/util/TracingCtx.h
new file mode 100644
index 000000000..a82704cf0
--- /dev/null
+++ b/runtime/onert/core/include/util/TracingCtx.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_TRACING_CTX_H__
+#define __ONERT_UTIL_TRACING_CTX_H__
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/Subgraphs.h"
+
+#include <unordered_map>
+#include <mutex>
+
+namespace onert
+{
+namespace util
+{
+
+/**
+ * @brief Class to maintain information about profiling per session
+ */
+class TracingCtx
+{
+public:
+  /**
+   * @brief Create and store a unique session id managed by this class.
+   *        Note that this constructor can be called by multiple sessions running in parallel.
+   *        Use this constructor only when there is only one subgraph in a model.
+   */
+  TracingCtx(const ir::Graph *primary_subgraph)
+  {
+    decideSessionID();
+    _subgraph_indices.emplace(primary_subgraph, 0);
+  }
+
+  /**
+   * @brief Create and store a unique session id managed by this class.
+   *        Note that this constructor can be called by multiple sessions running in parallel.
+   */
+  TracingCtx(const onert::ir::Subgraphs *subgraphs)
+  {
+    assert(subgraphs);
+
+    decideSessionID();
+
+    auto count = subgraphs->count();
+    for (size_t i = 0; i < count; i++)
+      _subgraph_indices.emplace(subgraphs->at(onert::ir::SubgraphIndex(i)).get(), i);
+  }
+
+  uint32_t getSessionId() const { return _session_id; }
+
+  /**
+   * @brief Set subgraph index of a graph
+   */
+  void setSubgraphIndex(const ir::Graph *g, uint32_t index) { _subgraph_indices.emplace(g, index); }
+
+  /**
+   * @brief Get subgraph index of a graph.
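+   *
+   *        Usage sketch (illustrative only; assumes @c subgraphs is the
+   *        @c std::shared_ptr<ir::Subgraphs> this context was built from):
+   *
+   *          util::TracingCtx ctx{subgraphs.get()};      // assigns a unique session id
+   *          const ir::Graph *g = subgraphs->at(ir::SubgraphIndex{0}).get();
+   *          auto subg_index = ctx.getSubgraphIndex(g);  // -> ir::SubgraphIndex{0}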
+ */ + ir::SubgraphIndex getSubgraphIndex(const ir::Graph *g) const { return _subgraph_indices.at(g); } + +private: + void decideSessionID() + { + std::unique_lock<std::mutex> lock{_session_id_mutex}; + + static uint32_t next_session_id = 0; + _session_id = next_session_id++; + } + +private: + std::unordered_map<const ir::Graph *, ir::SubgraphIndex> _subgraph_indices; + uint32_t _session_id; + static std::mutex _session_id_mutex; +}; + +} // namespace util +} // namespace onert + +#endif // __ONERT_UTIL_TRACING_CTX_H__ diff --git a/runtime/onert/core/include/util/logging.h b/runtime/onert/core/include/util/logging.h index 76cfb8d60..65c375077 100644 --- a/runtime/onert/core/include/util/logging.h +++ b/runtime/onert/core/include/util/logging.h @@ -64,4 +64,11 @@ static Context &ctx = Context::get(); if (::onert::util::logging::ctx.enabled()) \ std::cout << "[" << __func__ << "] " +#define WHEN_LOG_ENABLED(METHOD) \ + if (::onert::util::logging::ctx.enabled()) \ + do \ + { \ + METHOD; \ + } while (0) + #endif // __ONERT_UTIL_LOGGING_H__ diff --git a/runtime/onert/core/src/backend/BackendContext.cc b/runtime/onert/core/src/backend/BackendContext.cc index bafa36d28..404c3b155 100644 --- a/runtime/onert/core/src/backend/BackendContext.cc +++ b/runtime/onert/core/src/backend/BackendContext.cc @@ -17,7 +17,6 @@ #include "backend/BackendContext.h" #include "ir/Operation.h" -#include "backend/IConstantInitializer.h" namespace onert { @@ -31,25 +30,5 @@ void BackendContext::initialize(const std::vector<OperationInfo> &operation_list _operand_list = operand_list; } -void BackendContext::initConsts() -{ - for (auto &op : _operation_list) - { - constant_initializer->setLayout(op.layout); - _graph->operations().at(op.index).accept(*constant_initializer); - } - - for (auto ind : _operand_list) - { - const auto &obj = _graph->operands().at(ind); - if (obj.isConstant() && !constant_initializer->exist(ind)) - { - constant_initializer->registerDefaultInitializer(ind, obj); - } - } - - constant_initializer->run(); -} - } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/Backend.h b/runtime/onert/core/src/backend/controlflow/Backend.h index cc8346e6b..3323cf5cb 100644 --- a/runtime/onert/core/src/backend/controlflow/Backend.h +++ b/runtime/onert/core/src/backend/controlflow/Backend.h @@ -72,8 +72,6 @@ public: context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr, context->external_context()); - context->tensor_register = nullptr; - context->optimizer = nullptr; return context; } diff --git a/runtime/onert/core/src/backend/controlflow/BackendContext.cc b/runtime/onert/core/src/backend/controlflow/BackendContext.cc new file mode 100644 index 000000000..366377edf --- /dev/null +++ b/runtime/onert/core/src/backend/controlflow/BackendContext.cc @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "KernelGenerator.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+void BackendContext::initConsts()
+{
+  for (auto &op : operation_list())
+  {
+    constant_initializer->setLayout(op.layout);
+    graph()->operations().at(op.index).accept(*constant_initializer);
+  }
+
+  for (auto ind : operand_list())
+  {
+    const auto &obj = graph()->operands().at(ind);
+    if (obj.isConstant() && !constant_initializer->exist(ind))
+    {
+      constant_initializer->registerDefaultInitializer(ind, obj);
+    }
+  }
+
+  constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                            const ir::OpSequences &op_seqs,
+                                            const ir::LowerInfoMap &lower_info)
+{
+  auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+                  ir::Remove::DUPLICATED;
+  for (auto index : operand_list())
+  {
+    if (model_io.contains(index))
+      continue;
+    const auto &obj = graph()->operands().at(index);
+    const auto frontend_layout = [&]() {
+      if (obj.getUses().size() == 0)
+        return ir::Layout::UNKNOWN;
+      auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+      for (auto &operation_info : operation_list())
+      {
+        if (operation_info.index == use_op_ind)
+          return operation_info.layout;
+      }
+      return ir::Layout::UNKNOWN;
+    }();
+    const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+    if (permute_factor.backend() != backend())
+      continue;
+    const auto backend_layout = permute_factor.layout();
+    ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+                                 obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+    tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+  }
+
+  // TODO Get compiler options from the compiler and use them rather than reading them from Env
+  if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+  {
+    cpu_common::planTensors(*this, order, op_seqs, lower_info);
+  }
+  else
+  {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+    for (auto ind : operand_list())
+    {
+      if (tensor_builder->isRegistered(ind))
+        tensor_builder->notifyFirstUse(ind);
+    }
+  }
+
+  tensor_builder->prepare();
+
+  return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<ir::OpSequenceIndex> &order,
+                                       const ir::OpSequences &op_seqs)
+{
+  FunctionMap ret;
+
+  for (auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    bool assigned = [&]() {
+      for (auto op_info : operation_list())
+        if (op_seq.exist(op_info.index))
+          return true;
+      return false;
+    }();
+    if (!assigned)
+      continue;
+    auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+    ret.emplace_back(op_seq_ind, std::move(fn_seq));
+  }
+
+  initConsts();
+
+  // NOTE For memory optimization, we want to free some operand data
+  for (auto ind : operand_list())
+  {
+    // TODO Remove const_cast
+    auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+    obj.releaseData();
+  }
+
+  for (auto &it : ret)
+  {
+    auto &fn_seq = it.second;
+    fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+  }
+
+  return ret;
+}
+
+} // namespace controlflow
+} // namespace backend
+} // 
namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/BackendContext.h b/runtime/onert/core/src/backend/controlflow/BackendContext.h index 3647338a0..a768d5d61 100644 --- a/runtime/onert/core/src/backend/controlflow/BackendContext.h +++ b/runtime/onert/core/src/backend/controlflow/BackendContext.h @@ -18,6 +18,9 @@ #define __ONERT_BACKEND_CONTROLFLOW_BACKEND_CONTEXT_H__ #include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" #include "ExternalContext.h" namespace onert @@ -32,21 +35,36 @@ class BackendContext : public onert::backend::BackendContext public: BackendContext(const Backend *backend, const ir::Graph *graph, std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, - std::shared_ptr<ITensorBuilder> tensor_builder = nullptr, - std::shared_ptr<IConstantInitializer> constant_initializer = nullptr, - std::shared_ptr<IKernelGenerator> kernel_gen = nullptr, - std::shared_ptr<ITensorRegister> tensor_register = nullptr, - std::shared_ptr<IOptimizer> optimizer = nullptr) - : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder, - constant_initializer, kernel_gen, tensor_register, - optimizer), - _external_context(std::make_shared<ExternalContext>()) + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(std::make_shared<ExternalContext>()) { } + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + + FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + std::shared_ptr<ExternalContext> external_context() { return _external_context; } private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, // the thread pool is also created in duplicate // TODO Create one ruy context for session diff --git a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h index e21a8f357..ac97ef91c 100644 --- a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h +++ b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h @@ -17,10 +17,7 @@ #ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ -#include "TensorRegistry.h" - -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> +#include <backend/cpu_common/ConstantInitializer.h> namespace onert { @@ -29,21 +26,7 @@ namespace backend namespace controlflow { -class ConstantInitializer : public IConstantInitializer -{ -public: - ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<ITensorRegistry> &tensor_reg) - : IConstantInitializer{operands}, 
_tensor_reg{tensor_reg} - { - } - -private: - std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; } - -private: - std::shared_ptr<ITensorRegistry> _tensor_reg; -}; +using ConstantInitializer = cpu_common::ConstantInitializer; } // namespace controlflow } // namespace backend diff --git a/runtime/onert/core/src/backend/controlflow/ExternalContext.h b/runtime/onert/core/src/backend/controlflow/ExternalContext.h index 3db6829a9..cfb983136 100644 --- a/runtime/onert/core/src/backend/controlflow/ExternalContext.h +++ b/runtime/onert/core/src/backend/controlflow/ExternalContext.h @@ -17,7 +17,6 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__ #define __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__ -#include <backend/IExternalContext.h> #include <util/ConfigSource.h> #include <ruy/context.h> @@ -38,7 +37,7 @@ namespace controlflow { // TODO Unify this with cpu::ExternalContext -class ExternalContext : public IExternalContext +class ExternalContext { public: ExternalContext() : _ruy_context(std::make_unique<ruy::Context>()) diff --git a/runtime/onert/core/src/backend/controlflow/IOTensor.cc b/runtime/onert/core/src/backend/controlflow/IOTensor.cc new file mode 100644 index 000000000..47405ac9e --- /dev/null +++ b/runtime/onert/core/src/backend/controlflow/IOTensor.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "IOTensor.h" + +#include <assert.h> + +namespace onert +{ +namespace backend +{ +namespace controlflow +{ + +IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout) + : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout} +{ + setUserTensor(nullptr, 0); +} + +void IOTensor::setTensor(IPortableTensor *tensor) +{ + assert(tensor); + assert(tensor != this); + // TODO Handle when layout was changed + assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet + _user_tensor.reset(); + _tensor = tensor; +} + +void IOTensor::setUserTensor(uint8_t *buffer, size_t size) +{ + _user_tensor = std::make_unique<UserTensor>(_orig_info, _orig_layout, buffer, size); + _tensor = _user_tensor.get(); +} + +} // namespace controlflow +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/IOTensor.h b/runtime/onert/core/src/backend/controlflow/IOTensor.h new file mode 100644 index 000000000..a7ed84b6d --- /dev/null +++ b/runtime/onert/core/src/backend/controlflow/IOTensor.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
+#define __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
+
+#include "backend/IPortableTensor.h"
+#include "UserTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+/**
+ * @brief Tensor object that indirects all accesses to the tensor it points to
+ *
+ * A model I/O tensor can be one of two types:
+ *
+ * 1. @c UserTensor, if it belongs to the primary graph
+ * 2. Any other derivative of @c IPortableTensor from another backend, otherwise
+ *
+ * To support both, this object indirects everything to the actual tensor pointer.
+ * Exceptionally, if it is a UserTensor, this class also creates and manages it.
+ */
+class IOTensor : public IPortableTensor
+{
+public:
+  IOTensor(const ir::OperandInfo &info, ir::Layout layout);
+
+public:
+  void setTensor(IPortableTensor *tensor);
+  void setUserTensor(uint8_t *buffer, size_t size);
+  ir::OperandInfo orig_info() const { return _orig_info; }
+  ir::Layout orig_layout() const { return _orig_layout; }
+
+public:
+  uint8_t *buffer() const override { return _tensor->buffer(); }
+  size_t total_size() const override { return _tensor->total_size(); }
+  size_t dimension(size_t index) const override { return _tensor->dimension(index); }
+  size_t num_dimensions() const override { return _tensor->num_dimensions(); }
+  size_t calcOffset(const ir::Coordinates &coords) const override
+  {
+    return _tensor->calcOffset(coords);
+  }
+  ir::Layout layout() const override { return _tensor->layout(); }
+  ir::DataType data_type() const override { return _tensor->data_type(); }
+  float data_scale() const override { return _tensor->data_scale(); }
+  int32_t data_offset() const override { return _tensor->data_offset(); }
+  bool is_dynamic() const override { return _is_dynamic || (_tensor && _tensor->is_dynamic()); }
+  void set_dynamic() override { _is_dynamic = true; }
+  ir::Shape getShape() const override { return _tensor->getShape(); }
+  void setShape(const ir::Shape &shape) override
+  {
+    // Workaround: IPortableTensor holds _info as its member
+    _info.shape(shape);
+    _tensor->setShape(shape);
+  }
+  bool is_constant() const override { return _tensor->is_constant(); }
+  bool applyShape(const ir::Shape &shape) override
+  {
+    // Workaround: IPortableTensor holds _info as its member
+    _info.shape(shape);
+    return _tensor->applyShape(shape);
+  }
+
+private:
+  const ir::OperandInfo _orig_info;
+  const ir::Layout _orig_layout;
+  bool _is_dynamic{false};
+  IPortableTensor *_tensor{nullptr};        //< The actual tensor that is indirected
+  std::unique_ptr<UserTensor> _user_tensor; //< If it is a user tensor, it is managed by this object
+};
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
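To make the indirection concrete, here is a short sketch of how an IOTensor might be bound in each of the two modes described above (illustrative only, not part of this commit; `info`, `user_buf`, `user_size`, and `backend_tensor` are hypothetical):

  // inside namespace onert
  using backend::controlflow::IOTensor;

  IOTensor io{info, ir::Layout::NHWC}; // info: the model I/O operand's ir::OperandInfo

  // Primary graph: wrap a caller-provided buffer (a UserTensor is created internally)
  io.setUserTensor(user_buf, user_size);

  // Non-primary graph: forward to a tensor owned by another backend
  io.setTensor(backend_tensor); // backend_tensor: an IPortableTensor* from that backend

  io.buffer(); // resolves to whichever tensor is currently bound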
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
index 8e39ee527..2606f044e 100644
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
+++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
@@ -31,7 +31,7 @@ namespace backend
 namespace controlflow
 {
 
-KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
                                  const std::shared_ptr<TensorRegistry> &tensor_reg,
                                  const std::shared_ptr<ExternalContext> &external_context)
   : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
@@ -77,18 +77,17 @@ void KernelGenerator::visit(const ir::operation::If &node)
   const auto then_subg_index = node.param().then_subg_index;
   const auto else_subg_index = node.param().else_subg_index;
 
-  std::vector<backend::ITensor *> input_tensors;
+  std::vector<backend::IPortableTensor *> input_tensors;
   for (const auto input_index : node.getInputs())
   {
-    auto input_tensor = getTensor(input_index);
-
+    auto input_tensor = getPortableTensor(input_index);
     input_tensors.emplace_back(input_tensor);
   }
 
-  std::vector<backend::ITensor *> output_tensors;
+  std::vector<backend::IPortableTensor *> output_tensors;
   for (const auto output_index : node.getOutputs())
   {
-    auto output_tensor = getTensor(output_index);
+    auto output_tensor = getPortableTensor(output_index);
     output_tensors.emplace_back(output_tensor);
   }
 
@@ -97,8 +96,8 @@ void KernelGenerator::visit(const ir::operation::If &node)
   const auto cond_tensor = input_tensors.front();
   input_tensors.erase(input_tensors.begin());
   auto fn = std::make_unique<::onert::backend::controlflow::kernel::IfLayer>(
-    cond_tensor, input_tensors, output_tensors, node.getOutputs(), _graph, then_subg_index,
-    else_subg_index, _executor_map, _external_context);
+    cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map,
+    _external_context);
 
   _return_fn = std::move(fn);
 }
@@ -124,33 +123,40 @@ void KernelGenerator::visit(const ir::operation::While &node)
 
   // This op does not support input as a constant, because controlflow backend does not have
   // TensorBuilder
-  std::vector<backend::ITensor *> input_tensors;
+  std::vector<backend::IPortableTensor *> input_tensors;
   for (const auto input_index : node.getInputs())
   {
-    auto input_tensor = getTensor(input_index);
-
+    auto input_tensor = getPortableTensor(input_index);
     input_tensors.emplace_back(input_tensor);
   }
 
-  std::vector<backend::ITensor *> output_tensors;
+  std::vector<backend::IPortableTensor *> output_tensors;
   for (const auto output_index : node.getOutputs())
   {
-    auto output_tensor = getTensor(output_index);
+    auto output_tensor = getPortableTensor(output_index);
     output_tensors.emplace_back(output_tensor);
   }
 
   // WhileLayer just sets ExecutorMap instead of cond and body executors to avoid the complexity
   // of creating executors recursively
   auto fn = std::make_unique<::onert::backend::controlflow::kernel::WhileLayer>(
-    input_tensors, output_tensors, node.getOutputs(), _graph, cond_subg_index, body_subg_index,
-    _executor_map, _external_context);
+    input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map,
+    _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
 
   _return_fn = std::move(fn);
 }
 
 backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
 {
-  backend::ITensor *ret = _tensor_registries.getITensor(index);
+  // get Tensor from all tensor registries (for Permute op)
+  auto ret = _tensor_registries.getITensor(index);
+  assert(ret != nullptr);
+  return ret;
+}
+
+backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index)
+{
+  auto ret = _tensor_reg->getPortableTensor(index);
  assert(ret != nullptr);
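  // NOTE A null result here would mean the operand was never registered to this
  //      backend's tensor registry.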
return ret; } diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h index c2c124339..7b395d186 100644 --- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h +++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h @@ -17,13 +17,12 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__ -#include <backend/IKernelGenerator.h> -#include <backend/ITensorBuilder.h> #include <exec/IExecutor.h> #include "ExternalContext.h" #include <ir/Graph.h> #include "TensorBuilder.h" #include "compiler/TensorRegistries.h" +#include "backend/cpu_common/KernelGeneratorBase.h" #include "TensorRegistry.h" namespace onert @@ -33,10 +32,10 @@ namespace backend namespace controlflow { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: - KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager, + KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager, const std::shared_ptr<TensorRegistry> &tensor_reg, const std::shared_ptr<ExternalContext> &external_context); @@ -50,8 +49,6 @@ public: _executor_map = executor_map.get(); } - using IKernelGenerator::visit; - void visit(const ir::OpSequence &) override; void visit(const ir::operation::If &) override; void visit(const ir::operation::Permute &) override; @@ -59,10 +56,11 @@ public: private: backend::ITensor *getTensor(const ir::OperandIndex &index); + backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index); private: const ir::Graph &_graph; - IDynamicTensorManager *_dyn_tensor_manager; + DynamicTensorManager *_dyn_tensor_manager; std::shared_ptr<TensorRegistry> _tensor_reg; compiler::TensorRegistries _tensor_registries; exec::ExecutorMap *_executor_map; diff --git a/runtime/onert/core/src/backend/controlflow/Tensor.h b/runtime/onert/core/src/backend/controlflow/Tensor.h index ba5bafd75..87951a9b3 100644 --- a/runtime/onert/core/src/backend/controlflow/Tensor.h +++ b/runtime/onert/core/src/backend/controlflow/Tensor.h @@ -27,6 +27,7 @@ namespace controlflow { using Tensor = cpu_common::Tensor; +using ExternalTensor = cpu_common::ExternalTensor; } // namespace controlflow } // namespace backend diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc index e4b0388f9..a767f0eca 100644 --- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc +++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc @@ -30,8 +30,8 @@ namespace controlflow TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg) : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())}, - _static_tensor_mgr{new cpu_common::StaticTensorManager( - _tensor_reg->base_reg(), _dynamic_tensor_mgr->dynamic_mem_mgr().get())} + _static_tensor_mgr{ + new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())} { /* empty */ } @@ -90,11 +90,7 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const return _tensor_info_map.find(ind) != _tensor_info_map.end(); } -void TensorBuilder::prepare(void) -{ - _static_tensor_mgr->allocateConsts(); - _static_tensor_mgr->allocateNonconsts(); -} +void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); } void TensorBuilder::allocate() { @@ -102,7 +98,7 @@ void 
TensorBuilder::allocate()
   // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
 }
 
-IDynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
+DynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
 {
   return _dynamic_tensor_mgr.get();
 }
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
index 695994761..d2e3076fd 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
+++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
@@ -21,7 +21,6 @@
 #include <backend/cpu_common/TensorRegistry.h>
 #include <backend/cpu_common/Tensor.h>
 
-#include <backend/ITensorBuilder.h>
 #include <ir/OperandIndexMap.h>
 
 #include <unordered_map>
@@ -35,7 +34,7 @@ namespace backend
 namespace controlflow
 {
 
-class TensorBuilder : public ITensorBuilder
+class TensorBuilder
 {
 public:
   TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
@@ -47,18 +46,18 @@ public:
    * @param[in] layout Operand data layout
    */
   void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
-                          ir::Layout backend_layout) override;
+                          ir::Layout backend_layout);
 
-  void notifyFirstUse(const ir::OperandIndex &) override;
-  void notifyLastUse(const ir::OperandIndex &) override;
+  void notifyFirstUse(const ir::OperandIndex &);
+  void notifyLastUse(const ir::OperandIndex &);
 
-  bool isRegistered(const ir::OperandIndex &) const override;
+  bool isRegistered(const ir::OperandIndex &) const;
 
-  void prepare(void) override;
-  void allocate() override;
-  void postFunctionPrepare() override { /* DO NOTHING */}
+  void prepare(void);
+  void allocate();
+  void postFunctionPrepare() { /* DO NOTHING */}
 
-  IDynamicTensorManager *dynamicTensorManager(void) override;
+  DynamicTensorManager *dynamicTensorManager(void);
 
   /**
    * @brief Get tensor with a specific OperandIndex.
diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
index 94f71bb9c..901f0aebb 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
+++ b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
@@ -20,7 +20,7 @@
 #include "backend/cpu_common/TensorRegistry.h"
 #include "backend/ITensorRegistry.h"
 #include "Tensor.h"
-#include "UserTensor.h"
+#include "IOTensor.h"
 #include <assert.h>
 
 namespace onert
@@ -36,9 +36,10 @@ namespace controlflow
 * This class contains three types of tensors: two kinds of native tensors (tensors that are
 * managed by this backend) and migrant tensors.
* - * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given - * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg ) - * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg ) + * - NativeIOTensor - @c IOTensor managed by this backend ( in @c _base_reg ) + * - NOTE The tensor it actually points to can be from another backend + * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg ) + * - MigrantTensor - @c IPortableTensor managed by other backends * * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager * @@ -53,7 +54,7 @@ public: auto base_tensor = _base_reg->getITensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } ITensor *getNativeITensor(const ir::OperandIndex &ind) override @@ -61,7 +62,7 @@ public: auto base_tensor = _base_reg->getNativeITensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } IPortableTensor *getPortableTensor(const ir::OperandIndex &ind) @@ -69,7 +70,7 @@ public: auto base_tensor = _base_reg->getPortableTensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } IPortableTensor *getNativeTensor(const ir::OperandIndex &ind) @@ -77,7 +78,7 @@ public: auto base_tensor = _base_reg->getNativeTensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } Tensor *getNativeOwnTensor(const ir::OperandIndex &ind) @@ -85,10 +86,10 @@ public: return _base_reg->getNativeTensor(ind); } - UserTensor *getNativeUserTensor(const ir::OperandIndex &ind) + IOTensor *getNativeIOTensor(const ir::OperandIndex &ind) { - auto tensor = _native_user_tensors.find(ind); - if (tensor != _native_user_tensors.end()) + auto tensor = _native_io_tensors.find(ind); + if (tensor != _native_io_tensors.end()) return tensor->second.get(); return nullptr; } @@ -108,22 +109,22 @@ public: _base_reg->setNativeTensor(ind, std::move(tensor)); } - void setNativeUserTensor(ir::OperandIndex ind, std::unique_ptr<UserTensor> &&tensor) + void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr<IOTensor> &&tensor) { assert(tensor); assert(!getITensor(ind)); // For the ind, tensor is not registered yet - _native_user_tensors[ind] = std::move(tensor); + _native_io_tensors[ind] = std::move(tensor); } - const ir::OperandIndexMap<std::unique_ptr<UserTensor>> &native_user_tensors() + const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors() { - return _native_user_tensors; + return _native_io_tensors; } std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; } private: std::shared_ptr<cpu_common::TensorRegistry> _base_reg; - ir::OperandIndexMap<std::unique_ptr<UserTensor>> _native_user_tensors; + ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors; }; } // namespace controlflow diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc index de91b850a..1d786c4dd 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc +++ b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc @@ -18,7 +18,6 @@ #include <backend/ITensor.h> #include "exec/ExecutorBase.h" -#include <misc/polymorphic_downcast.h> #include "PermuteLayer.h" namespace onert @@ -30,16 +29,15 @@ namespace controlflow 
namespace kernel { -IfLayer::IfLayer(backend::ITensor *cond_tensor, const std::vector<backend::ITensor *> input_tensors, - const std::vector<backend::ITensor *> output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, +IfLayer::IfLayer(backend::IPortableTensor *cond_tensor, + const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, exec::ExecutorMap *executor_map, const std::shared_ptr<ExternalContext> &external_context) : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors}, - _output_indices{output_indices}, _graph{graph}, _then_subg_index{then_subg_index}, - _else_subg_index{else_subg_index}, _executor_map{executor_map}, - _external_context{external_context} + _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, + _executor_map{executor_map}, _external_context{external_context} { // At this point, executor_map may not have executors of then subg and else subg } @@ -48,79 +46,34 @@ void IfLayer::run() { // Check condition // // If true - // // // Copy _input_tensors -> then subg's inputs - // // // Run then subg - // // // Copy outputs of then subg -> _output_tensors + // // // Set _input_tensors -> then-subg's inputs + // // // Set outputs of then-subg -> _output_tensors + // // // Run then-subg // // Else - // // // Copy _input_tensors -> else subg's inputs if false - // // // Run else subg - // // // Copy outputs of else subg -> _output_tensors - auto getResultCond = [](backend::ITensor *tensor) -> bool { + // // // Set _input_tensors -> else-subg's inputs + // // // Set outputs of else-subg -> _output_tensors + // // // Run else-subg + + auto getResultCond = [](backend::IPortableTensor *tensor) -> bool { bool ret = false; tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); }); return ret; }; - exec::ExecutorBase *subg_exec = nullptr; + exec::IExecutor *subg_exec = nullptr; bool cond_result = getResultCond(_cond_tensor); if (cond_result) { VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl; - subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_then_subg_index).get()); + subg_exec = _executor_map->at(_then_subg_index).get(); } else { VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl; - subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_else_subg_index).get()); - } - - const auto &subg_graph = subg_exec->graph(); - - std::vector<backend::ITensor *> src_tensors; - std::vector<backend::ITensor *> dst_tensors; - // Add tensors used in subgraph or contained in outputs of subgraph - assert(subg_graph.getInputs().size() == _input_tensors.size()); - assert(subg_graph.getInputs().size() == subg_exec->getInputTensors().size()); - for (uint32_t i = 0; i < subg_graph.getInputs().size(); ++i) - { - const auto &subg_input_index = subg_graph.getInputs().at(i); - const auto &subg_input = subg_graph.operands().at(subg_input_index); - if (subg_input.getUses().size() > 0 || subg_graph.getOutputs().contains(subg_input_index)) - { - src_tensors.emplace_back(_input_tensors.at(i)); - dst_tensors.emplace_back(subg_exec->getInputTensors().at(i)); - } + subg_exec = _executor_map->at(_else_subg_index).get(); } - const auto permute_op_input_to_subg_input = - std::make_shared<PermuteLayer>(src_tensors, dst_tensors, 
_external_context); - - // Add tensors used as output of operation or contained in outputs of operation - src_tensors.clear(); - dst_tensors.clear(); - assert(_output_indices.size() == subg_exec->getOutputTensors().size()); - assert(_output_indices.size() == _output_tensors.size()); - for (uint32_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = _graph.operands().at(output_index); - if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index)) - { - src_tensors.emplace_back(subg_exec->getOutputTensors().at(i)); - dst_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_subg_output_to_op_output = - std::make_shared<PermuteLayer>(src_tensors, dst_tensors, _external_context); - - // Remove copying of unused tensor - permute_op_input_to_subg_input->prepare(); - permute_subg_output_to_op_output->prepare(); - // Copy & run - subg_exec->execute(_input_tensors, permute_op_input_to_subg_input); - permute_subg_output_to_op_output->run(); + subg_exec->execute(_input_tensors, _output_tensors); VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index) << std::endl; } diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h index 9e944bccc..967552fc3 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h +++ b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h @@ -17,7 +17,7 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__ -#include <backend/ITensor.h> +#include <backend/IPortableTensor.h> #include <exec/IExecutor.h> #include "../ExternalContext.h" @@ -33,9 +33,9 @@ namespace kernel class IfLayer : public ::onert::exec::IFunction { public: - IfLayer(backend::ITensor *cond_tensor, const std::vector<backend::ITensor *> input_tensors, - const std::vector<backend::ITensor *> output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, + IfLayer(backend::IPortableTensor *cond_tensor, + const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, exec::ExecutorMap *executor_map, const std::shared_ptr<ExternalContext> &external_context); @@ -44,11 +44,9 @@ public: void run() override; private: - backend::ITensor *_cond_tensor; - const std::vector<backend::ITensor *> _input_tensors; - const std::vector<backend::ITensor *> _output_tensors; - const ir::OperandIndexSequence &_output_indices; - const ir::Graph &_graph; + backend::IPortableTensor *_cond_tensor; + const std::vector<backend::IPortableTensor *> _input_tensors; + const std::vector<backend::IPortableTensor *> _output_tensors; const ir::SubgraphIndex _then_subg_index; const ir::SubgraphIndex _else_subg_index; exec::ExecutorMap *_executor_map; diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h index 5d0f1918e..6fb69b65c 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h +++ b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h @@ -17,7 +17,6 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__ -#include "backend/ITensorBuilder.h" #include "exec/IPermuteFunction.h" #include 
"exec/IExecutor.h" #include "../ExternalContext.h" diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc index a0d478603..a4b5aa5ca 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc +++ b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc @@ -16,6 +16,7 @@ #include "WhileLayer.h" +#include <algorithm> #include <backend/ITensor.h> #include "exec/ExecutorBase.h" #include <misc/polymorphic_downcast.h> @@ -30,16 +31,15 @@ namespace controlflow namespace kernel { -WhileLayer::WhileLayer(const std::vector<backend::ITensor *> input_tensors, - const std::vector<backend::ITensor *> output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, +WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map, + cpu_common::DynamicMemoryManager *dyn_memory_manager, const std::shared_ptr<ExternalContext> &external_context) : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index}, - _output_indices{output_indices}, _graph{graph}, _input_tensors{input_tensors}, - _output_tensors{output_tensors}, _executor_map{executor_map}, - _external_context{external_context} + _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map}, + _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context} { // At this point, executor_map may not have executors of cond subg and body subg } @@ -56,164 +56,90 @@ void WhileLayer::run() // // Run cond subg // If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" -> // "_dst_tensors" - auto cond_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_cond_subg_index).get()); - auto body_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_body_subg_index).get()); - - const auto &cond_graph = cond_exec->graph(); - const auto &body_graph = body_exec->graph(); - - std::vector<backend::ITensor *> input_tensors; - std::vector<backend::ITensor *> cond_input_tensors; - std::vector<backend::ITensor *> body_input_tensors; - std::vector<backend::ITensor *> body_output_tensors; - std::vector<backend::ITensor *> output_tensors; - - // Add only used tensors in cond subgraph - assert(cond_graph.getInputs().size() == _input_tensors.size()); - assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size()); - for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i)); - if (cond_input.getUses().size() > 0) - { - input_tensors.emplace_back(_input_tensors.at(i)); - cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i)); - } - } - const auto permute_op_input_to_cond_input = - std::make_shared<PermuteLayer>(input_tensors, cond_input_tensors, _external_context); - - // Add only used tensors among outputs of while operation - assert(_output_indices.size() == _input_tensors.size()); - assert(_output_indices.size() == _output_tensors.size()); - input_tensors.clear(); - output_tensors.clear(); - for (size_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = _graph.operands().at(output_index); - if (output.getUses().size() 
> 0 || _graph.getOutputs().contains(output_index)) - { - input_tensors.emplace_back(_input_tensors.at(i)); - output_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_op_input_to_op_output = - std::make_shared<PermuteLayer>(input_tensors, output_tensors, _external_context); - - // Add all tensors with unused tensors in body subgraph because unused input tensors will be - // copied output tensors in body subgraph - assert(_input_tensors.size() == body_exec->getInputTensors().size()); - input_tensors = _input_tensors; - body_input_tensors = body_exec->getInputTensors(); - const auto permute_op_input_to_body_input = - std::make_shared<PermuteLayer>(input_tensors, body_input_tensors, _external_context); - - // Add only used tensors in cond subgraph - assert(cond_graph.getInputs().size() == body_exec->getOutputTensors().size()); - assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size()); - body_output_tensors.clear(); - cond_input_tensors.clear(); - for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i)); - if (cond_input.getUses().size() > 0) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i)); - } - } - const auto permute_body_output_to_cond_input = - std::make_shared<PermuteLayer>(body_output_tensors, cond_input_tensors, _external_context); - - // Add only used tensors in body subgraph - assert(body_graph.getInputs().size() == body_exec->getOutputTensors().size()); - assert(body_graph.getInputs().size() == body_exec->getInputTensors().size()); - body_output_tensors.clear(); - body_input_tensors.clear(); - for (uint32_t i = 0; i < body_graph.getInputs().size(); ++i) - { - const auto &body_input_index = body_graph.getInputs().at(i); - const auto &body_input = body_graph.operands().at(body_input_index); - if (body_input.getUses().size() > 0 && - !body_exec->graph().getOutputs().contains(body_input_index)) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - body_input_tensors.emplace_back(body_exec->getInputTensors().at(i)); - } - } - const auto permute_body_output_to_body_input = - std::make_shared<PermuteLayer>(body_output_tensors, body_input_tensors, _external_context); - - // Add only used tensors among outputs of while operation - assert(_output_indices.size() == body_exec->getOutputTensors().size()); - assert(_output_indices.size() == _output_tensors.size()); - body_output_tensors.clear(); - output_tensors.clear(); - for (size_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = _graph.operands().at(output_index); - if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index)) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - output_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_body_output_to_op_output = - std::make_shared<PermuteLayer>(body_output_tensors, output_tensors, _external_context); + auto cond_exec = _executor_map->at(_cond_subg_index).get(); + auto body_exec = _executor_map->at(_body_subg_index).get(); - // Remove copying of unused tensor - permute_op_input_to_cond_input->prepare(); - permute_op_input_to_op_output->prepare(); - permute_op_input_to_body_input->prepare(); - permute_body_output_to_cond_input->prepare(); - permute_body_output_to_body_input->prepare(); - 
permute_body_output_to_op_output->prepare(); + // Need a temp tensor to hold the cond subgraph output + assert(cond_exec->getOutputTensors().size() == 1); + auto cond_output_tensor = [&]() { + auto cond_output = cond_exec->getOutputTensors().at(0); + auto tensor = std::make_unique<Tensor>(cond_output->orig_info(), cond_output->orig_layout(), + _dyn_memory_manager); + tensor->set_dynamic(); + tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size())); + return tensor; + }(); VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl; - cond_exec->execute(_input_tensors, permute_op_input_to_cond_input); + cond_exec->execute(_input_tensors, {cond_output_tensor.get()}); VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl; - assert(cond_exec->getOutputTensors().size() == 1); - auto &cond_output_tensor = cond_exec->getOutputTensors().at(0); auto getResultCond = [](backend::ITensor *tensor) -> bool { bool ret = false; tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); }); return ret; }; + std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end()); + std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end()); + // Copying body inputs to outputs when the loop body is never executed + if (!getResultCond(cond_output_tensor.get())) + { + PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context}; + copy_body_inputs_to_op_outputs.run(); + return; + } + + // Need some temp tensors to hold the body subgraph output + std::vector<std::unique_ptr<Tensor>> temp_outputs_o; + std::vector<IPortableTensor *> temp_outputs; + for (auto io_tensor : body_exec->getOutputTensors()) + { + auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(), + _dyn_memory_manager); + tensor->set_dynamic(); + tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size())); + temp_outputs.push_back(tensor.get()); + temp_outputs_o.push_back(std::move(tensor)); + } + + std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end()); + PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context}; + const auto body_execute_with_op_inputs = [&]() { VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl; - body_exec->execute(_input_tensors, permute_op_input_to_body_input); + body_exec->execute(_input_tensors, temp_outputs); VERBOSE(While) << "Return from $" << _body_subg_index << std::endl; }; const auto body_execute_with_body_outputs = [&]() { VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl; - body_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_body_input); + body_exec->execute(_output_tensors, temp_outputs); VERBOSE(While) << "Return from $" << _body_subg_index << std::endl; }; std::function<void()> body_execute = body_execute_with_op_inputs; const auto cond_execute = [&]() { VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl; - cond_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_cond_input); + cond_exec->execute(_output_tensors, {cond_output_tensor.get()}); VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl; }; - auto permute_to_outputs_fn = permute_op_input_to_op_output; // Loop while Cond subgraph's output is true - while (getResultCond(cond_output_tensor)) + while (getResultCond(cond_output_tensor.get())) { body_execute(); + 
copy_body_outputs_to_op_outputs.run(); cond_execute(); body_execute = body_execute_with_body_outputs; - permute_to_outputs_fn = permute_body_output_to_op_output; } - permute_to_outputs_fn->run(); + + // Clean-up the temp tensors + _dyn_memory_manager->deallocate(cond_output_tensor.get()); + for (auto tensor : temp_outputs) + { + _dyn_memory_manager->deallocate(tensor); + } } } // namespace kernel diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h index 8f82bd973..d3924c843 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h +++ b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h @@ -17,13 +17,15 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__ -#include <backend/ITensor.h> +#include <backend/IPortableTensor.h> #include <exec/IExecutor.h> #include <exec/IFunction.h> #include <ir/OperandIndexSequence.h> #include <ir/Graph.h> #include "../ExternalContext.h" +#include "backend/cpu_common/MemoryManager.h" + namespace onert { namespace backend @@ -36,11 +38,10 @@ namespace kernel class WhileLayer : public ::onert::exec::IFunction { public: - WhileLayer(const std::vector<backend::ITensor *> input_tensors, - const std::vector<backend::ITensor *> output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, + WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index, - exec::ExecutorMap *executor_map, + exec::ExecutorMap *executor_map, cpu_common::DynamicMemoryManager *dyn_memory_manager, const std::shared_ptr<ExternalContext> &external_context); public: @@ -49,11 +50,10 @@ public: private: const ir::SubgraphIndex _cond_subg_index; const ir::SubgraphIndex _body_subg_index; - const ir::OperandIndexSequence &_output_indices; - const ir::Graph &_graph; - const std::vector<backend::ITensor *> _input_tensors; - const std::vector<backend::ITensor *> _output_tensors; + const std::vector<backend::IPortableTensor *> _input_tensors; + const std::vector<backend::IPortableTensor *> _output_tensors; exec::ExecutorMap *_executor_map; + cpu_common::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors const std::shared_ptr<ExternalContext> _external_context; }; diff --git a/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc b/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc new file mode 100644 index 000000000..732b03ce8 --- /dev/null +++ b/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/cpu_common/BackendContextHelpers.h" diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/core/src/backend/cpu_common/ConstantInitializer.cc index 6f6eb77bc..610ba5ffc 100644 --- a/runtime/onert/backend/cpu/ConstantInitializer.cc +++ b/runtime/onert/core/src/backend/cpu_common/ConstantInitializer.cc @@ -14,19 +14,19 @@ * limitations under the License. */ -#include "ConstantInitializer.h" -#include "Tensor.h" +#include "backend/cpu_common/ConstantInitializer.h" +#include "backend/cpu_common/Tensor.h" namespace onert { namespace backend { -namespace cpu +namespace cpu_common { ConstantInitializer::ConstantInitializer(const ir::Operands &operands, const std::shared_ptr<ITensorRegistry> &tensor_reg) - : IConstantInitializer{operands}, _tensor_reg{tensor_reg} + : ConstantInitializerBase{operands}, _tensor_reg{tensor_reg} { // DO NOTHING } @@ -53,42 +53,6 @@ void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &in }; } -void ConstantInitializer::visit(const ir::operation::Conv2D &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerExternalInitializer(kernel_index, kernel_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); -} - -void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerExternalInitializer(kernel_index, kernel_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); -} - -void ConstantInitializer::visit(const ir::operation::FullyConnected &node) -{ - const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); - const auto &weight_obj = _operands.at(weight_index); - registerExternalInitializer(weight_index, weight_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); - if (!bias_index.undefined()) - { - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); - } -} - -} // namespace cpu +} // namespace cpu_common } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/IConstantInitializer.cc b/runtime/onert/core/src/backend/cpu_common/ConstantInitializerBase.cc index 6fb9757e0..15c2dfeb1 100644 --- a/runtime/onert/core/src/backend/IConstantInitializer.cc +++ b/runtime/onert/core/src/backend/cpu_common/ConstantInitializerBase.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "backend/IConstantInitializer.h" +#include "backend/cpu_common/ConstantInitializerBase.h" #include <Half.h> @@ -24,9 +24,11 @@ namespace onert { namespace backend { +namespace cpu_common +{ -void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) +void ConstantInitializerBase::registerCopyInitializer(const ir::OperandIndex &index, + const ir::Operand &obj) { // For only CONSTANTS // TODO Add to check if tensor has been allocated @@ -67,8 +69,8 @@ void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index } } -void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) +void ConstantInitializerBase::registerPermuteInitializer(const ir::OperandIndex &index, + const ir::Operand &obj) { // For only CONSTANTS // TODO Add to check if tensor has been allocated @@ -82,27 +84,27 @@ void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &in switch (type) { case DataType::FLOAT32: - _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout); break; case DataType::INT32: - _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout); break; case DataType::UINT32: - _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_layout); break; case DataType::BOOL8: case DataType::QUANT_UINT8_ASYMM: - _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_layout); break; case DataType::QUANT_INT8_SYMM: case DataType::QUANT_INT8_ASYMM: - _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_layout); break; case DataType::FLOAT16: - _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_layout); break; case DataType::INT64: - _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_layout); break; default: throw std::runtime_error("Not supported, yet"); @@ -110,5 +112,6 @@ void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &in } } +} // namespace cpu_common } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc index cac43babe..8c5c46a08 100644 --- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc @@ -17,6 +17,7 @@ #include "backend/cpu_common/StaticTensorManager.h" #include "backend/cpu_common/DynamicTensorManager.h" +#include "backend/cpu_common/Tensor.h" #include <util/logging.h> namespace onert @@ -27,31 +28,13 @@ namespace cpu_common { StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> ®, - DynamicMemoryManager *dynamic_mem_mgr) - : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg}, - _dynamic_mem_mgr{dynamic_mem_mgr} + DynamicTensorManager 
*dynamic_tensor_manager) + : _nonconst_mgr{new MemoryManager()}, _tensors{reg}, + _dynamic_tensor_manager{dynamic_tensor_manager} { // DO NOTHING } -void StaticTensorManager::allocateConsts(void) -{ - for (auto &pair : _tensors->native_tensors()) - { - const auto &ind = pair.first; - auto tensor = pair.second.get(); - if (_as_constants[ind]) - { - auto mem_alloc = _const_mgr->allocate(_tensors->getITensor(ind), tensor->total_size()); - tensor->setBuffer(mem_alloc); - auto buffer = mem_alloc->base(); - VERBOSE(CPU_COMMON_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value() - << "): " << static_cast<void *>(buffer) - << "size : " << tensor->total_size() << std::endl; - } - } -} - void StaticTensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); @@ -65,14 +48,12 @@ void StaticTensorManager::allocateNonconsts(void) auto *buffer = _nonconst_mgr->getBuffer(ind); tensor->setBuffer(buffer); - VERBOSE(CPU_COMMON_StaticTensorManager) << "TENSOR(#" << ind.value() - << "): " << static_cast<void *>(buffer) << std::endl; + VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value() + << "): " << static_cast<void *>(buffer) << std::endl; } } } -void StaticTensorManager::deallocateConsts(void) { _const_mgr->deallocate(); } - void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, @@ -80,8 +61,17 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, bool as_const) { assert(!_tensors->getNativeTensor(ind)); - auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr); - _tensors->setNativeTensor(ind, std::move(tensor)); + if (as_const) + { + auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout); + _tensors->setNativeTensor(ind, std::move(tensor)); + } + else + { + auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, + _dynamic_tensor_manager->dynamic_mem_mgr().get()); + _tensors->setNativeTensor(ind, std::move(tensor)); + } _as_constants[ind] = as_const; } diff --git a/runtime/onert/core/src/backend/cpu_common/Tensor.cc b/runtime/onert/core/src/backend/cpu_common/Tensor.cc index d3dcf9a6d..e412cb775 100644 --- a/runtime/onert/core/src/backend/cpu_common/Tensor.cc +++ b/runtime/onert/core/src/backend/cpu_common/Tensor.cc @@ -95,3 +95,20 @@ bool Tensor::applyShape(const ir::Shape &new_shape) } // namespace cpu_common } // namespace backend } // namespace onert + +// ExternalTensor + +namespace onert +{ +namespace backend +{ +namespace cpu_common +{ + +// `dynamic_cast` does not work across shared library boundaries on the NDK +// With this as a key function, `dynamic_cast` works across dynamic libraries +ExternalTensor::~ExternalTensor() {} + +} // namespace cpu_common +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc index 0093f50fd..ea45cbeb7 100644 --- a/runtime/onert/core/src/compiler/BackendManager.cc +++ b/runtime/onert/core/src/compiler/BackendManager.cc @@ -69,55 +69,73 @@ void BackendManager::loadBackend(const std::string &backend) return; } - // TODO Remove indentation + const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT; + void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL); + + if (handle == nullptr) { - const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT; - void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL); + VERBOSE(BackendManager) << "Failed to load backend 
'" << backend << "' - " << dlerror() << "\n"; + return; + } - if (handle == nullptr) + VERBOSE(BackendManager) << "Successfully loaded '" << backend << "'(" << backend_so << ")\n"; + + { + // load object creator function + auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create"); + if (backend_create == nullptr) { - VERBOSE_F() << "Failed to load backend '" << backend << "' - " << dlerror() << std::endl; + // TODO replace `fprintf` with `VERBOSE` + fprintf(stderr, "BackendManager: unable to find function `onert_backend_create` : %s\n", + dlerror()); + dlclose(handle); return; } - VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_so << "\n"; - + // load object creator function + auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy"); + if (backend_destroy == nullptr) { - // load object creator function - auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create"); - if (backend_create == nullptr) - { - fprintf(stderr, "BackendManager: unable to open function onert_backend_create : %s\n", - dlerror()); - abort(); - } - - // load object creator function - auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy"); - if (backend_destroy == nullptr) - { - fprintf(stderr, "BackendManager: unable to open function onert_backend_destroy : %s\n", - dlerror()); - abort(); - } - - auto backend_object = - std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy); - bool initialized = backend_object->config()->initialize(); // Call initialize here? - if (!initialized) - { - VERBOSE_F() << backend.c_str() << " backend initialization failed. Don't use this backend" - << std::endl; - dlclose(handle); - return; - } - _gen_map.emplace(backend_object->config()->id(), std::move(backend_object)); + // TODO replace `fprintf` with `VERBOSE` + fprintf(stderr, "BackendManager: unable to find `function onert_backend_destroy` : %s\n", + dlerror()); + dlclose(handle); + return; } - // Save backend handle (avoid warning by handle lost without dlclose()) - auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [](void *h) { dlclose(h); }}; - _handle_map.emplace(backend, std::move(u_handle)); + auto backend_object = + std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy); + bool initialized = backend_object->config()->initialize(); // Call initialize here? + if (!initialized) + { + VERBOSE(BackendManager) << backend.c_str() + << " backend initialization failed. 
Don't use this backend" + << std::endl; + dlclose(handle); + return; + } + _gen_map.emplace(backend_object->config()->id(), std::move(backend_object)); } + + // Save backend handle (avoid a warning about the handle being lost without dlclose()) + + // NOTE This is a workaround for clang-format3.9 (seems like it does not understand + // "by-copy capture with an initializer") + // clang-format off + auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{ + handle, [id = backend, filename = backend_so](void *h) { + if (dlclose(h) == 0) + { + VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n"; + } + else + { + VERBOSE(BackendManager) + << "Failed to unload backend '" << id << "' - " << dlerror() << "\n"; + } + }}; +// clang-format on +_handle_map.emplace(backend, std::move(u_handle)); } backend::Backend *BackendManager::get(const std::string &key) diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc index c2844bd7c..7eeb14ad3 100644 --- a/runtime/onert/core/src/compiler/Compiler.cc +++ b/runtime/onert/core/src/compiler/Compiler.cc @@ -41,6 +41,30 @@ #include "ir/OperationDumper.h" #include "misc/string_helpers.h" +namespace +{ + +using namespace onert; + +std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend) +{ + std::unordered_map<ir::OpCode, std::string>::iterator it; + std::string opbackends; + + for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it) + { + if (!opbackends.empty()) + opbackends = opbackends + ", "; + + auto opcode = it->first; + const std::string opname = ir::toString(opcode); + opbackends += opname + "=" + it->second; + } + return opbackends; +} + +} // namespace + namespace onert { @@ -51,7 +75,6 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) { CompilerOptions options; options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';'); - options.is_primary_subgraph = false; options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH); options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP); options.op_seq_max_node = util::getConfigInt(util::config::OP_SEQ_MAX_NODE); @@ -108,13 +131,15 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) return options; } -Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs) +Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx) : _subgraphs{subgs}, _state{State::CREATED} { // Set default values for CompilerOptions // All these default values should not be fetched from Env, when we stop supporting Android NN // API. 
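// A minimal standalone sketch of the BACKENDS split used by
// fetchCompilerOptionsFromGlobalConfig above (e.g. BACKENDS="cpu;acl_cl" becomes
// backend_list), assuming nnfw::misc::split behaves like a plain
// single-character delimiter split; names here are illustrative only:
#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> splitConfig(const std::string &s, char delim)
{
  std::vector<std::string> tokens; // "cpu;acl_cl" -> {"cpu", "acl_cl"}
  std::stringstream ss{s};
  std::string token;
  while (std::getline(ss, token, delim))
  {
    if (!token.empty())
      tokens.push_back(token);
  }
  return tokens;
}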
_options = fetchCompilerOptionsFromGlobalConfig(*subgs); + + _options.tracing_ctx = tracing_ctx; } void Compiler::enableToFp16() { _options.fp16_enable = true; } @@ -132,12 +157,10 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) { // Set control flow backend for control flow operators { - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = - backend::controlflow::Config::ID; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = - backend::controlflow::Config::ID; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = - backend::controlflow::Config::ID; + auto &cfid = backend::controlflow::Config::ID; + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = cfid; + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = cfid; + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = cfid; } // FIXME This is a workaround for bcq operations, should remove it @@ -157,7 +180,11 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl; VERBOSE(Compiler) << "op_seq_max_node : " << _options.op_seq_max_node << std::endl; VERBOSE(Compiler) << "executor : " << _options.executor << std::endl; - VERBOSE(Compiler) << "manual_scheduler_options : (Too many things to print)" << std::endl; + VERBOSE(Compiler) << "manual backend_for_all : " + << _options.manual_scheduler_options.backend_for_all << std::endl; + VERBOSE(Compiler) << "manual_scheduler_options : " + << getOpBackends(_options.manual_scheduler_options.opcode_to_backend) + << std::endl; VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl; VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl; VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl; @@ -202,7 +229,6 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) // Lower: Assign backend std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs; _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { - _options.is_primary_subgraph = (index == ir::SubgraphIndex{0}); onert::dumper::dot::DotDumper dot_dumper(subg, dump_level); dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value())); @@ -230,6 +256,14 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) _subgraphs.reset(); + for (auto &pair : lowered_subgs) + { + const auto &subg_index = pair.first; + auto &lowered_subg = pair.second; + onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level); + dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value())); + } + // Shape inference. 
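// A toy illustration of what the shape inference step below computes for one
// operation kind; onert's real inferencer covers every operation and walks the
// graph in topological order (sketch only, not the actual inferencer API):
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

using Shape = std::vector<int32_t>;

// Infer the output shape of a rank-matched binary elementwise operation with
// NumPy-style broadcasting on size-1 dimensions.
Shape inferEltwiseShape(const Shape &lhs, const Shape &rhs)
{
  assert(lhs.size() == rhs.size());
  Shape out(lhs.size());
  for (std::size_t i = 0; i < lhs.size(); ++i)
  {
    assert(lhs[i] == rhs[i] || lhs[i] == 1 || rhs[i] == 1);
    out[i] = std::max(lhs[i], rhs[i]);
  }
  return out;
}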
{ const auto primary_subg_idx = ir::SubgraphIndex{0}; @@ -266,12 +300,8 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) auto &lowered_subg = pair.second; auto indexed_ranks = lowered_subg->indexed_ranks(); - _options.is_primary_subgraph = (subg_index == ir::SubgraphIndex{0}); - - onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level); - dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value())); - - ir::OperationDumper dumper("START SUBGRAPH " + std::to_string(subg_index.value())); + ir::OperationDumper dumper("Executor generation of Subgraph " + + std::to_string(subg_index.value())); lowered_subg->graph().operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); }); auto executor = std::unique_ptr<exec::IExecutor>{ diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc index bb325ffbc..356feed7c 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.cc +++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc @@ -16,6 +16,7 @@ #include "ExecutorFactory.h" +#include <deque> #include <functional> #include "exec/ExecutionObservers.h" #include "exec/LinearExecutor.h" @@ -25,16 +26,13 @@ #include "compiler/ExecutionBuilder.h" #include "exec/ExecTime.h" #include "compiler/Linear.h" -#include "compiler/TensorBuilders.h" -#include "backend/IConstantInitializer.h" -#include "backend/IKernelGenerator.h" -#include "backend/IOptimizer.h" #include "backend/IPortableTensor.h" -#include "backend/ITensorRegister.h" #include "backend/controlflow/Config.h" #include "backend/controlflow/KernelGenerator.h" #include "backend/controlflow/UserTensor.h" #include "backend/controlflow/TensorBuilder.h" +#include "util/TracingCtx.h" + #include <memory> namespace onert @@ -66,6 +64,36 @@ private: std::shared_ptr<backend::IConfig> _config; }; +void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph, + const ir::OperandIndexSequence &indices) +{ + // TODO Store controlflow backend in BackendContext + std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg; + for (const auto &e : lowered_graph.backend_contexts()) + { + auto backend = e.first; + auto &context = e.second; + if (backend->config()->id() == backend::controlflow::Config::ID) + { + cf_tensor_reg = + std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry); + } + } + assert(cf_tensor_reg); + + for (auto ind : indices) + { + const auto &operand = lowered_graph.graph().operands().at(ind); + auto tensor = std::make_unique<backend::controlflow::IOTensor>( + operand.info(), + ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */ + ); + + // Add tensor to controlflow TensorRegistry. 
+ cf_tensor_reg->setNativeIOTensor(ind, std::move(tensor)); + } +} + } // namespace } // namespace onert @@ -134,97 +162,6 @@ void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_g } } -void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph, - const std::vector<ir::OpSequenceIndex> &order) -{ - for (const auto index : order) - { - const auto &op_seq = lowered_graph->op_seqs().at(index); - const auto backend = lowered_graph->getLowerInfo(index)->backend(); - const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register; - auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; - auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs(); - - if (tensor_register) - { - // Custom registration - tensor_register->registerTensors(op_seq, lowered_graph->getLowerInfo()); - } - else - { - // Default registration - for (const auto op_idx : op_seq) - { - const auto &op = lowered_graph->graph().operations().at(op_idx); - for (const auto &index : - (op.getInputs() | ir::Remove::UNDEFINED) + (op.getOutputs() | ir::Remove::UNDEFINED)) - { - if (!tensor_builder->isRegistered(index) && !model_io.contains(index)) - { - const auto &operand_lower_info = - lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement(); - - // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) - // op.getOutputs() of permute (CPU) returns tensor A - // but tensor A belongs to the backend of acl_cl. - // So, we have to make this tensor NOT registered for CPU. - if (operand_lower_info.backend() != backend) - continue; - - const auto &obj = lowered_graph->graph().operands().at(index); - const auto frontend_layout = op_seq.getLayout(); - const auto backend_layout = operand_lower_info.layout(); - ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), - obj.typeInfo(), obj.info().memAllocType(), - obj.isConstant()}; - tensor_builder->registerTensorInfo(index, backend_info, backend_layout); - } - } - } - } - } -} - -std::vector<backend::ITensor *> -ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph, - const ir::OperandIndexSequence &indices) -{ - std::vector<backend::ITensor *> ret; - - // TODO Store controlflow backend in BackendContext - std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder; - std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg; - for (const auto &e : lowered_graph.backend_contexts()) - { - auto backend = e.first; - auto &context = e.second; - if (backend->config()->id() == backend::controlflow::Config::ID) - { - cf_tensor_builder = - std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder); - cf_tensor_reg = - std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry); - } - } - assert(cf_tensor_builder); - assert(cf_tensor_reg); - - for (auto ind : indices) - { - const auto &operand = lowered_graph.graph().operands().at(ind); - auto tensor = std::make_unique<backend::controlflow::UserTensor>( - operand.info(), - ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */ - ); - - // Add tensor to controlflow TensorRegistry. 
- cf_tensor_reg->setNativeUserTensor(ind, std::move(tensor)); - auto *itensor = cf_tensor_reg->getITensor(ind); - ret.push_back(itensor); - } - return ret; -} - void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph) { TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true}; @@ -260,110 +197,78 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo initializeBackendContext(lowered_graph.get()); - // linearize - assert(!lowered_graph->graph().isBuildingPhase()); - - /************************************************* - * Backend dependent analysis & optimization phase - *************************************************/ - - for (auto &pair : backend_contexts) - { - auto &optimizer = pair.second->optimizer; - if (optimizer) - optimizer->optimize(); - } + TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true}; - /********************************************************** - * Backend dependent analysis & optimization phase finished - **********************************************************/ + assert(!lowered_graph->graph().isBuildingPhase()); - /*********************** - * Code generation phase - ***********************/ + initializeSubgraphIOTensors( + *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) | + ir::Remove::DUPLICATED | ir::Remove::UNDEFINED); + // linearize auto order = Linear::linearize(*lowered_graph); - runTensorRegistration(lowered_graph.get(), order); - - std::vector<backend::ITensor *> input_tensors; - std::vector<backend::ITensor *> output_tensors; - if (options.is_primary_subgraph) - { - input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs()); - output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs()); - } - Linear::dump(*lowered_graph, order); - Linear::planTensors(*lowered_graph, order); - TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true}; - TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true}; - - for (auto &tensor_builder : tensor_builders) + for (auto &pair : backend_contexts) { - tensor_builder->prepare(); + pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo()); } prepareMigrantTensors(*lowered_graph); - ExecutionBuilder builder; - - // Generate kernels - lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index, - const ir::OpSequence &op_seq) { - auto lower_info = lowered_graph->getLowerInfo(op_seq_index); - auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen; - // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow - auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get()); - if (cf_kernel_gen != nullptr) + // Give some runtime objects to controlflow KernelGenerator + for (auto &pair : backend_contexts) + { + auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get()); + if (cf_context != nullptr) { + auto cf_kernel_gen = cf_context->kernel_gen; cf_kernel_gen->setTensorRegistries(tensor_regs); cf_kernel_gen->setExecutorMap(executor_map); } - auto fn_seq = kernel_gen->generate(op_seq); - if (options.he_profiling_mode) - { - fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); - } - builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)}); - }); - - for (auto &tensor_builder : tensor_builders) - { - tensor_builder->allocate(); } + 
ExecutionBuilder builder; + + // Adjust the order of backends for the upcoming iteration + std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts; for (auto &pair : backend_contexts) { - pair.second->initConsts(); + // NOTE The controlflow backend must be processed last. + // This is because the Permute layer is special: it is the only operation that could have + // different ITensor objects for its input and output. And it requires all other backends' + // tensors to be ready to use. + if (pair.first->config()->id() == "controlflow") + ordered_contexts.emplace_back(pair.first, pair.second.get()); + else + ordered_contexts.emplace_front(pair.first, pair.second.get()); } - lowered_graph->graph().operands().iterate( - [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - auto code_map = builder.releaseCodeMap(); - - for (auto &it : code_map) + // Generate kernels + for (auto &pair : ordered_contexts) { - auto op_seq_index = it.first; - auto &fn_seq = it.second.fn_seq; - - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend(); - auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; - tensor_builder->postFunctionPrepare(); - }); + auto codes = pair.second->genKernels(order, lowered_graph->op_seqs()); + for (auto &pair : codes) + { + auto &op_seq_ind = pair.first; + auto &fn_seq = pair.second; + auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind); + auto lower_info = lowered_graph->getLowerInfo(op_seq_ind); + if (options.he_profiling_mode) + fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); + builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)}); + } } - auto exec = - new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, - std::move(code_map), order}; + auto code_map = builder.releaseCodeMap(); + + auto exec = new exec::LinearExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), + order, options.tracing_ctx}; if (!options.trace_filepath.empty()) { - std::unique_ptr<exec::IExecutionObserver> ctp = - std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph()); + std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>( + options.trace_filepath, exec->graph(), options.tracing_ctx); exec->addObserver(std::move(ctp)); } @@ -378,100 +283,81 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( initializeBackendContext(lowered_graph.get()); - auto order = Linear::linearize(*lowered_graph); - runTensorRegistration(lowered_graph.get(), order); - - std::vector<backend::ITensor *> input_tensors; - std::vector<backend::ITensor *> output_tensors; - if (options.is_primary_subgraph) - { - input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs()); - output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs()); - } - - TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true}; TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true}; - // To make tensors never be deallocated, this is a workaround to use static memory planner - for (auto &tensor_builder : tensor_builders) - { - lowered_graph->graph().operands().iterate( - [&](const ir::OperandIndex &ind, const ir::Operand &) { - if (tensor_builder->isRegistered(ind)) - { - tensor_builder->notifyFirstUse(ind); - } - }); - } + 
assert(!lowered_graph->graph().isBuildingPhase()); + + initializeSubgraphIOTensors( + *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) | + ir::Remove::DUPLICATED | ir::Remove::UNDEFINED); - for (auto &tensor_builder : tensor_builders) + // linearize + // This order is just for giving topological order info to the backends + // TODO When we pass a partial graph to a backend, we can remove this + auto order = Linear::linearize(*lowered_graph); + for (auto &pair : backend_contexts) { - tensor_builder->prepare(); + pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo()); } prepareMigrantTensors(*lowered_graph); - ExecutionBuilder builder; - - // Generate kernels - lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index, - const ir::OpSequence &op_seq) { - auto lower_info = lowered_graph->getLowerInfo(op_seq_index); - auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen; - // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow - auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get()); - if (cf_kernel_gen != nullptr) + // Give some runtime objects to controlflow KernelGenerator + for (auto &pair : backend_contexts) + { + auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get()); + if (cf_context != nullptr) { - assert(cf_kernel_gen != nullptr); + auto cf_kernel_gen = cf_context->kernel_gen; cf_kernel_gen->setTensorRegistries(tensor_regs); cf_kernel_gen->setExecutorMap(executor_map); } - auto fn_seq = kernel_gen->generate(op_seq); - if (options.he_profiling_mode) - { - fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); - } - builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)}); - }); - - for (const auto &tensor_builder : tensor_builders) - { - tensor_builder->allocate(); } + ExecutionBuilder builder; + + // Adjust the order of backends for the upcoming iteration + std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts; for (auto &pair : backend_contexts) { - pair.second->initConsts(); + // NOTE The controlflow backend must be processed last. + // This is because the Permute layer is special: it is the only operation that could have + // different ITensor objects for its input and output. And it requires all other backends' + // tensors to be ready to use. 
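// A standalone illustration of the reordering done just below: every backend
// except "controlflow" goes to the front of the deque, so iteration visits the
// controlflow context last (a sketch with plain strings instead of contexts):
#include <deque>
#include <iostream>
#include <string>

int main()
{
  std::deque<std::string> ordered_ids;
  for (std::string id : {"cpu", "controlflow", "acl_cl"})
  {
    if (id == "controlflow")
      ordered_ids.emplace_back(std::move(id));
    else
      ordered_ids.emplace_front(std::move(id));
  }
  for (const auto &id : ordered_ids)
    std::cout << id << ' '; // prints: acl_cl cpu controlflow
  std::cout << '\n';
  return 0;
}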
+ if (pair.first->config()->id() == "controlflow") + ordered_contexts.emplace_back(pair.first, pair.second.get()); + else + ordered_contexts.emplace_front(pair.first, pair.second.get()); } - lowered_graph->graph().operands().iterate( - [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - auto code_map = builder.releaseCodeMap(); - - for (auto &it : code_map) + // Generate kernels + for (auto &pair : ordered_contexts) { - auto op_seq_index = it.first; - auto &fn_seq = it.second.fn_seq; - - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend(); - auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; - tensor_builder->postFunctionPrepare(); - }); + auto codes = pair.second->genKernels(order, lowered_graph->op_seqs()); + for (auto &pair : codes) + { + auto &op_seq_ind = pair.first; + auto &fn_seq = pair.second; + auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind); + auto lower_info = lowered_graph->getLowerInfo(op_seq_ind); + if (options.he_profiling_mode) + fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); + builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)}); + } } + auto code_map = builder.releaseCodeMap(); + exec::ExecutorBase *exec = nullptr; if (parallel) { - exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors, - tensor_regs, std::move(code_map)}; + exec = new exec::ParallelExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), + options.tracing_ctx}; } else { - auto dataflow_exec = new exec::DataflowExecutor{ - std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, std::move(code_map)}; + auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), tensor_regs, + std::move(code_map), options.tracing_ctx}; if (options.he_profiling_mode) { std::vector<const backend::Backend *> backends; @@ -489,8 +375,8 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( if (!options.trace_filepath.empty()) { - std::unique_ptr<exec::IExecutionObserver> ctp = - std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph()); + std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>( + options.trace_filepath, exec->graph(), options.tracing_ctx); exec->addObserver(std::move(ctp)); } diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h index e76b721ea..06dc691db 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.h +++ b/runtime/onert/core/src/compiler/ExecutorFactory.h @@ -46,9 +46,6 @@ private: static void initializeBackendContext(compiler::LoweredGraph *lowered_graph); static void runTensorRegistration(compiler::LoweredGraph *lowered_graph, const std::vector<ir::OpSequenceIndex> &order); - static std::vector<backend::ITensor *> - initializeModelIOTensors(compiler::LoweredGraph &lowered_graph, - const ir::OperandIndexSequence &indices); static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph); static exec::IExecutor * createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc index 30c8f72a5..fdd2a7653 100644 --- a/runtime/onert/core/src/compiler/Linear.cc +++ b/runtime/onert/core/src/compiler/Linear.cc @@ -19,8 +19,6 @@ #include "Linear.h" #include "backend/IConfig.h" -#include 
"backend/IConstantInitializer.h" -#include "backend/ITensorRegister.h" #include "backend/Backend.h" #include "util/logging.h" @@ -62,190 +60,5 @@ void Linear::dump(const compiler::LoweredGraph &lowered_graph, } } -void Linear::planTensors(const compiler::LoweredGraph &lowered_graph, - const std::vector<ir::OpSequenceIndex> &order) -{ - const auto &graph = lowered_graph.graph(); - ir::OperandIndexMap<std::shared_ptr<backend::ITensorBuilder>> tensor_builder_map; - - ir::OperandIndexMap<uint32_t> uses_map; - ir::OperandIndexMap<uint32_t> def_map; - ir::OperandIndexSequence constants; - - // Prepare scanning - graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - const auto lower_info = lowered_graph.getLowerInfo(ind); - // TODO Remove if onert doesn't support anymore such as - // GeneratedTests.reshape_quant8_weights_as_inputs - if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 && - !graph.getInputs().contains(ind)) - { - VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process." - << std::endl; - return; - } - - // Unused input of subgraph - // TODO Register unused input as nullptr in tensor_builder - if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 && - graph.getInputs().contains(ind)) - { - VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process." - << std::endl; - return; - } - - uses_map[ind] = obj.getUses().size(); - def_map[ind] = obj.getDef().valid() ? 1 : 0; - - bool is_const = obj.isConstant(); - if (is_const) - { - constants.append(ind); - } - - auto factor = lower_info->def_factors().getOnlyElement(); - auto backend = factor.backend(); - auto tensor_builder = lowered_graph.backend_contexts().at(backend)->tensor_builder; - if (!tensor_builder->isRegistered(ind)) - { - // These tensors do not exist in any op_seq (No use and def) - const auto info = obj.info(); - const auto backend_layout = factor.layout(); - // TODO Change tensor info to have permuted shape - tensor_builder->registerTensorInfo(ind, info, backend_layout); - } - - tensor_builder_map[ind] = tensor_builder; - }); - - const auto io_tensors = - (graph.getInputs() + graph.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - - // If a tensor is model output, increase the use of the tensor. - // This aim is same to above one. - for (const auto &ind : io_tensors) - { - uses_map[ind]++; - } - - // Start scanning to do notify{First|Last}Use for each tensor - - // If a tensor is a constant, increase the use of the tensor. - // It makes the tensor not be dealloced. It means these will be deallocated last. - // And allocate constant operands first - VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl; - for (const auto &ind : constants) - { - uses_map[ind]++; - tensor_builder_map[ind]->notifyFirstUse(ind); - } - - // Allocate Model's inputs - VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl; - for (const auto &ind : graph.getInputs() | ir::Remove::DUPLICATED) - { - auto tensor_builder = tensor_builder_map[ind]; - if (!tensor_builder) // for GeneratedTests.xxx_weights_as_inputs - continue; - tensor_builder->notifyFirstUse(ind); - } - - // At each operation, - // 1. Scan DEF of outputs. If the DEF, allocate it - // 2. Scan DEF of inputs. If variable tensor, allocate it - // 3. Scan USE of inputs. 
Decrease the USE and deallocate if the USE is 0 - VERBOSE(LINEAR) << "TENSORS" << std::endl; - for (const auto op_seq_ind : order) - { - const auto &op_seq = lowered_graph.op_seqs().at(op_seq_ind); - for (const auto &op_idx : op_seq.operations()) - { - for (const auto &ind : graph.operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - assert(def_map.find(ind) != def_map.end()); - if (def_map[ind]) - { - def_map[ind] = 0; - tensor_builder_map[ind]->notifyFirstUse(ind); - } - } - - // Scan variable tensors - // This tensor has features like constant. But OperandInfo and LowerInfo treat them as - // non-constant because of less memory usage by memory planning in here - for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - const auto &operand = graph.operands().at(ind); - if (operand.info().isVariable()) - { - // The variable tensor with buffer is not supported yet - assert(operand.data() == nullptr); - assert(operand.getUses().size() == 1 && !operand.getDef().valid()); - assert(lowered_graph.getLowerInfo(ind)->def_factors().size() == 1 && - lowered_graph.getLowerInfo(ind)->use_factors().size() == 1); - assert(uses_map[ind] == 1 && def_map[ind] == 0); - tensor_builder_map[ind]->notifyFirstUse(ind); - } - } - - for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - assert(uses_map.find(ind) != uses_map.end()); - assert(uses_map[ind] > 0); - uses_map[ind]--; - if (uses_map[ind] == 0) - { - // plan for deallocation of static tensornode - tensor_builder_map[ind]->notifyLastUse(ind); - - // plan for deallocation of dynamic tensor - auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager(); - if (dyn_tensor_manager) - { - const auto *backend = - lowered_graph.getLowerInfo(ind)->def_factors().getOnlyElement().backend(); - auto &tensor_registry = lowered_graph.backend_contexts().at(backend)->tensor_registry; - auto *tensor = tensor_registry->getITensor(ind); - assert(tensor); - if (!io_tensors.contains(ind)) // I/O tensors cannot be deallocated - dyn_tensor_manager->planDealloc(op_idx, tensor); - } - } - } - } - } - - // Dispose and validate - for (const auto &ind : io_tensors) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder_map[ind]->notifyLastUse(ind); - } - } - - for (const auto &ind : constants) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder_map[ind]->notifyLastUse(ind); - } - } - - assert( - std::all_of(uses_map.begin(), uses_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); - - assert( - std::all_of(def_map.begin(), def_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); -} - } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h index 1e24cf92b..56b42ccb0 100644 --- a/runtime/onert/core/src/compiler/Linear.h +++ b/runtime/onert/core/src/compiler/Linear.h @@ -22,7 +22,6 @@ #include "ir/OpSequences.h" #include "ir/Index.h" -#include "backend/ITensorBuilder.h" #include "compiler/LoweredGraph.h" namespace onert @@ -44,8 +43,6 @@ public: static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph); static void dump(const compiler::LoweredGraph &lowered_graph, 
const std::vector<ir::OpSequenceIndex> &order); - static void planTensors(const compiler::LoweredGraph &lowered_graph, - const std::vector<ir::OpSequenceIndex> &order); }; } // namespace compiler diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc index 673d7d3e8..6d5210dc5 100644 --- a/runtime/onert/core/src/compiler/LoweredGraph.cc +++ b/runtime/onert/core/src/compiler/LoweredGraph.cc @@ -32,6 +32,7 @@ #include "compiler/BackendResolver.h" #include "compiler/ManualScheduler.h" #include "compiler/HEScheduler.h" +#include "util/TracingCtx.h" namespace onert { @@ -40,6 +41,13 @@ namespace compiler LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph} { + // set tracing_ctx for copied graph + if (options.tracing_ctx) + { + auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph); + options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value()); + } + bool linear_executor = (options.executor == "Linear"); // Build backend contexts @@ -112,7 +120,7 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option .run(); // Set LowerInfo for each operand from the operand::LowerInfo holder - manipulateLowerInfo(operands_lower_info, options.is_primary_subgraph); + manipulateLowerInfo(operands_lower_info); dumpLowerInfo(); } @@ -126,7 +134,11 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option // Optimization passes pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run(); - VERBOSE(OpSequences) << "Dump after permutation insertion" << std::endl; + VERBOSE(LoweredGraph) << "Dump after permutation insertion" << std::endl; + for (auto operand : _graph.getInputs()) + VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl; + for (auto operand : _graph.getOutputs()) + VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl; dumpOpSequences(_op_seqs, _graph.operations()); // Graph verifications @@ -322,50 +334,22 @@ void LoweredGraph::makeOpSequences( } void LoweredGraph::manipulateLowerInfo( - ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info, - bool is_primary) + ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info) { const auto controlflow_backend = BackendManager::get().getControlflow(); - // TODO Rather than handling primary graph specially, - // let the permute inserted and remove it later - if (is_primary) + // TODO Rather than using NHWC Get frontend layout of this node from IR + auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC}; + for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED) { - // TODO Rather than using NHWC Get frontend layout of this node from IR - auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC}; - for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED) - { - auto &&lower_info = operands_lower_info.at(index); - assert(lower_info->def_factors().empty()); - lower_info->addDefPermuteFactor(factor); - } - for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED) - { - auto &&lower_info = operands_lower_info.at(index); - lower_info->addUsePermuteFactor(factor); - } + auto &&lower_info = operands_lower_info.at(index); + assert(lower_info->def_factors().empty()); + lower_info->addDefPermuteFactor(factor); } - else + for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED) { - for (auto index : _graph.getInputs() | 
ir::Remove::UNDEFINED) - { - auto &&lower_info = operands_lower_info.at(index); - if (!(lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0)) - { - // In case of not that Graph's input is not used in any operation and not the graph's - // output. - // In other words, it is not unused input in Graph. - lower_info->addDefPermuteFactor(*lower_info->use_factors().begin()); - } - else - { - // In case of that an operand is Graph's input and not input or output of any operation - lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{ - controlflow_backend, - ir::Layout::NHWC // TODO Get frontend layout of this node from IR - }); - } - } + auto &&lower_info = operands_lower_info.at(index); + lower_info->addUsePermuteFactor(factor); } for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED) { @@ -446,8 +430,11 @@ void LoweredGraph::dumpLowerInfo() sstream << (shape.dim(i)) << " "; } sstream << "}" << std::endl; - sstream << " - Def ir::Operations : " << def_ops << std::endl; - sstream << " - Use ir::Operations : " << use_ops << std::endl; + sstream << " - Def Operations : " << def_ops << std::endl; + sstream << " - Use Operations : " << use_ops << std::endl; + sstream << " - Data : " + << (object.data() ? (std::to_string(object.data()->size()) + " bytes") : "N/A") + << std::endl; sstream << " - Lower Info" << std::endl; sstream << " - Def Backends : " << def_layouts << std::endl; sstream << " - Use Backends : " << use_layouts << std::endl; diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc index ed49ee56f..1f4a47864 100644 --- a/runtime/onert/core/src/compiler/ManualScheduler.cc +++ b/runtime/onert/core/src/compiler/ManualScheduler.cc @@ -100,10 +100,11 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap } // Dump final assignment - backend_resolver->iterate([&](const ir::OperationIndex &index, const backend::Backend &backend) { - VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": " - << backend.config()->id() << std::endl; - }); + WHEN_LOG_ENABLED(backend_resolver->iterate( + [&](const ir::OperationIndex &index, const backend::Backend &backend) { + VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": " + << backend.config()->id() << std::endl; + })); return backend_resolver; } diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc index c18178da9..e0c9f5283 100644 --- a/runtime/onert/core/src/compiler/ShapeValidator.cc +++ b/runtime/onert/core/src/compiler/ShapeValidator.cc @@ -37,7 +37,7 @@ namespace compiler { ShapeValidator::ShapeValidator(const ir::Graph &graph) - : _graph{graph}, _ctx{graph.operands()}, _current_op_seq_layout{ir::Layout::UNKNOWN} + : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN} { } @@ -59,7 +59,7 @@ void ShapeValidator::operator()() // creating Compiler assert(_graph.subgraphs() == nullptr); - _current_op_seq_layout = _graph.layout(); + _current_layout = _graph.layout(); _graph.operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); }); @@ -90,7 +90,7 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto input_shape = 
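// NOTE The CROPS_DATA requirements added just below expect the optional crops input
// to be shaped [rank(input) - 2, 2], i.e. one (begin, end) crop pair per spatial
// dimension. For the common 4D NHWC input that is a 2x2 tensor laid out as
// crops = [[top, bottom], [left, right]] (the pair names here are illustrative,
// not onert identifiers).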
_ctx.at(ifm_index).shape().asFeature(frontend_layout); const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); @@ -101,6 +101,14 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node) OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2); + if (node.getInputs().size() != 2) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + OP_REQUIRES(_ctx.at(crops_index).shape().rank() == 2); + OP_REQUIRES(_ctx.at(crops_index).shape().dim(0) == (_ctx.at(ifm_index).shape().rank() - 2)); + OP_REQUIRES(_ctx.at(crops_index).shape().dim(1) == 2); + } + OP_REQUIRES(input_shape.C == output_shape.C); } @@ -330,7 +338,7 @@ void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node) node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); @@ -355,7 +363,7 @@ void ShapeValidator::visit(const ir::operation::SpaceToDepth &node) const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); const auto block_size = node.param().block_size; @@ -471,7 +479,7 @@ void ShapeValidator::visit(const ir::operation::TransposeConv &node) OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank()); OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); // The kernel has only IHWO layout on frontend @@ -516,7 +524,7 @@ void ShapeValidator::visit(const ir::operation::DepthToSpace &node) const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout); const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout); diff --git a/runtime/onert/core/src/compiler/ShapeValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h index f40c098d5..763cf7ce3 100644 --- a/runtime/onert/core/src/compiler/ShapeValidator.h +++ b/runtime/onert/core/src/compiler/ShapeValidator.h @@ -93,7 +93,7 @@ private: // TODO Remove _ctx field const ir::Graph &_graph; const ir::Operands &_ctx; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; }; } // namespace compiler diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc index d3b083b78..1f2c6f3b9 100644 --- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc +++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc @@ -142,12 +142,12 @@ void StaticShapeInferer::dump() } } -void StaticShapeInferer::visit(const 
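// NOTE Below, the ArgMax-only visitor is generalized to ArgMinMax, and the renamed
// inferArgMinMaxShape() yields the input shape with the reduced axis removed.
// A standalone sketch of that rule, using a plain int vector in place of ir::Shape:
#include <vector>

// e.g. {2, 3, 4} with axis = 1 becomes {2, 4}
inline std::vector<int> argMinMaxShapeSketch(const std::vector<int> &in, int axis)
{
  std::vector<int> out;
  for (int i = 0; i < static_cast<int>(in.size()); ++i)
    if (i != axis)
      out.push_back(in[i]); // keep every dimension except the reduced axis
  return out;
}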
ir::operation::ArgMax &op) +void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op) { - const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; + const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; const auto &input = _operands.at(input_idx); - const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; const auto &axis = _operands.at(axis_idx); // get mutable output operand @@ -166,7 +166,8 @@ void StaticShapeInferer::visit(const ir::operation::ArgMax &op) axis_value = axis_value < 0 ? axis_value + rank : axis_value; // re-sizing output shape - ir::Shape new_shape = shape_inference::inferArgMaxShape(input.info().shape(), axis_value, rank); + ir::Shape new_shape = + shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank); output.info().shape(new_shape); } @@ -335,35 +336,47 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op) // even when axis is constant, output shape should be recalculated since user might call // nnfw_set_input_tensorinfo(input, some_new_shape) - auto axis_buf = reinterpret_cast<const int32_t *>(axis.data()->base()); - assert(axis_buf); + auto axis_type = axis.typeInfo().type(); + assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64); + + assert(axis.data()->base()); + int32_t axis_value = + (axis_type == ir::DataType::INT32) + ? reinterpret_cast<const int32_t *>(axis.data()->base())[0] + : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis.data()->base())[0]); // re-sizing output shape - ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_buf[0]); + ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_value); output.info().shape(new_shape); } void StaticShapeInferer::visit(const ir::operation::Fill &op) { - const auto input_idx{op.getInputs().at(ir::operation::Fill::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)}; + const auto &shape = _operands.at(shape_idx); const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - if (!input.isConstant()) + if (!shape.isConstant()) { output.info().setDynamic(); _return_has_dynamic_tensor = true; return; } - assert(input.typeInfo().type() == ir::DataType::INT32); + const auto dims_type = shape.typeInfo().type(); + assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64); - auto input_buf = reinterpret_cast<const int32_t *>(input.data()->base()); - assert(input_buf); + auto dims_buf = shape.data()->base(); + assert(dims_buf); + + const auto &dims_shape = shape.info().shape(); + auto new_shape = ((dims_type == ir::DataType::INT32) + ? shape_inference::inferFillShape<int32_t>( + dims_shape, reinterpret_cast<const int32_t *>(dims_buf)) + : shape_inference::inferFillShape<int64_t>( + dims_shape, reinterpret_cast<const int64_t *>(dims_buf))); - // re-sizing output shape - ir::Shape new_shape = shape_inference::inferFillShape(input.info().shape(), input_buf); output.info().shape(new_shape); } diff --git a/runtime/onert/core/src/compiler/TensorBuilders.h b/runtime/onert/core/src/compiler/TensorBuilders.h deleted file mode 100644 index 3b0360b4b..000000000 --- a/runtime/onert/core/src/compiler/TensorBuilders.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_COMPILER_TENSOR_BUILDERS_H__ -#define __ONERT_COMPILER_TENSOR_BUILDERS_H__ - -#include <unordered_set> -#include <memory> -#include "backend/BackendContext.h" -#include "backend/Backend.h" -#include "backend/controlflow/Config.h" -#include "backend/controlflow/TensorBuilder.h" -#include "util/logging.h" - -namespace onert -{ -namespace compiler -{ - -class TensorBuilders -{ -public: - TensorBuilders() = default; - - TensorBuilders(const onert::backend::BackendContexts &backend_contexts, bool include_controlflow) - { - for (const auto &e : backend_contexts) - { - if (e.first->config()->id() == backend::controlflow::Config::ID) - { - _cf_tensor_builder = std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>( - e.second->tensor_builder); - if (include_controlflow) - _tensor_builders.insert(e.second->tensor_builder); - } - else - { - _tensor_builders.insert(e.second->tensor_builder); - } - } - } - - std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator begin() const - { - return _tensor_builders.cbegin(); - } - std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator end() const - { - return _tensor_builders.cend(); - } - - std::shared_ptr<backend::controlflow::TensorBuilder> getControlflowTensorBuilder() const - { - return _cf_tensor_builder; - } - -private: - std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders; - std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder; -}; - -} // namespace compiler -} // namespace onert - -#endif // __ONERT_COMPILER_TENSOR_BUILDERS_H__ diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc index c83a72ada..8467d51c8 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc @@ -130,9 +130,11 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde // Generate output operand and permute operation auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo()); - // change model output if operand_index is model output index + // change model output if operand_index is model output index and the out operand is controlflow + // backend auto &model_outputs = _graph.getOutputs(); - if (model_outputs.contains(operand_index)) + const backend::Backend *cf_backend = compiler::BackendManager::get().getControlflow(); + if (model_outputs.contains(operand_index) && factor.backend() == cf_backend) { model_outputs.replace(operand_index, out_operand_index); } @@ -191,8 +193,10 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde const auto &node = _graph.operations().at(node_index); VERBOSE_F() << "Permute Op inserted, node index : " << node_index << std::endl; - VERBOSE_F() << " - Input (original) Operand : " << 
operand_index << std::endl; - VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << std::endl; + VERBOSE_F() << " - Input (original) Operand : " << operand_index << "(" + << input_factor.backend()->config()->id() << ")" << std::endl; + VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << "(" + << factor.backend()->config()->id() << ")" << std::endl; // OpSequence { diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc index 53bc3c204..b81a75794 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.cc +++ b/runtime/onert/core/src/exec/DataflowExecutor.cc @@ -78,11 +78,10 @@ bool DataflowExecutor::noWaitingJobs() } DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, const compiler::TensorRegistries &tensor_regs, - compiler::CodeMap &&code_map) - : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs}, + compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx) + : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx}, _code_map{std::move(code_map)} { VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl; @@ -143,7 +142,9 @@ void DataflowExecutor::executeImpl() } assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs - _subject.notifyModelBegin(this); + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); while (!_ready_jobs.empty()) { @@ -157,7 +158,7 @@ void DataflowExecutor::executeImpl() const backend::Backend *backend = _lowered_graph->getLowerInfo()->op_seq.at(op_seq_index)->backend(); - _subject.notifyJobBegin(this, op_seq, backend); + _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend); job->fn_seq()->initRunning(); @@ -167,13 +168,13 @@ void DataflowExecutor::executeImpl() job->run(); - _subject.notifyJobEnd(this, op_seq, backend); + _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend); notify(job_index); _finished_jobs[job_index] = std::move(job); } assert(noWaitingJobs()); - _subject.notifyModelEnd(this); + _subject.notifySubgraphEnd(profiling_subg_index); // Reset input info for the next execution _input_info = _initial_input_info; diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h index 69dfda15c..b72c0d030 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.h +++ b/runtime/onert/core/src/exec/DataflowExecutor.h @@ -28,6 +28,7 @@ #include <memory> #include "exec/ExecutorBase.h" #include "compiler/CodeMap.h" +#include "util/TracingCtx.h" namespace onert { @@ -50,9 +51,8 @@ public: * @param code_map OpSequence and its code map */ DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, - const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map); + const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx); void executeImpl() override; diff --git a/runtime/onert/core/src/exec/DynamicShapeInferer.cc b/runtime/onert/core/src/exec/DynamicShapeInferer.cc index 1666d3f08..2d9d534f1 100644 --- a/runtime/onert/core/src/exec/DynamicShapeInferer.cc +++ 
b/runtime/onert/core/src/exec/DynamicShapeInferer.cc @@ -92,12 +92,12 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op, assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::ArgMax &op) +void DynamicShapeInferer::visit(const ir::operation::ArgMinMax &op) { - const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; + const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; const auto input = _tensor_registry->getITensor(input_idx); - const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; const auto axis = _tensor_registry->getITensor(axis_idx); auto output_ind = op.getOutputs().at(0); @@ -111,7 +111,7 @@ void DynamicShapeInferer::visit(const ir::operation::ArgMax &op) const auto rank = input_shape.rank(); axis_value = axis_value < 0 ? axis_value + rank : axis_value; - ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis_value, rank); + ir::Shape new_shape = shape_inference::inferArgMinMaxShape(input_shape, axis_value, rank); output->applyShape(new_shape); assert(output->buffer() != nullptr); @@ -388,10 +388,16 @@ void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op) auto axis_ind = op.getInputs().at(ir::operation::ExpandDims::AXIS); auto axis = _tensor_registry->getITensor(axis_ind); - auto axis_buf = reinterpret_cast<const int32_t *>(axis->buffer()); - assert(axis_buf); + auto axis_type = axis->data_type(); + assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64); - auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]); + assert(axis->buffer()); + int32_t axis_value = + (axis_type == ir::DataType::INT32) + ? reinterpret_cast<const int32_t *>(axis->buffer())[0] + : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]); + + auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_value); output->applyShape(output_shape); assert(output->buffer() != nullptr); @@ -402,19 +408,24 @@ void DynamicShapeInferer::visit(const ir::operation::Fill &op) // check if output is not dynamic auto output_ind = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_ind); - auto input_ind = op.getInputs().at(ir::operation::Fill::Input::INPUT); - auto input = _tensor_registry->getITensor(input_ind); - ir::Shape input_shape = input->getShape(); + auto shape_ind = op.getInputs().at(ir::operation::Fill::Input::SHAPE); + auto shape = _tensor_registry->getITensor(shape_ind); - if ((!input->is_dynamic()) && (!output->is_dynamic())) + if ((!shape->is_dynamic()) && (!output->is_dynamic())) return; - assert(input->data_type() == ir::DataType::INT32); + const auto dims_type = shape->data_type(); + assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64); - auto input_buf = reinterpret_cast<const int32_t *>(input->buffer()); - assert(input_buf); + auto dims_buf = shape->buffer(); + assert(dims_buf); - auto output_shape = shape_inference::inferFillShape(input_shape, input_buf); + const auto &dims_shape = shape->getShape(); + auto output_shape = ((dims_type == ir::DataType::INT32) + ? 
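// NOTE inferFillShape() is now templated on the element type of Fill's SHAPE input,
// since the dims tensor may hold INT32 or INT64 values. A minimal sketch of what
// such a template has to do, assuming simplified types (a plain vector instead of
// ir::Shape, and an explicit element count instead of the dims tensor's shape):
#include <cstdint>
#include <vector>

template <typename T>
std::vector<int32_t> fillShapeSketch(int64_t num_dims, const T *dims_buf)
{
  std::vector<int32_t> out_shape;
  for (int64_t i = 0; i < num_dims; ++i)
    out_shape.push_back(static_cast<int32_t>(dims_buf[i])); // narrows INT64 dims
  return out_shape;
}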
shape_inference::inferFillShape<int32_t>( + dims_shape, reinterpret_cast<const int32_t *>(dims_buf)) + : shape_inference::inferFillShape<int64_t>( + dims_shape, reinterpret_cast<const int64_t *>(dims_buf))); output->applyShape(output_shape); assert(output->buffer() != nullptr); diff --git a/runtime/onert/core/src/exec/ExecTime.h b/runtime/onert/core/src/exec/ExecTime.h index 846d0930b..d2ddbad34 100644 --- a/runtime/onert/core/src/exec/ExecTime.h +++ b/runtime/onert/core/src/exec/ExecTime.h @@ -94,7 +94,7 @@ public: /** * @brief Update metrics file with new data. */ - void uploadOperationsExecTime() const { _json.uploadOperationsExecTime(); } + void storeOperationsExecTime() const { _json.storeOperationsExecTime(); } static const int64_t NOT_FOUND = -1; private: diff --git a/runtime/onert/core/src/exec/ExecutionObservee.cc b/runtime/onert/core/src/exec/ExecutionObservee.cc index ddb1fb6a0..d5003b126 100644 --- a/runtime/onert/core/src/exec/ExecutionObservee.cc +++ b/runtime/onert/core/src/exec/ExecutionObservee.cc @@ -26,37 +26,38 @@ void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer) _observers.emplace_back(std::move(observer)); } -void ExecutionObservee::notifyModelBegin(IExecutor *executor) +void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind) { for (auto &o : _observers) { - o->handleBegin(executor); + o->handleSubgraphBegin(ind); } } -void ExecutionObservee::notifyModelEnd(IExecutor *executor) +void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind) { for (auto &o : _observers) { - o->handleEnd(executor); + o->handleSubgraphEnd(ind); } } -void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, +void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index, + const ir::OpSequence *op_seq, const backend::Backend *backend) { for (auto &o : _observers) { - o->handleBegin(executor, op_seq, backend); + o->handleJobBegin(executor, index, op_seq, backend); } } -void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index, + const ir::OpSequence *op_seq, const backend::Backend *backend) { for (auto &o : _observers) { - o->handleEnd(executor, op_seq, backend); + o->handleJobEnd(executor, index, op_seq, backend); } } diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h index 49d409a3a..62b3f6201 100644 --- a/runtime/onert/core/src/exec/ExecutionObservee.h +++ b/runtime/onert/core/src/exec/ExecutionObservee.h @@ -20,6 +20,7 @@ #include <list> #include "exec/ExecutionObservers.h" +#include "ir/Index.h" namespace onert { @@ -39,11 +40,11 @@ public: * @param observer Observer to be added */ void add(std::unique_ptr<IExecutionObserver> observer); - void notifyModelBegin(IExecutor *executor); - void notifyModelEnd(IExecutor *executor); - void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, + void notifySubgraphBegin(ir::SubgraphIndex ind); + void notifySubgraphEnd(ir::SubgraphIndex ind); + void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq, const backend::Backend *backend); - void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, + void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq, const backend::Backend *backend); private: diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc 
b/runtime/onert/core/src/exec/ExecutionObservers.cc index 066b52ee1..18c0c1dd3 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.cc +++ b/runtime/onert/core/src/exec/ExecutionObservers.cc @@ -17,12 +17,62 @@ #include "exec/ExecutionObservers.h" #include <string> +#include <sstream> #include "util/logging.h" #include "exec/IExecutor.h" #include "misc/polymorphic_downcast.h" #include "ir/OpSequence.h" #include "util/EventWriter.h" +#include "util/Utils.h" + +namespace +{ + +void setUserData(const onert::ir::Graph &g, const onert::ir::OpSequence *op_seq, + decltype(EventCollector::Event::userData) &data) +{ + if (op_seq->size() == 0) + return; + + // From a tensor of shape [a, b, c], this will return a string "shape(a b c)". + // String like "[1, 2, 3]" looks better but this will be considered as a list in Json + // so text search (e.g., Ctrl-F in Chrome Tracing) could be difficult + auto build_shape_str = [&](onert::ir::OperandIndex operand_idx) { + std::string shape_str; + auto &shape = g.operands().at(operand_idx).info().shape(); + for (int i = 0; i < shape.rank(); i++) + { + if (i == 0) + shape_str = "shape(" + std::to_string(shape.dim(i)); + else + shape_str += " " + std::to_string(shape.dim(i)); + } + shape_str += ")"; + + return shape_str; + }; + + const auto &first_op_idx = op_seq->operations().at(0); + const auto &first_op_node = g.operations().at(first_op_idx); + + auto &inputs = first_op_node.getInputs(); + auto size = inputs.size(); + for (size_t i = 0; i < size; i++) + { + auto operand_idx = inputs.at(i); + if (operand_idx.undefined()) + continue; + + std::string key("input_shape_" + std::to_string(i)); + std::string value = build_shape_str(operand_idx); + data.emplace_back(std::make_pair(key, value)); + } + + // add other userData as needed +} + +} // namespace namespace onert { @@ -30,8 +80,8 @@ namespace onert namespace exec { -void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence *, - const onert::backend::Backend *backend) +void ProfileObserver::handleJobBegin(onert::exec::IExecutor *, ir::SubgraphIndex, + const ir::OpSequence *, const onert::backend::Backend *backend) { _timer = backend->config()->timer(); if (_timer == nullptr) @@ -39,8 +89,8 @@ void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence _timer->handleBegin(); } -void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex, const ir::OpSequence *op_seq, + const backend::Backend *backend) { _timer->handleEnd(); const auto timer_res = _timer->getTime(); @@ -70,51 +120,74 @@ void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq, } }; -ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph) - : _base_filepath(filepath), _recorder{}, _collector{&_recorder}, _graph{graph} +TracingObserver::TracingObserver(const std::string &filepath, const ir::Graph &graph, + const util::TracingCtx *tracing_ctx) + : _recorder{std::make_unique<EventRecorder>()}, _collector{_recorder.get()}, _graph{graph}, + _tracing_ctx{tracing_ctx} { + // TODO Remove below after using _tracing_ctx + UNUSED_RELEASE(_tracing_ctx); + + _event_writer = EventWriter::get(filepath); + _event_writer->startToUse(); } -ChromeTracingObserver::~ChromeTracingObserver() +TracingObserver::~TracingObserver() { try { - EventWriter{_recorder}.writeToFiles(_base_filepath); + 
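// NOTE Worked example for the setUserData() helper added above: if the first
// operation of an op sequence has inputs shaped {1, 224, 224, 3} and {64}, the
// event's userData becomes ("input_shape_0", "shape(1 224 224 3)") and
// ("input_shape_1", "shape(64)"). The space-separated "shape(...)" form is used
// instead of "[1, 224, 224, 3]" so the value stays a plain searchable string
// rather than being parsed as a JSON list.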
_event_writer->readyToFlush(std::move(_recorder)); } catch (const std::exception &e) { - std::cerr << "E: Fail to record event in ChromeTracingObserver: " << e.what() << std::endl; + std::cerr << "E: Fail to record event in TracingObserver: " << e.what() << std::endl; } } -void ChromeTracingObserver::handleBegin(IExecutor *) +void TracingObserver::handleSubgraphBegin(ir::SubgraphIndex subg_ind) { + // TODO Write subg_ind into profiling result + UNUSED_RELEASE(subg_ind); _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"}); } -void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void TracingObserver::handleJobBegin(IExecutor *, ir::SubgraphIndex subg_ind, + const ir::OpSequence *op_seq, const backend::Backend *backend) { + // TODO Write subg_ind into profiling result + UNUSED_RELEASE(subg_ind); + std::string backend_id = backend->config()->id(); - _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, - opSequenceTag(op_seq, _graph.operations())}); + + auto ev = EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, + opSequenceTag(op_seq, _graph.operations())}; + // add shape of inputs + setUserData(_graph, op_seq, ev.userData); + + _collector.onEvent(ev); } -void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void TracingObserver::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_ind, + const ir::OpSequence *op_seq, const backend::Backend *backend) { + // TODO Write subg_ind into profiling result + UNUSED_RELEASE(subg_ind); + std::string backend_id = backend->config()->id(); _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, backend_id, opSequenceTag(op_seq, _graph.operations())}); } -void ChromeTracingObserver::handleEnd(IExecutor *) +void TracingObserver::handleSubgraphEnd(ir::SubgraphIndex subg_ind) { + // TODO Write subg_ind into profiling result + UNUSED_RELEASE(subg_ind); + _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"}); } -std::string ChromeTracingObserver::opSequenceTag(const ir::OpSequence *op_seq, - const ir::Operations &operations) +std::string TracingObserver::opSequenceTag(const ir::OpSequence *op_seq, + const ir::Operations &operations) { if (op_seq->size() == 0) return "Empty OpSequence"; diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h index f8c2acca5..a9eebfee1 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.h +++ b/runtime/onert/core/src/exec/ExecutionObservers.h @@ -18,12 +18,16 @@ #define __ONERT_EXEC_OBSREVERS_H__ #include "exec/IFunction.h" +#include "ir/Index.h" #include "ir/OpSequence.h" #include "ExecTime.h" #include "util/ITimer.h" #include "exec/IExecutor.h" #include "util/EventCollector.h" #include "util/EventRecorder.h" +#include "util/EventWriter.h" +#include "util/TracingCtx.h" +#include "util/EventWriter.h" namespace onert { @@ -33,13 +37,15 @@ class IExecutionObserver { public: /// @brief Invoked just before model (not individual operation) execution begins - virtual void handleBegin(IExecutor *) { return; } + virtual void handleSubgraphBegin(ir::SubgraphIndex) { return; } - virtual void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0; - virtual void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0; + virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, const
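// NOTE Observer callbacks are now keyed by ir::SubgraphIndex rather than only the
// executor object, so one observer can tell apart events from different subgraphs.
// A sketch of a custom per-job timer in the spirit of this interface, with the
// onert-specific parameters omitted (the real overrides also receive IExecutor *,
// ir::SubgraphIndex, const ir::OpSequence * and const backend::Backend *):
#include <chrono>
#include <iostream>

class JobTimerSketch
{
public:
  void handleJobBegin() { _start = std::chrono::steady_clock::now(); }
  void handleJobEnd()
  {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                std::chrono::steady_clock::now() - _start)
                .count();
    std::cout << "job took " << us << " us" << std::endl;
  }

private:
  std::chrono::steady_clock::time_point _start;
};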
ir::OpSequence *, + const backend::Backend *) = 0; + virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) = 0; /// @brief Invoked just after model (not individual operation) execution ends - virtual void handleEnd(IExecutor *) { return; } + virtual void handleSubgraphEnd(ir::SubgraphIndex) { return; } virtual ~IExecutionObserver() = default; }; @@ -51,10 +57,12 @@ public: : _et(std::move(et)), _graph(graph) { } - void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; + void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; + void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; - void handleEnd(IExecutor *) override { _et->uploadOperationsExecTime(); } + void handleSubgraphEnd(ir::SubgraphIndex) override { _et->storeOperationsExecTime(); } private: std::unique_ptr<util::ITimer> _timer; @@ -62,24 +70,28 @@ private: const ir::Graph &_graph; }; -class ChromeTracingObserver : public IExecutionObserver +class TracingObserver : public IExecutionObserver { public: - ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph); - ~ChromeTracingObserver(); - void handleBegin(IExecutor *) override; - void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *) override; + TracingObserver(const std::string &filepath, const ir::Graph &graph, + const util::TracingCtx *tracing_ctx); + ~TracingObserver(); + void handleSubgraphBegin(ir::SubgraphIndex) override; + void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; + void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; + void handleSubgraphEnd(ir::SubgraphIndex) override; private: static std::string opSequenceTag(const ir::OpSequence *op_seq, const ir::Operations &operations); private: - const std::string &_base_filepath; - EventRecorder _recorder; + std::unique_ptr<EventRecorder> _recorder; EventCollector _collector; const ir::Graph &_graph; + EventWriter *_event_writer; + const util::TracingCtx *_tracing_ctx; }; } // namespace exec diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc index 018a0bba0..588a3258d 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.cc +++ b/runtime/onert/core/src/exec/ExecutorBase.cc @@ -15,11 +15,11 @@ */ #include "ExecutorBase.h" +#include "ShapeConverter.h" -#include "backend/ITensor.h" #include "backend/controlflow/UserTensor.h" -#include "backend/cpu_common/Tensor.h" #include "util/logging.h" +#include "misc/polymorphic_downcast.h" namespace onert { @@ -27,43 +27,27 @@ namespace exec { ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, - const compiler::TensorRegistries &tensor_regs) - : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, - _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex() + const compiler::TensorRegistries &tensor_regs, + const util::TracingCtx *tracing_ctx) + : 
_lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, _mutex(), + _tracing_ctx(tracing_ctx) { - // TODO Fix the way of knowing whether it is primary or not - bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty()); - if (!primary_executor) - { - auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) { - std::vector<backend::ITensor *> list; - for (auto ind : ind_seq) - { - backend::ITensor *tensor = tensor_regs.getITensor(ind); - assert(tensor != nullptr); - list.push_back(tensor); - } - return list; - }; - auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) { - std::vector<backend::ITensor *> list; - for (auto ind : ind_seq) - { - backend::ITensor *tensor = tensor_regs.getITensor(ind); - assert(tensor != nullptr); - list.push_back(tensor); - } - return list; - }; - _input_tensors = build_input_tensor_list(_graph.getInputs()); - _output_tensors = build_output_tensor_list(_graph.getOutputs()); - } + auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) { + assert(tensors.empty()); + for (auto ind : ind_seq) + { + backend::ITensor *tensor = tensor_regs.getITensor(ind); + assert(tensor != nullptr); + auto io_tensor = nnfw::misc::polymorphic_downcast<backend::controlflow::IOTensor *>(tensor); + tensors.push_back(io_tensor); + } + }; + build_tensor_list(_graph.getInputs(), _input_tensors); + build_tensor_list(_graph.getOutputs(), _output_tensors); } -void ExecutorBase::execute(const std::vector<backend::ITensor *> &src_tensors, - const std::shared_ptr<IPermuteFunction> &pre_fn) +void ExecutorBase::execute(const std::vector<backend::IPortableTensor *> &inputs, + const std::vector<backend::IPortableTensor *> &outputs) { // For thread-safe, use mutex // TODO: if all used backends on this executor are thread-safe, @@ -71,31 +55,37 @@ void ExecutorBase::execute(const std::vector<backend::ITensor *> &src_tensors, // Deadlock occurs when an Executor is called recursively. std::lock_guard<std::mutex> lock(_mutex); - assert(src_tensors.size() == _graph.getInputs().size()); - assert(src_tensors.size() == _input_tensors.size()); - for (uint32_t n = 0; n < _graph.getInputs().size(); ++n) + assert(inputs.size() == _graph.getInputs().size()); + assert(inputs.size() == _input_tensors.size()); + for (uint32_t n = 0; n < inputs.size(); ++n) { - // when user changes input shape, the input tensor is dynamic and its memory is not allocated. - // This code find the info to allocate dynamic tensor, and allocate memory based on the source - // tensor's shape set by caller. 
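// NOTE The new execute() below marks an input tensor as dynamic whenever the shape
// supplied by the caller differs from the shape the model was compiled with.
// The decision rule as a standalone sketch, using plain vectors for shapes and
// ignoring the layout conversion performed by convertShape():
#include <vector>

inline bool needsDynamicInput(const std::vector<int> &compiled_shape,
                              const std::vector<int> &caller_shape)
{
  // A mismatch invalidates the static memory plan, so the tensor must be
  // switched to dynamic allocation before running.
  return compiled_shape != caller_shape;
}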
- const auto src_tensor = src_tensors[n]; + const auto input = inputs[n]; + assert(input->buffer() != nullptr); auto input_tensor = _input_tensors[n]; - // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors - if (src_tensor != nullptr && input_tensor != nullptr) + assert(input_tensor != nullptr); + if (input != nullptr) { - const auto orig_input_shape = input_tensor->getShape(); + const auto orig_input_shape = input_tensor->orig_info().shape(); const auto changed_input_shape = - convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout()); + convertShape(input->getShape(), input->layout(), input_tensor->orig_layout()); if (orig_input_shape != changed_input_shape) { input_tensor->set_dynamic(); } } + input_tensor->setTensor(input); } - // TODO Move calling permute_fn.run() into executeImpl() - assert(pre_fn); - pre_fn->run(); + assert(outputs.size() == _graph.getOutputs().size()); + assert(outputs.size() == _output_tensors.size()); + for (uint32_t n = 0; n < outputs.size(); ++n) + { + const auto output = outputs[n]; + // assert(dst_tensor->buffer() != nullptr); + auto output_tensor = _output_tensors[n]; + assert(output_tensor != nullptr); + output_tensor->setTensor(output); + } executeImpl(); } @@ -111,19 +101,19 @@ void ExecutorBase::execute(const IODescription &desc) assert(_input_tensors.size() == desc.inputs.size()); for (uint32_t i = 0; i < _input_tensors.size(); ++i) { - // TODO Remove dynamic_cast - auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_input_tensors[i]); - assert(tensor); + auto tensor = _input_tensors[i]; + + // TODO Check if (desc.inputs[i] == nullptr) + // TODO Better design for ITensor? (we need const_cast as ITensor is writable) + tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)), + desc.inputs[i]->size); + auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i}); if (input_shape != desc.dynamic_input_shapes.end()) { tensor->set_dynamic(); tensor->setShape(input_shape->second); } - // TODO Check if (desc.inputs[i] == nullptr) - // TODO Better design for ITensor? 
(we need const_cast as ITensor is writable) - tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)), - desc.inputs[i]->size); handleDynamicInputTensor(ir::IOIndex{i}, desc); } @@ -131,13 +121,12 @@ void ExecutorBase::execute(const IODescription &desc) assert(_output_tensors.size() == desc.outputs.size()); for (uint32_t i = 0; i < _output_tensors.size(); ++i) { - // TODO Remove dynamic_cast - auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_output_tensors[i]); - assert(tensor); - tensor->set_dynamic(); // It can't be resized but shape could change + auto tensor = _output_tensors[i]; + if (desc.outputs[i] == nullptr) throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."}; - tensor->setBuffer(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size); + tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size); + tensor->set_dynamic(); // It can't be resized but shape could change } executeImpl(); diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h index 8a6ec9174..5d95c10bf 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.h +++ b/runtime/onert/core/src/exec/ExecutorBase.h @@ -17,23 +17,25 @@ #ifndef __ONERT_EXEC_EXECUTOR_BASE_H__ #define __ONERT_EXEC_EXECUTOR_BASE_H__ -#include <mutex> - #include "IPermuteFunction.h" -#include "exec/ExecutionObservers.h" -#include "ShapeConverter.h" #include "exec/IExecutor.h" -#include "compiler/LoweredGraph.h" -#include "ir/LowerInfoMap.h" -#include "backend/IConfig.h" -#include "backend/Backend.h" #include "exec/ExecTime.h" -#include "exec/IFunction.h" -#include "backend/IDynamicTensorManager.h" -#include "backend/ITensorManager.h" #include "exec/ExecutionObservee.h" +#include "exec/IFunction.h" +#include "exec/IODescription.h" +#include "ir/Graph.h" +#include "ir/Index.h" +#include "ir/LowerInfoMap.h" +#include "ir/OperationIndexMap.h" +#include "compiler/LoweredGraph.h" #include "compiler/TensorRegistries.h" -#include <list> +#include "backend/controlflow/IOTensor.h" +#include "util/TracingCtx.h" + +#include <cstdint> +#include <memory> +#include <mutex> +#include <vector> namespace onert { @@ -49,25 +51,17 @@ public: * @param tensor_builders Tensor builders that are currently used */ ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, - const compiler::TensorRegistries &tensor_regs); + const compiler::TensorRegistries &tensor_regs, const util::TracingCtx *tracing_ctx); virtual ~ExecutorBase() = default; const ir::Graph &graph() final { return _graph; } - /** - * @brief Execute without IODescription - * - * @param src_tensor Tensor list that will be copied to input tensors of this - * @param pre_fn The permutation function that copy from src_tensor to input tensors of this - */ - void execute(const std::vector<backend::ITensor *> &src_tensors, - const std::shared_ptr<IPermuteFunction> &pre_fn); - void execute(const IODescription &desc) final; + void execute(const std::vector<backend::IPortableTensor *> &inputs, + const std::vector<backend::IPortableTensor *> &outputs) override; + // Used only in Dataflow and Parallel Executors void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final { @@ -78,9 +72,10 @@ public: void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); }; - const 
std::vector<backend::ITensor *> &getInputTensors() const { return _input_tensors; } - - const std::vector<backend::ITensor *> &getOutputTensors() const { return _output_tensors; } + const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const override + { + return _output_tensors; + } protected: /** @@ -93,9 +88,10 @@ protected: std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; std::unique_ptr<compiler::LoweredGraph> _lowered_graph; const ir::Graph &_graph; - std::vector<backend::ITensor *> _input_tensors; - std::vector<backend::ITensor *> _output_tensors; + std::vector<backend::controlflow::IOTensor *> _input_tensors; + std::vector<backend::controlflow::IOTensor *> _output_tensors; std::mutex _mutex; + const util::TracingCtx *_tracing_ctx; private: void handleDynamicInputTensor(ir::IOIndex input_index, const IODescription &desc); diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h index 11017edc9..8f62156a6 100644 --- a/runtime/onert/core/src/exec/IPermuteFunction.h +++ b/runtime/onert/core/src/exec/IPermuteFunction.h @@ -120,7 +120,8 @@ protected: } assert(src_tensor != dst_tensor); - assert(underlying_type(src_tensor->data_type()) == underlying_type(dst_tensor->data_type())); + if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type())) + throw std::runtime_error("data type does not match"); switch (src_tensor->data_type()) { case ir::DataType::FLOAT32: diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc index 72a18def1..b29216a2f 100644 --- a/runtime/onert/core/src/exec/JSONExecTime.cc +++ b/runtime/onert/core/src/exec/JSONExecTime.cc @@ -135,7 +135,7 @@ void JSON::printOperation(const std::map<uint32_t, int64_t> &operation_info, stream.seekp(-2, std::ofstream::end); } -void JSON::uploadOperationsExecTime() const +void JSON::storeOperationsExecTime() const { std::ofstream stream(_measurement_file); if (!stream.is_open()) diff --git a/runtime/onert/core/src/exec/JSONExecTime.h b/runtime/onert/core/src/exec/JSONExecTime.h index a64cb3133..8987d723c 100644 --- a/runtime/onert/core/src/exec/JSONExecTime.h +++ b/runtime/onert/core/src/exec/JSONExecTime.h @@ -54,18 +54,16 @@ public: loadOperationsExecTime(); }; /** - * @brief Update _operations_exec_time_file with new data. + * @brief Update _measurement_file with new data. */ - void uploadOperationsExecTime() const; + void storeOperationsExecTime() const; private: ///@brief file containing measurements std::string _measurement_file; std::unordered_map<std::string, const backend::Backend *> _backends; - std::unordered_map< - const backend::Backend *, - std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>> - &_measurements; + MeasurementData &_measurements; + /** * @brief Helper function for inserting data to OperationExecTimes * @@ -86,7 +84,7 @@ private: void printOperation(const std::map<uint32_t, int64_t> &operation_info, std::ofstream &stream) const; /** - * @brief Parse and load operations_exec_time from _operations_exec_time_file. + * @brief Parse and load _measurements from _measurement_file. 
*/ void loadOperationsExecTime(); }; diff --git a/runtime/onert/core/src/exec/LinearExecutor.cc b/runtime/onert/core/src/exec/LinearExecutor.cc index 6e6ca110f..a6d447312 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.cc +++ b/runtime/onert/core/src/exec/LinearExecutor.cc @@ -39,7 +39,9 @@ char *seq_to_label(const onert::ir::OpSequence *op_seq, const onert::ir::Operati void LinearExecutor::executeImpl() { - _subject.notifyModelBegin(this); + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); for (auto &&code : _code) { const auto op_seq = code.op_seq; @@ -48,7 +50,7 @@ void LinearExecutor::executeImpl() #ifdef RUY_PROFILER ruy::profiler::ScopeLabel label(seq_to_label(op_seq, _graph.operations())); #endif - _subject.notifyJobBegin(this, op_seq, backend); + _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend); auto &fn_seq = code.fn_seq; @@ -58,9 +60,9 @@ void LinearExecutor::executeImpl() fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor); fn_seq->run(); - _subject.notifyJobEnd(this, op_seq, backend); + _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend); } - _subject.notifyModelEnd(this); + _subject.notifySubgraphEnd(profiling_subg_index); } } // namespace exec diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h index 22d00ec30..d43c97012 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.h +++ b/runtime/onert/core/src/exec/LinearExecutor.h @@ -27,6 +27,7 @@ #include "compiler/Linear.h" #include "exec/FunctionSequence.h" #include "compiler/CodeMap.h" +#include "util/TracingCtx.h" namespace onert { @@ -47,11 +48,9 @@ public: * @param code_map OpSequence and its code map */ LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, - const std::vector<ir::OpSequenceIndex> &order) - : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs} + const std::vector<ir::OpSequenceIndex> &order, const util::TracingCtx *tracing_ctx) + : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx} { for (auto index : order) { diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc index 676bdb5fa..e9e576ce8 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.cc +++ b/runtime/onert/core/src/exec/ParallelExecutor.cc @@ -60,12 +60,10 @@ void ParallelExecutor::notify(uint32_t finished_job_id) } ParallelExecutor::ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, const compiler::TensorRegistries &tensor_regs, - compiler::CodeMap &&code_map) - : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, - std::move(code_map)} + compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx) + : DataflowExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), tracing_ctx} { VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl; } @@ -100,7 +98,10 @@ void ParallelExecutor::executeImpl() VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl; - _subject.notifyModelBegin(this); + auto profiling_subg_index = 
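// NOTE LinearExecutor, DataflowExecutor and ParallelExecutor each repeat the
// notifySubgraphBegin()/notifySubgraphEnd() bracketing (and the per-job Begin/End
// pair) by hand at every call site. A hypothetical RAII guard that would enforce
// the pairing; not part of onert, and the std::function callbacks stand in for
// the ExecutionObservee notifications:
#include <functional>
#include <utility>

class ScopedNotifySketch
{
public:
  ScopedNotifySketch(const std::function<void()> &begin, std::function<void()> end)
    : _end{std::move(end)}
  {
    begin(); // Begin notification fires on scope entry
  }
  ~ScopedNotifySketch() { _end(); } // End notification fires on every exit path

private:
  std::function<void()> _end;
};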
_tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); + while (true) { std::unique_lock<std::mutex> lock{_mu_jobs}; @@ -126,9 +127,11 @@ void ParallelExecutor::executeImpl() auto op_sequence_index = _job_to_op_seq[job_index]; auto op_seq = &_lowered_graph->op_seqs().at(op_sequence_index); auto backend = _lowered_graph->getLowerInfo()->op_seq.at(op_sequence_index)->backend(); - auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); }; + auto setup = [&, op_seq, backend]() { + _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend); + }; auto teardown = [&, job_index, op_seq, backend]() { - _subject.notifyJobEnd(this, op_seq, backend); + _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend); notify(job_index); }; @@ -146,7 +149,7 @@ void ParallelExecutor::executeImpl() // Wait for all the jobs done _scheduler->finish(); - _subject.notifyModelEnd(this); + _subject.notifySubgraphEnd(profiling_subg_index); // Reset input info for the next execution _input_info = _initial_input_info; diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h index 111c20c0c..fd9db42e1 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.h +++ b/runtime/onert/core/src/exec/ParallelExecutor.h @@ -28,6 +28,7 @@ #include <memory> #include "exec/DataflowExecutor.h" #include "ParallelScheduler.h" +#include "util/TracingCtx.h" namespace onert { @@ -51,9 +52,8 @@ public: * @param code_map OpSequence and its code map */ ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, - const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map); + const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx); void executeImpl() override; diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h index 2e3f3ca54..99d7b3af7 100644 --- a/runtime/onert/core/src/interp/InterpExecutor.h +++ b/runtime/onert/core/src/interp/InterpExecutor.h @@ -58,6 +58,15 @@ public: * @note It should be called after setting input and output buffer */ void execute(const exec::IODescription &desc) final; + void execute(const std::vector<backend::IPortableTensor *> &, + const std::vector<backend::IPortableTensor *> &) final + { + throw std::runtime_error{"Interpreter does not support subgraph calls (control flow ops)"}; + } + const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const final + { + throw std::runtime_error{"Interpreter does not support this function."}; + } private: const ir::Graph &_graph; diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc index 0473855d9..e1fb767fe 100644 --- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc +++ b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc @@ -116,7 +116,7 @@ void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, - cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); + cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr, nullptr); } void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation
&node) diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc index 9eedcd21a..8e75c4f53 100644 --- a/runtime/onert/core/src/ir/DataType.cc +++ b/runtime/onert/core/src/ir/DataType.cc @@ -42,6 +42,7 @@ size_t sizeOfDataType(DataType data_type) return sizeof(uint8_t); case DataType::QUANT_INT8_SYMM: case DataType::QUANT_INT8_ASYMM: + case DataType::QUANT_INT8_SYMM_PER_CHANNEL: return sizeof(int8_t); case DataType::FLOAT16: return sizeof(float16); diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc index eecfe81cc..a8578b4ce 100644 --- a/runtime/onert/core/src/ir/OperationDumper.cc +++ b/runtime/onert/core/src/ir/OperationDumper.cc @@ -72,7 +72,14 @@ OperationDumper::OperationDumper(const std::string &start_msg) VERBOSE(LIR) << start_msg << std::endl; } -void OperationDumper::visit(const ArgMax &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const ArgMinMax &node) +{ + std::string min_max = node.param().is_arg_max ? "(Max)" : "(Min)"; + VERBOSE(LIR) << "* " << node.name() << min_max << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMinMax::INPUT) << ") Axis(" + << node.getInputs().at(ArgMinMax::AXIS) << ") " << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; +} void OperationDumper::visit(const BatchToSpaceND &node) { @@ -159,6 +166,14 @@ void OperationDumper::visit(const ExpandDims &node) dumpUnaryInputOp(node, axis); } +void OperationDumper::visit(const Fill &node) +{ + VERBOSE(LIR) << "* " << node.name() << std::endl; + VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(Fill::Input::SHAPE) << ") Value(" + << node.getInputs().at(Fill::Input::VALUE) << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; +} + void OperationDumper::visit(const FullyConnected &node) { std::string inputs = @@ -505,7 +520,7 @@ void OperationDumper::visit(const While &node) } VERBOSE(LIR) << " - Inputs : " << "Cond subgraph (" << node.param().cond_subg_index << ") Body subgraph (" - << node.param().cond_subg_index << ") Inputs(" << inputs << ")" << std::endl; + << node.param().body_subg_index << ") Inputs(" << inputs << ")" << std::endl; std::string outputs; const auto &output_indices = node.getOutputs(); for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it) diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h index 91642ab13..fe18307b9 100644 --- a/runtime/onert/core/src/ir/OperationDumper.h +++ b/runtime/onert/core/src/ir/OperationDumper.h @@ -31,7 +31,7 @@ public: OperationDumper(const std::string &start_msg); public: - void visit(const operation::ArgMax &) override; + void visit(const operation::ArgMinMax &) override; void visit(const operation::BatchToSpaceND &node) override; void visit(const operation::BCQFullyConnected &node) override; void visit(const operation::BinaryArithmetic &node) override; @@ -48,6 +48,7 @@ public: void visit(const operation::ElementwiseUnary &) override; void visit(const operation::EmbeddingLookup &) override; void visit(const operation::ExpandDims &) override; + void visit(const operation::Fill &) override; void visit(const operation::FullyConnected &node) override; void visit(const operation::Gather &) override; void visit(const operation::HashtableLookup &) override; diff --git a/runtime/onert/core/src/ir/OperationValidator.cc 
b/runtime/onert/core/src/ir/OperationValidator.cc index da08e81fc..6f81c2a56 100644 --- a/runtime/onert/core/src/ir/OperationValidator.cc +++ b/runtime/onert/core/src/ir/OperationValidator.cc @@ -55,6 +55,17 @@ bool OperationValidator::isSameType(const OperandIndex &idx1, const OperandIndex return operandType(idx1) == operandType(idx2); } +bool OperationValidator::isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2) +{ + if (_operands.at(idx1).typeInfo().scale() != _operands.at(idx2).typeInfo().scale()) + return false; + + if (_operands.at(idx1).typeInfo().offset() != _operands.at(idx2).typeInfo().offset()) + return false; + + return true; +} + bool OperationValidator::isValidType(const OperandIndex &idx, const DataType &type) { return operandType(idx) == type; @@ -76,29 +87,54 @@ bool OperationValidator::isValidType(const OperandIndex &idx, void OperationValidator::visit(const operation::AddN &node) { + const auto output_index(node.getOutputs().at(0)); + int size = node.getInputs().size(); for (int i = 0; i < size; i++) { const auto input_index(node.getInputs().at(i)); OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32})); + OP_REQUIRES(isSameType(input_index, output_index)); } } +void OperationValidator::visit(const operation::ArgMinMax &node) +{ + const auto input_index(node.getInputs().at(operation::ArgMinMax::Input::INPUT)); + const auto axis_index(node.getInputs().at(operation::ArgMinMax::Input::AXIS)); + const auto output_index(node.getOutputs().at(0)); + const auto output_type = node.param().output_type; + + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::UINT8, + DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, output_type)); +} + void OperationValidator::visit(const operation::BatchMatMul &node) { const auto lhs_index(node.getInputs().at(operation::BatchMatMul::Input::LHS)); const auto rhs_index(node.getInputs().at(operation::BatchMatMul::Input::RHS)); + const auto output_index(node.getOutputs().at(0)); // Constant lhs and rhs is not implemented yet OP_REQUIRES(!isConstant(lhs_index) && !isConstant(rhs_index)); + + // Allow hybrid quantization (lhs: float / rhs: qint8 / out: float) + OP_REQUIRES(isValidType(lhs_index, {DataType::FLOAT32, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isSameType(lhs_index, rhs_index) || + ((operandType(lhs_index) == DataType::FLOAT32) && + (operandType(rhs_index) == DataType::QUANT_INT8_ASYMM))); + OP_REQUIRES(isSameType(lhs_index, output_index)); } void OperationValidator::visit(const operation::BatchToSpaceND &node) { - const auto block_size_index{node.getInputs().at(operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + const auto input_index{node.getInputs().at(operation::BatchToSpaceND::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; - // Non-constant block_size is not implemented yet - OP_REQUIRES(isConstant(block_size_index)); + OP_REQUIRES(isSameType(input_index, output_index)); } void OperationValidator::visit(const operation::BinaryArithmetic &node) @@ -122,10 +158,48 @@ void OperationValidator::visit(const operation::Comparison &node) OP_REQUIRES(isValidType(output_index, DataType::BOOL8)); } +void OperationValidator::visit(const operation::Concat &node) +{ + const auto output_index{node.getOutputs().at(0)}; + + for (auto input_index : 
node.getInputs()) + { + OP_REQUIRES(isSameType(input_index, output_index)); + + // Int8 quantization requires same scale and zero point + if (isValidType(output_index, DataType::QUANT_INT8_ASYMM)) + { + OP_REQUIRES(isSameQuantParam(input_index, output_index)); + } + } +} + +void OperationValidator::visit(const operation::Conv2D &node) +{ + const auto input_index{node.getInputs().at(operation::Conv2D::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + + uint32_t stride_horizontal = node.param().stride.horizontal; + uint32_t stride_vertical = node.param().stride.vertical; + uint32_t dilation_width = node.param().dilation.width_factor; + uint32_t dilation_height = node.param().dilation.height_factor; + + OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0)); + OP_REQUIRES((dilation_width > 0) && (dilation_height > 0)); + OP_REQUIRES(isSameType(input_index, output_index)); +} + void OperationValidator::visit(const operation::DepthToSpace &node) { + const auto input_index{node.getInputs().at(operation::DepthToSpace::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + int32_t block_size = node.param().block_size; + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::INT64, + DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isSameType(input_index, output_index)); + OP_REQUIRES(block_size > 0); } @@ -151,6 +225,32 @@ void OperationValidator::visit(const operation::ElementwiseActivation &node) // Check if I/O types match OP_REQUIRES(isSameType(output_index, input_index)); + + switch (node.param().op_type) + { + case operation::ElementwiseActivation::Type::ELU: + OP_REQUIRES(isValidType(input_index, DataType::FLOAT32)); + break; + case operation::ElementwiseActivation::Type::LEAKY_RELU: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + case operation::ElementwiseActivation::Type::LOGISTIC: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + case operation::ElementwiseActivation::Type::RELU: + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM})); + break; + case operation::ElementwiseActivation::Type::TANH: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + } } void OperationValidator::visit(const operation::ElementwiseBinary &node) @@ -161,6 +261,13 @@ void OperationValidator::visit(const operation::ElementwiseBinary &node) OP_REQUIRES(isSameType(lhs_index, rhs_index)); OP_REQUIRES(isSameType(lhs_index, output_index)); + + const auto op_type = node.param().op_type; + if (op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND || + op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR) + { + OP_REQUIRES(isValidType(lhs_index, DataType::BOOL8)); + } } void OperationValidator::visit(const operation::ElementwiseUnary &node) @@ -195,8 +302,17 @@ void OperationValidator::visit(const operation::ElementwiseUnary &node) void OperationValidator::visit(const operation::EmbeddingLookup &node) { const auto lookups_index{node.getInputs().at(operation::EmbeddingLookup::Input::LOOKUPS)}; + const auto values_index{node.getInputs().at(operation::EmbeddingLookup::Input::VALUES)}; + 
const auto output_index{node.getOutputs().at(0)}; OP_REQUIRES(isValidType(lookups_index, DataType::INT32)); + + // TFLite: Allow hybrid type - value table & output + // NNAPI: Require same value table and output type + OP_REQUIRES( + isSameType(values_index, output_index) || + (isValidType(output_index, DataType::FLOAT32) && + (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM})))); } void OperationValidator::visit(const operation::ExpandDims &node) @@ -206,7 +322,19 @@ void OperationValidator::visit(const operation::ExpandDims &node) const auto axis_index{node.getInputs().at(operation::ExpandDims::Input::AXIS)}; OP_REQUIRES(isSameType(output_index, input_index)); - OP_REQUIRES(isValidType(axis_index, DataType::INT32)); + OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64})); +} + +void OperationValidator::visit(const operation::Fill &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(operation::Fill::Input::SHAPE)}; + const auto value_index{node.getInputs().at(operation::Fill::Input::VALUE)}; + + OP_REQUIRES(isSameType(output_index, value_index)); + OP_REQUIRES(isValidType(input_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, + {DataType::FLOAT32, DataType::INT32, DataType::INT64, DataType::BOOL8})); } void OperationValidator::visit(const operation::HashtableLookup &node) diff --git a/runtime/onert/core/src/ir/OperationValidator.h b/runtime/onert/core/src/ir/OperationValidator.h index 2ea8000e5..5b95b16ba 100644 --- a/runtime/onert/core/src/ir/OperationValidator.h +++ b/runtime/onert/core/src/ir/OperationValidator.h @@ -44,10 +44,13 @@ public: public: void visit(const operation::AddN &node) override; + void visit(const operation::ArgMinMax &node) override; void visit(const operation::BatchMatMul &node) override; void visit(const operation::BatchToSpaceND &node) override; void visit(const operation::BinaryArithmetic &node) override; void visit(const operation::Comparison &node) override; + void visit(const operation::Concat &node) override; + void visit(const operation::Conv2D &node) override; void visit(const operation::DepthToSpace &node) override; void visit(const operation::DepthwiseConv2D &node) override; void visit(const operation::ElementwiseActivation &node) override; @@ -55,6 +58,7 @@ public: void visit(const operation::ElementwiseUnary &node) override; void visit(const operation::EmbeddingLookup &node) override; void visit(const operation::ExpandDims &node) override; + void visit(const operation::Fill &node) override; void visit(const operation::HashtableLookup &node) override; void visit(const operation::Pack &node) override; void visit(const operation::Pad &node) override; @@ -76,6 +80,7 @@ private: DataType operandType(const OperandIndex &idx); bool isConstant(const OperandIndex &idx); bool isSameType(const OperandIndex &idx1, const OperandIndex &idx2); + bool isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2); bool isValidType(const OperandIndex &idx, const DataType &type); bool isValidType(const OperandIndex &idx, std::initializer_list<DataType> valid_types); diff --git a/runtime/onert/core/src/ir/operation/ArgMax.cc b/runtime/onert/core/src/ir/operation/ArgMinMax.cc index f3bd8fd73..989d905bf 100644 --- a/runtime/onert/core/src/ir/operation/ArgMax.cc +++ b/runtime/onert/core/src/ir/operation/ArgMinMax.cc @@ -14,10 +14,7 @@ * limitations under the License. 
*/ -#include "ir/operation/ArgMax.h" - -#include <cassert> - +#include "ir/operation/ArgMinMax.h" #include "ir/OperationVisitor.h" namespace onert { @@ -27,10 +24,10 @@ namespace ir namespace operation { -void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); } +void ArgMinMax::accept(OperationVisitor &v) const { v.visit(*this); } -ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param) +ArgMinMax::ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc index 6a0be7eb8..20b6fa124 100644 --- a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc +++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc @@ -57,7 +57,7 @@ std::string ElementwiseUnary::name() const {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}}, {ElementwiseUnaryType::SIN, std::string{"Sin"}}, {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}}, - {ElementwiseUnaryType::SQURE, std::string{"Squre"}}, + {ElementwiseUnaryType::SQUARE, std::string{"Square"}}, {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}}; return name_map.at(_param.op_type); } diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc index 45cce662e..9da93f68a 100644 --- a/runtime/onert/core/src/util/ConfigSource.cc +++ b/runtime/onert/core/src/util/ConfigSource.cc @@ -30,8 +30,10 @@ namespace util { static std::unique_ptr<IConfigSource> _source; +static std::unique_ptr<IConfigSource> _source_ext; void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); } +void config_source_ext(std::unique_ptr<IConfigSource> &&source) { _source_ext = std::move(source); } static IConfigSource *config_source() { @@ -67,6 +69,15 @@ static std::string getConfigOrDefault(const std::string &key) auto ret = config_source()->get(key); if (ret.empty()) { + // if env is not set, search from external + if (_source_ext.get()) + { + ret = _source_ext.get()->get(key); + } + } + // if not found search from defaults + if (ret.empty()) + { auto itr = defaults.find(key); if (itr != defaults.end()) { diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc index de37276bf..fd5618714 100644 --- a/runtime/onert/core/src/util/EventCollector.cc +++ b/runtime/onert/core/src/util/EventCollector.cc @@ -38,15 +38,17 @@ class DurationEventBuilder public: DurationEventBuilder(const std::string &ts) : _ts{ts} {} - DurationEvent build(const std::string &tid, const std::string &name, const std::string &ph) const + DurationEvent build(const EventCollector::Event &evt_collected, const std::string &ph) const { DurationEvent evt; - evt.name = name; - evt.tid = tid; + evt.name = evt_collected.label; + evt.tid = evt_collected.backend; evt.ph = ph; evt.ts = _ts; + evt.args = evt_collected.userData; + return evt; } @@ -93,11 +95,11 @@ void EventCollector::onEvent(const Event &event) switch (event.edge) { case Edge::BEGIN: - _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "B")); + _rec->emit(DurationEventBuilder(ts).build(event, "B")); break; case Edge::END: - _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "E")); + _rec->emit(DurationEventBuilder(ts).build(event, "E")); break; }
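
The ConfigSource hunk above adds a second, externally registered source: a key is resolved first from the primary source (typically environment-backed), then from the source registered via config_source_ext(), and only then from the compiled-in defaults. A minimal sketch of that three-step lookup under simplified assumptions (plain maps stand in for the IConfigSource objects; lookupConfig is a hypothetical name):

    #include <map>
    #include <string>

    // Hypothetical stand-ins for the real IConfigSource chain; the point is
    // the lookup order: primary (env) -> external (config_source_ext) -> defaults.
    std::string lookupConfig(const std::map<std::string, std::string> &primary,
                             const std::map<std::string, std::string> &external,
                             const std::map<std::string, std::string> &defaults,
                             const std::string &key)
    {
      for (const auto *src : {&primary, &external, &defaults})
      {
        auto it = src->find(key);
        if (it != src->end() && !it->second.empty())
          return it->second;
      }
      return ""; // unknown key: same as an empty config value
    }

diff --git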
a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h index 8154be592..7daa4851f 100644 --- a/runtime/onert/core/src/util/EventCollector.h +++ b/runtime/onert/core/src/util/EventCollector.h @@ -19,6 +19,10 @@ #include "util/EventRecorder.h" +#include <vector> +#include <utility> +#include <string> + class EventCollector { public: @@ -31,8 +35,24 @@ public: struct Event { Edge edge; + uint32_t session_index; + uint32_t subg_index; std::string backend; + uint32_t op_index; + std::string op_name; + uint32_t op_seq_size; // if this event is for an operation sequence of multiple operations + + // TODO Deprecate this. The label can differ by writer, so let the writer decide the label. std::string label; + + // user-defined data: pairs of (key, value) + std::vector<std::pair<std::string, std::string>> userData; + + Event(Edge a_edge, const std::string &a_backend, const std::string &a_label) + : edge(a_edge), session_index(0), subg_index(0), backend(a_backend), op_index(0), + op_seq_size(0), label(a_label) + { /* empty */ + } }; public:
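
The widened Event struct above carries per-event identifiers plus free-form user data, and DurationEventBuilder (earlier in this patch) copies that userData into the args of the emitted trace event. A small sketch of populating it, assuming the declarations above and an EventCollector already wired to a recorder (field values are illustrative):

    // Sketch only: construct a BEGIN event with the new fields and attach
    // user-defined (key, value) pairs that end up in the trace output.
    void emitAnnotatedEvent(EventCollector &collector)
    {
      EventCollector::Event evt{EventCollector::Edge::BEGIN, "cpu", "Conv2D"};
      evt.subg_index = 0;                        // which subgraph is running
      evt.op_index = 7;                          // hypothetical operation index
      evt.op_name = "Conv2D";
      evt.userData.emplace_back("session", "0"); // shows up in DurationEvent::args
      evt.userData.emplace_back("graph", "subg-0");
      collector.onEvent(evt);
    }

diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.cc b/runtime/onert/core/src/util/EventCollectorGlobal.cc deleted file mode 100644 index 6c03a5b9a..000000000 --- a/runtime/onert/core/src/util/EventCollectorGlobal.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.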
- */ - -#include "util/EventCollectorGlobal.h" - -#include <cassert> -#include <fstream> -#include <iostream> - -#include "util/ConfigSource.h" -#include "util/EventWriter.h" - -namespace onert -{ -namespace util -{ - -EventCollectorGlobal::EventCollectorGlobal() : _recorder{}, _collector{&_recorder} -{ - // DO NOTHING -} - -EventCollectorGlobal::~EventCollectorGlobal() -{ - if (!_recorder.empty()) - { - try - { - // TODO Need better way for saved file path than the hardcoded path - EventWriter{_recorder}.writeToFile("trace.global.json", - EventWriter::WriteFormat::CHROME_TRACING); - } - catch (const std::exception &e) - { - std::cerr << "E: Fail to record event in EventCollectorGlobal: " << e.what() << std::endl; - } - } -} - -EventCollectorGlobal &EventCollectorGlobal::get() -{ - static EventCollectorGlobal instance; - return instance; -} - -EventDurationBlock::EventDurationBlock(const std::string &tag) : _tag{tag} -{ - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag}); -} -EventDurationBlock::~EventDurationBlock() -{ - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag}); -} - -EventDurationManual::EventDurationManual(const std::string &tag) : _tag{tag}, _pair{true} {} - -EventDurationManual::~EventDurationManual() -{ - // Check if it has called begin-end pair - assert(_pair); -} - -void EventDurationManual::begin() -{ - _pair = false; - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag}); -} - -void EventDurationManual::end() -{ - assert(!_pair); - _pair = true; - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag}); -} - -} // namespace util -} // namespace onert diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.h b/runtime/onert/core/src/util/EventCollectorGlobal.h deleted file mode 100644 index 1027ec84d..000000000 --- a/runtime/onert/core/src/util/EventCollectorGlobal.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ -#define __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ - -#include "util/EventRecorder.h" -#include "util/EventCollector.h" - -namespace onert -{ -namespace util -{ - -/** - * @brief Singleton class for event collection from anywhere in code - * - */ -class EventCollectorGlobal -{ -public: - /** - * @brief Get the singleton object of this class - * - * @return EventCollectorGlobal& Singleton object - */ - static EventCollectorGlobal &get(); - -public: - /** - * @brief Getter for event collector object - * - * @return EventCollector& Collector object - */ - EventCollector &collector() { return _collector; } - -private: - EventCollectorGlobal(); - ~EventCollectorGlobal(); - -private: - EventRecorder _recorder; - EventCollector _collector; -}; - -/** - * @brief Helper class for emitting duration event which is handled automatically with ctor/dtor - * - */ -class EventDurationBlock -{ -public: - /** - * @brief Raise a duration event with type of BEGIN - * - * @param tag A label for the duration event - */ - EventDurationBlock(const std::string &tag); - /** - * @brief Raise a duration event with type of END - * - */ - ~EventDurationBlock(); - -private: - std::string _tag; -}; - -/** - * @brief Helper class for emitting duration event which is handled manually - * - * Usage: - * { - * ... - * EventDurationManual duration("some tag"); - * duration.begin(); - * ... - * ... // Code for duration - * ... - * duration.end(); - * } - * - */ -class EventDurationManual -{ -public: - /** - * @brief Construct a new Event Duration Manual object - * - * @param tag A label for the duration object - */ - EventDurationManual(const std::string &tag); - /** - * @brief Destroy the Event Duration Manual object - * - */ - ~EventDurationManual(); - - /** - * @brief Raise a duration event with type of BEGIN - * - */ - void begin(); - /** - * @brief Raise a duration event with type of END - * - */ - void end(); - -private: - std::string _tag; - bool _pair; -}; - -} // namespace util -} // namespace onert - -/** - * Helper Macro Definitions - * - * HOW TO USE - * - * void f(args) - * { - * EVENT_DURATION_FUNCTION(); - * ... - * if(cond) - * { - * EVENT_DURATION_REGION("if branch"); - * ... - * } - * ... 
- * } - */ - -#define EVENT_DURATION_FUNCTION() \ - ::onert::util::EventDurationBlock __event_duration__##__LINE__ { __FUNCTION__ } - -#define EVENT_DURATION_REGION(tag) \ - ::onert::util::EventDurationBlock __event_duration__##__LINE__ { tag } - -#endif // __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h index 7af4c7ddb..3ed40875f 100644 --- a/runtime/onert/core/src/util/EventRecorder.h +++ b/runtime/onert/core/src/util/EventRecorder.h @@ -27,8 +27,9 @@ struct Event { std::string name; std::string tid; - std::string ph; /* REQUIRED */ - std::string ts; /* REQUIRED */ + std::string ph; /* REQUIRED */ + std::string ts; /* REQUIRED */ + std::vector<std::pair<std::string, std::string>> args; // user-defined data: pairs of (key, value) }; struct DurationEvent : public Event diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc index dacb40e64..8760a16db 100644 --- a/runtime/onert/core/src/util/EventWriter.cc +++ b/runtime/onert/core/src/util/EventWriter.cc @@ -89,6 +89,7 @@ void fill(Content &content, const Event &evt) content.flds.emplace_back("tid", evt.tid); content.flds.emplace_back("ph", evt.ph); content.flds.emplace_back("ts", evt.ts); + content.args = evt.args; } std::string object(const DurationEvent &evt) @@ -418,40 +419,7 @@ struct MDTableBuilder } // namespace -EventWriter::EventWriter(const EventRecorder &recorder) : _recorder(recorder) -{ - // DO NOTHING -} - -void EventWriter::writeToFiles(const std::string &base_filepath) -{ - // Note. According to an internal issue, let snpe json as just file name not '.snpe.json' - writeToFile(base_filepath, WriteFormat::SNPE_BENCHMARK); - writeToFile(base_filepath + ".chrome.json", WriteFormat::CHROME_TRACING); - writeToFile(base_filepath + ".table.md", WriteFormat::MD_TABLE); -} - -void EventWriter::writeToFile(const std::string &filepath, WriteFormat write_format) -{ - std::ofstream os{filepath, std::ofstream::out}; - switch (write_format) - { - case WriteFormat::CHROME_TRACING: - writeChromeTrace(os); - break; - case WriteFormat::SNPE_BENCHMARK: - writeSNPEBenchmark(os); - break; - case WriteFormat::MD_TABLE: - writeMDTable(os); - break; - default: - assert(!"Invalid value"); - break; - } -} - -void EventWriter::writeSNPEBenchmark(std::ostream &os) +void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders) { Json::Value root; auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue}; @@ -475,11 +443,14 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os) // Memory { std::unordered_map<std::string, Stat> mem_stats; - for (auto &evt : _recorder.counter_events()) + for (auto &recorder : recorders) { - auto &mem_stat = mem_stats[evt.name]; - uint64_t val = std::stoull(evt.values.at("value")); - mem_stat.accumulate(val); + for (auto &evt : recorder->counter_events()) + { + auto &mem_stat = mem_stats[evt.name]; + uint64_t val = std::stoull(evt.values.at("value")); + mem_stat.accumulate(val); + } } auto &mem = exec_data["memory"] = Json::Value{Json::objectValue}; @@ -501,26 +472,29 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os) // 2D keys : stats[tid][name] std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats; std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps; - for (auto &evt : _recorder.duration_events()) + for (auto &recorder : recorders) { - auto &stat = stats[evt.tid][evt.name]; - 
auto &begin_ts = begin_timestamps[evt.tid][evt.name]; - uint64_t timestamp = std::stoull(evt.ts); - if (evt.ph == "B") + for (auto &evt : recorder->duration_events()) { - if (begin_ts != 0) - throw std::runtime_error{"Invalid Data"}; - begin_ts = timestamp; - } - else if (evt.ph == "E") - { - if (begin_ts == 0 || timestamp < begin_ts) - throw std::runtime_error{"Invalid Data"}; - stat.accumulate(timestamp - begin_ts); - begin_ts = 0; + auto &stat = stats[evt.tid][evt.name]; + auto &begin_ts = begin_timestamps[evt.tid][evt.name]; + uint64_t timestamp = std::stoull(evt.ts); + if (evt.ph == "B") + { + if (begin_ts != 0) + throw std::runtime_error{"Invalid Data"}; + begin_ts = timestamp; + } + else if (evt.ph == "E") + { + if (begin_ts == 0 || timestamp < begin_ts) + throw std::runtime_error{"Invalid Data"}; + stat.accumulate(timestamp - begin_ts); + begin_ts = 0; + } + else + throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""}; } - else - throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""}; } for (auto &kv : begin_timestamps) @@ -545,30 +519,71 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os) } } - os << root; + _os << root; } -void EventWriter::writeChromeTrace(std::ostream &os) +void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders) { - os << "{\n"; - os << " " << quote("traceEvents") << ": [\n"; + _os << "{\n"; + _os << " " << quote("traceEvents") << ": [\n"; - for (auto &evt : _recorder.duration_events()) + for (auto &recorder : recorders) { - os << " " << object(evt) << ",\n"; + flushOneRecord(*recorder); } - for (auto &evt : _recorder.counter_events()) + _os << " { }\n"; + _os << " ]\n"; + _os << "}\n"; +} + +void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder) +{ + for (auto &evt : recorder.duration_events()) { - os << " " << object(evt) << ",\n"; + _os << " " << object(evt) << ",\n"; } - os << " { }\n"; - os << " ]\n"; - os << "}\n"; + for (auto &evt : recorder.counter_events()) + { + _os << " " << object(evt) << ",\n"; + } } -void EventWriter::writeMDTable(std::ostream &os) +void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records) +{ + for (auto &recorder : records) + { + MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os); + } +} + +// initialization +std::mutex EventWriter::_mutex; + +void EventWriter::readyToFlush(std::unique_ptr<EventRecorder> &&recorder) { - MDTableBuilder(_recorder.duration_events(), _recorder.counter_events()).build().write(os); + { + std::unique_lock<std::mutex> lock{_mutex}; + + _recorders.emplace_back(std::move(recorder)); + + if (--_ref_count > 0) + return; + } + // The caller of this method is the last instance that uses EventWriter. + // Let's write log files. + + // Note. 
According to an internal issue, the SNPE JSON output uses just the base file name, not '.snpe.json' + flush(WriteFormat::SNPE_BENCHMARK); + flush(WriteFormat::CHROME_TRACING); + flush(WriteFormat::MD_TABLE); +} + +void EventWriter::flush(WriteFormat write_format) +{ + auto *writer = _actual_writers[write_format].get(); + assert(writer); + + writer->flush(_recorders); } diff --git a/runtime/onert/core/src/util/EventWriter.h b/runtime/onert/core/src/util/EventWriter.h index 7e838ca82..0dcd00be6 100644 --- a/runtime/onert/core/src/util/EventWriter.h +++ b/runtime/onert/core/src/util/EventWriter.h @@ -20,7 +20,49 @@ #include "EventRecorder.h" #include <string> -#include <ostream> +#include <vector> +#include <unordered_map> +#include <mutex> +#include <fstream> + +class EventFormatWriter +{ +public: + EventFormatWriter(const std::string &filepath) : _os{filepath, std::ofstream::out} {} + virtual ~EventFormatWriter() { /* empty */} + + virtual void flush(const std::vector<std::unique_ptr<EventRecorder>> &) = 0; + +protected: + std::ofstream _os; +}; + +class SNPEWriter : public EventFormatWriter +{ +public: + SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */} + void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override; +}; + +class ChromeTracingWriter : public EventFormatWriter +{ +public: + ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */} + void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override; + +private: + void flushOneRecord(const EventRecorder &); +}; + +class MDTableWriter : public EventFormatWriter +{ +public: + MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */} + void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override; + +private: + void flushOneRecord(const EventRecorder &); +}; class EventWriter { @@ -32,20 +74,58 @@ public: MD_TABLE, }; -public: - EventWriter(const EventRecorder &recorder); + /** + * @brief Returns the singleton object + */ + static EventWriter *get(const std::string &filename) + { + std::unique_lock<std::mutex> lock{_mutex}; -public: - void writeToFiles(const std::string &base_filepath); - void writeToFile(const std::string &filepath, WriteFormat write_format); + static EventWriter singleton(filename); + return &singleton; + } + + /** + * @brief Call this when an observer that uses EventWriter starts + */ + void startToUse() + { + std::unique_lock<std::mutex> lock{_mutex}; + _ref_count++; + } + + /** + * @brief Call this when an observer that uses EventWriter finishes. + * After all such observers have called this method, the reference count reaches 0. + * Then, EventWriter will write the profiling result files.
+ */ + void readyToFlush(std::unique_ptr<EventRecorder> &&recorder); private: - void writeSNPEBenchmark(std::ostream &os); - void writeChromeTrace(std::ostream &os); - void writeMDTable(std::ostream &os); + EventWriter(const std::string &filepath) : _ref_count(0) + { + std::string snpe_log_name(filepath); + std::string chrome_tracing_log_name(filepath + ".chrome.json"); + std::string md_table_log_name(filepath + ".table.md"); + + _actual_writers[WriteFormat::SNPE_BENCHMARK] = std::make_unique<SNPEWriter>(snpe_log_name); + _actual_writers[WriteFormat::CHROME_TRACING] = + std::make_unique<ChromeTracingWriter>(chrome_tracing_log_name); + _actual_writers[WriteFormat::MD_TABLE] = std::make_unique<MDTableWriter>(md_table_log_name); + }; + + void flush(WriteFormat write_format); private: - const EventRecorder &_recorder; + static std::mutex _mutex; + + // number of observers of an executor that want to write profiling data + int32_t _ref_count; + + // one recorder object per executor + std::vector<std::unique_ptr<EventRecorder>> _recorders; + + std::unordered_map<WriteFormat, std::unique_ptr<EventFormatWriter>> _actual_writers; }; #endif // __ONERT_UTIL_EVENT_WRITER_H__ diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc index 1f468a8b5..3ed3080cc 100644 --- a/runtime/onert/core/src/util/ShapeInference.cc +++ b/runtime/onert/core/src/util/ShapeInference.cc @@ -128,11 +128,11 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha return broadcastShapes(lhs_shape, rhs_shape); } -ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank) +ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank) { if (axis < 0 || axis >= rank) { - throw std::runtime_error("ArgMax shape inference: Wrong axis value " + std::to_string(axis)); + throw std::runtime_error("ArgMinMax shape inference: Wrong axis value " + std::to_string(axis)); } ir::Shape out_shape; @@ -385,18 +385,22 @@ ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis) return out_shape; } -ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf) +template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf) { - ir::Shape out_shape(in_shape.dim(0)); + ir::Shape out_shape(fill_shape.dim(0)); for (int out_x = 0; out_x < out_shape.rank(); ++out_x) { - out_shape.dim(out_x) = in_buf[out_x]; + out_shape.dim(out_x) = static_cast<int32_t>(shape_buf[out_x]); } return out_shape; } +// template instantiation +template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int32_t *shape_buf); +template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int64_t *shape_buf); + ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape) { assert(in_shape.rank() >= 2);
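
With the pieces above, EventWriter becomes a process-wide, reference-counted sink: each observer announces itself with startToUse() and later hands over its recorder with readyToFlush(); the final hand-off drives all three format writers. A minimal usage sketch under that reading (the helper function name is hypothetical; note that get() fixes the file path on its first call):

    // Sketch of the intended lifecycle; assumes the EventWriter/EventRecorder
    // declarations above plus <memory> and <utility>.
    void profileWithTwoObservers()
    {
      auto *writer = EventWriter::get("trace"); // singleton; later calls ignore the argument
      writer->startToUse();                     // observer #1 (_ref_count = 1)
      writer->startToUse();                     // observer #2 (_ref_count = 2)

      auto rec1 = std::make_unique<EventRecorder>();
      auto rec2 = std::make_unique<EventRecorder>();
      // ... each observer fills its own recorder while its executor runs ...

      writer->readyToFlush(std::move(rec1));    // _ref_count 2 -> 1, nothing written yet
      writer->readyToFlush(std::move(rec2));    // _ref_count 1 -> 0, files are written
    }

diff --git a/runtime/onert/core/src/util/TracingCtx.cc b/runtime/onert/core/src/util/TracingCtx.cc new file mode 100644 index 000000000..08a1b32a7 --- /dev/null +++ b/runtime/onert/core/src/util/TracingCtx.cc @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.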
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/TracingCtx.h" + +namespace onert +{ +namespace util +{ + +// initializing static member var +std::mutex TracingCtx::_session_id_mutex; + +} // namespace util +} // namespace onert diff --git a/runtime/onert/frontend/.clang-format b/runtime/onert/frontend/.clang-format new file mode 120000 index 000000000..83185fee3 --- /dev/null +++ b/runtime/onert/frontend/.clang-format @@ -0,0 +1 @@ +../../../.clang-format.8
\ No newline at end of file diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h index c0003e402..f9c97b41b 100644 --- a/runtime/onert/frontend/base_loader/include/base_loader.h +++ b/runtime/onert/frontend/base_loader/include/base_loader.h @@ -68,7 +68,7 @@ public: * @param graph reference on subgraphs */ explicit BaseLoader(std::unique_ptr<ir::Subgraphs> &subgs) - : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr} + : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr} { _use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA); } @@ -114,23 +114,19 @@ private: // Operations template <typename OpIR, typename... Args> const OpIR *loadOperationTo(const Operator *op, ir::Graph &subg, Args &&... args); - void loadConv2D(const Operator *op, ir::Graph &subg); - void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg); - void loadTransposeConv(const Operator *op, ir::Graph &subg); - void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type); - void loadReshape(const Operator *op, ir::Graph &subg); - void loadSoftmax(const Operator *op, ir::Graph &subg); - void loadConcatenation(const Operator *op, ir::Graph &subg); - void loadFC(const Operator *op, ir::Graph &subg); + + void loadAddV2(const Operator *op, ir::Graph &subg); + void loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax); + void loadBatchMatMul(const Operator *op, ir::Graph &subg); void loadBinaryArithmetic(const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type); - void loadAddV2(const Operator *op, ir::Graph &subg); - void loadPack(const Operator *op, ir::Graph &subg); - void loadResizeBilinear(const Operator *op, ir::Graph &subg); - void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg); - void loadReduce(const Operator *op, ir::Graph &subg, - ir::operation::Reduce::ReduceType reduce_type); - void loadReduceAll(const Operator *op, ir::Graph &subg); + void loadComparison(const Operator *op, ir::Graph &subg); + void loadConcatenation(const Operator *op, ir::Graph &subg); + void loadConv2D(const Operator *op, ir::Graph &subg); + void loadCustom(const Operator *op, ir::Graph &subg); + void loadDepthToSpace(const Operator *op, ir::Graph &subg); + void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg); + void loadEinsum(const Operator *op, ir::Graph &subg); void loadElementwiseActivation(const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, float alpha = 0.f, float beta = 0.f); @@ -138,25 +134,31 @@ private: ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type); void loadElementwiseUnary(const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type); + void loadFC(const Operator *op, ir::Graph &subg); + void loadFusedBatchNorm(const Operator *op, ir::Graph &subg); void loadGather(const Operator *op, ir::Graph &subg); - void loadCustom(const Operator *op, ir::Graph &subg); - void loadBatchMatMul(const Operator *op, ir::Graph &subg); - void loadSqueeze(const Operator *op, ir::Graph &subg); + void loadIf(const Operator *op, ir::Graph &subg); + void loadLeakyRelu(const Operator *op, ir::Graph &subg); + void loadLogSoftmax(const Operator *op, ir::Graph &subg); + void loadOneHot(const Operator *op, ir::Graph &subg); + void loadPack(const Operator *op, ir::Graph &subg); + void loadPool2D(const Operator *op, 
ir::Graph &subg, ir::operation::Pool2D::PoolType op_type); + void loadReduce(const Operator *op, ir::Graph &subg, + ir::operation::Reduce::ReduceType reduce_type); + void loadReduceAll(const Operator *op, ir::Graph &subg); + void loadReshape(const Operator *op, ir::Graph &subg); + void loadResizeBilinear(const Operator *op, ir::Graph &subg); + void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg); + void loadSoftmax(const Operator *op, ir::Graph &subg); + void loadSpaceToDepth(const Operator *op, ir::Graph &subg); void loadSplit(const Operator *op, ir::Graph &subg); void loadSplitV(const Operator *op, ir::Graph &subg); + void loadSqueeze(const Operator *op, ir::Graph &subg); void loadStridedSlice(const Operator *op, ir::Graph &subg); + void loadTransposeConv(const Operator *op, ir::Graph &subg); + void loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg); void loadUnpack(const Operator *op, ir::Graph &subg); - void loadComparison(const Operator *op, ir::Graph &subg); - void loadEinsum(const Operator *op, ir::Graph &subg); - void loadOneHot(const Operator *op, ir::Graph &subg); - void loadIf(const Operator *op, ir::Graph &subg); void loadWhile(const Operator *op, ir::Graph &subg); - void loadArgMax(const Operator *op, ir::Graph &subg); - void loadFusedBatchNorm(const Operator *op, ir::Graph &subg); - void loadLogSoftmax(const Operator *op, ir::Graph &subg); - void loadSpaceToDepth(const Operator *op, ir::Graph &subg); - void loadLeakyRelu(const Operator *op, ir::Graph &subg); - void loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg); void verifySubgraphIndex(int subg_index) { @@ -255,19 +257,26 @@ ir::DataType BaseLoader<LoaderDomain>::BaseLoader::tensorTypeToDataType(const Te { case TensorType::TensorType_FLOAT32: return ir::DataType::FLOAT32; + case TensorType::TensorType_FLOAT16: + return ir::DataType::FLOAT16; case TensorType::TensorType_INT32: return ir::DataType::INT32; - case TensorType::TensorType_BOOL: - return ir::DataType::BOOL8; case TensorType::TensorType_UINT8: return ir::DataType::QUANT_UINT8_ASYMM; - case TensorType::TensorType_INT8: - return ir::DataType::QUANT_INT8_ASYMM; case TensorType::TensorType_INT64: return ir::DataType::INT64; + // case TensorType::TensorType_STRING: + case TensorType::TensorType_BOOL: + return ir::DataType::BOOL8; + case TensorType::TensorType_INT16: + return ir::DataType::QUANT_INT16_ASYMM; + // case TensorType::TensorType_COMPLEX64 + case TensorType::TensorType_INT8: + return ir::DataType::QUANT_INT8_ASYMM; + // case TensorType::TensorType_FLOAT64 default: throw std::runtime_error( - std::string("Unsupported tensor type: ").append(EnumNameTensorType(type))); + std::string("Unsupported tensor type: ").append(EnumNameTensorType(type))); } } @@ -385,7 +394,7 @@ ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir: { size_t offset = unaligned_offset_start - aligned_offset_start; uint8_t *mmap_base = static_cast<uint8_t *>( - mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start)); + mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start)); data_obj = std::make_unique<ir::CachedData>(mmap_base + offset, data_size); munmap(mmap_base, mmap_size); } @@ -446,7 +455,7 @@ void BaseLoader<LoaderDomain>::loadSparsity(const Tensor *tensor, const ir::Shap bool block2D_sparsity = dim_metadata_size == 4 && block_rank == 2; if (dim_metadata_size != !random_sparsity && !block2D_sparsity) throw std::runtime_error( - "sparsity is supported only for 2D tensor 
with random or 16x1 block sparsity."); + "sparsity is supported only for 2D tensor with random or 16x1 block sparsity."); const auto *src_metadata = src_sparsity->dim_metadata()->Get(0); if (src_metadata->format() != DimensionType::DimensionType_DENSE) @@ -514,8 +523,8 @@ void BaseLoader<LoaderDomain>::loadOperationIO(const Operator *op, ir::OperandIn auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); if (isOptionalInputTensor(idx) && !allowOptionalInputTensor(builtin_code)) throw std::runtime_error( - std::string("loader doesn't support optional input tensor yet for ") - .append(EnumNameBuiltinOperator(builtin_code))); + std::string("loader doesn't support optional input tensor yet for ") + .append(EnumNameBuiltinOperator(builtin_code))); }; check_optional_input(); inputs.append(tensorIdxToOperandIdx(idx)); @@ -691,9 +700,9 @@ void BaseLoader<LoaderDomain>::loadFC(const Operator *op, ir::Graph &subg) const auto fc = loadOperationTo<ir::operation::FullyConnected>(op, subg, param); const auto &input_operand = - subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT)); + subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT)); auto &weights_operand = - subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT)); + subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT)); if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 && ((weights_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) || weights_operand.typeInfo().type() == ir::DataType::QUANT_INT8_ASYMM)) @@ -719,7 +728,7 @@ void BaseLoader<LoaderDomain>::loadAddV2(const Operator *op, ir::Graph &subg) auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size); auto attr_map = data_root.AsMap(); const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>( - attr_map["fused_activation_function"].AsInt8()); + attr_map["fused_activation_function"].AsInt8()); param.activation = convertActivation(fused_activation_func); } @@ -727,8 +736,18 @@ void BaseLoader<LoaderDomain>::loadAddV2(const Operator *op, ir::Graph &subg) } template <typename LoaderDomain> +void BaseLoader<LoaderDomain>::loadDepthToSpace(const Operator *op, ir::Graph &subg) +{ + ir::operation::DepthToSpace::Param param; + const auto *options = op->builtin_options_as_DepthToSpaceOptions(); + param.block_size = options->block_size(); + + loadOperationTo<ir::operation::DepthToSpace>(op, subg, param); +} + +template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadBinaryArithmetic( - const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type) + const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type) { ir::operation::BinaryArithmetic::Param param; param.arithmetic_type = op_type; @@ -780,8 +799,8 @@ void BaseLoader<LoaderDomain>::loadPack(const Operator *op, ir::Graph &subg) template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadElementwiseActivation( - const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, - float alpha, float beta) + const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, + float alpha, float beta) { ir::operation::ElementwiseActivation::Param param; param.op_type = op_type; @@ -844,8 +863,8 @@ void BaseLoader<LoaderDomain>::loadReduceAll(const Operator *op, ir::Graph &subg template <typename LoaderDomain> void 
BaseLoader<LoaderDomain>::loadElementwiseBinary( - const Operator *op, ir::Graph &subg, - ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) + const Operator *op, ir::Graph &subg, + ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) { ir::operation::ElementwiseBinary::Param param; param.op_type = op_type; @@ -870,7 +889,7 @@ void BaseLoader<LoaderDomain>::loadElementwiseUnary(const Operator *op, ir::Grap } }; qasymm8ToUint8( - subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT))); + subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT))); qasymm8ToUint8(subg.operands().at(eu->getOutputs().at(0))); } } @@ -915,8 +934,8 @@ void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &su break; default: throw std::runtime_error( - std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) + - " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL)); + std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) + + " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL)); } loadOperationTo<ir::operation::BatchMatMul>(op, subg, param); @@ -959,15 +978,15 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg) // Mapping from custom op name string to BuiltinOP enum std::map<std::string, BuiltinOP> builtin_map = { - {"AddV2", BuiltinOP::AddV2}, - {"All", BuiltinOP::ReduceAll}, - {"MatrixBandPart", BuiltinOP::MatrixBandPart}, - {"BatchMatMulV2", BuiltinOP::BatchMatMul}, - {"Einsum", BuiltinOP::Einsum}, - {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm}, - {"BroadcastTo", BuiltinOP::BroadcastTo}, - {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform}, - {"Erf", BuiltinOP::Erf}, + {"AddV2", BuiltinOP::AddV2}, + {"All", BuiltinOP::ReduceAll}, + {"MatrixBandPart", BuiltinOP::MatrixBandPart}, + {"BatchMatMulV2", BuiltinOP::BatchMatMul}, + {"Einsum", BuiltinOP::Einsum}, + {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm}, + {"BroadcastTo", BuiltinOP::BroadcastTo}, + {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform}, + {"Erf", BuiltinOP::Erf}, }; try @@ -1005,7 +1024,7 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg) break; default: throw std::runtime_error{ - "Loader: Custom OP map is defined but operation loader function is not defined"}; + "Loader: Custom OP map is defined but operation loader function is not defined"}; } return; @@ -1120,7 +1139,7 @@ void BaseLoader<LoaderDomain>::loadComparison(const Operator *op, ir::Graph &sub break; default: throw std::runtime_error( - std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); + std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); } loadOperationTo<ir::operation::Comparison>(op, subg, param); @@ -1224,25 +1243,15 @@ void BaseLoader<LoaderDomain>::loadWhile(const Operator *op, ir::Graph &subg) } template <typename LoaderDomain> -void BaseLoader<LoaderDomain>::loadArgMax(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain>::loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax) { - ir::operation::ArgMax::Param param; - const auto output_type = op->builtin_options_as_ArgMaxOptions()->output_type(); - switch (output_type) - { - case TensorType::TensorType_INT32: - case TensorType::TensorType_INT64: - param.output_type = tensorTypeToDataType(output_type); - break; - default: - throw 
std::runtime_error("ArgMax: `output_type` must be either int32 or int64."); - } - auto am = loadOperationTo<ir::operation::ArgMax>(op, subg, param); + ir::operation::ArgMinMax::Param param; + const auto output_type = is_argmax ? op->builtin_options_as_ArgMaxOptions()->output_type() + : op->builtin_options_as_ArgMinOptions()->output_type(); + param.output_type = tensorTypeToDataType(output_type); + param.is_arg_max = is_argmax; - auto &axisOperand = subg.operands().at(am->getInputs().at(ir::operation::ArgMax::Input::AXIS)); - if (!(axisOperand.operandSize() == 4 && (axisOperand.typeInfo().type() == ir::DataType::INT32 || - axisOperand.typeInfo().type() == ir::DataType::INT64))) - throw std::runtime_error("ArgMax: `axis` with an int32 or int64 element is only supported."); + loadOperationTo<ir::operation::ArgMinMax>(op, subg, param); } template <typename LoaderDomain> @@ -1287,7 +1296,7 @@ void BaseLoader<LoaderDomain>::loadUnidirectionalSequenceLSTM(const Operator *op { auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); throw std::runtime_error(std::string("loader doesn't support optional output tensor yet for ") - .append(EnumNameBuiltinOperator(builtin_code))); + .append(EnumNameBuiltinOperator(builtin_code))); } for (size_t i = 0; i < ir::operation::LSTM::Output::OUTPUT; ++i) { @@ -1355,6 +1364,9 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg case BuiltinOperator::BuiltinOperator_PACK: loadPack(op, subg); return; + case BuiltinOperator::BuiltinOperator_ELU: + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::ELU); + return; case BuiltinOperator::BuiltinOperator_RELU: loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, ir::operation::ElementwiseActivation::infinity, 0.f); @@ -1383,6 +1395,9 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg case BuiltinOperator::BuiltinOperator_SQRT: loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT); return; + case BuiltinOperator::BuiltinOperator_SQUARE: + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQUARE); + return; case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE: loadOperationTo<ir::operation::SquaredDifference>(op, subg); return; @@ -1499,7 +1514,10 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG); return; case BuiltinOperator::BuiltinOperator_ARG_MAX: - loadArgMax(op, subg); + loadArgMinMax(op, subg, true); + return; + case BuiltinOperator::BuiltinOperator_ARG_MIN: + loadArgMinMax(op, subg, false); return; case BuiltinOperator::BuiltinOperator_LOG: loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG); @@ -1513,6 +1531,10 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg case BuiltinOperator::BuiltinOperator_LOGICAL_NOT: loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT); return; + case BuiltinOperator::BuiltinOperator_LOGICAL_AND: + loadElementwiseBinary(op, subg, + ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); + return; case BuiltinOperator::BuiltinOperator_LOGICAL_OR: loadElementwiseBinary(op, subg, ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); @@ -1556,9 +1578,12 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg case 
BuiltinOperator::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: loadUnidirectionalSequenceLSTM(op, subg); return; + case BuiltinOperator::BuiltinOperator_DEPTH_TO_SPACE: + loadDepthToSpace(op, subg); + return; default: throw std::runtime_error( - std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); + std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); } } diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc index 33e1709a8..0d7b3eab4 100644 --- a/runtime/onert/frontend/circle/src/circle_loader.cc +++ b/runtime/onert/frontend/circle/src/circle_loader.cc @@ -196,7 +196,7 @@ void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg) param.activation = convertActivation(options->fused_activation_function()); std::unique_ptr<ir::Operation> new_op( - new ir::operation::BCQFullyConnected(inputs, outputs, param)); + new ir::operation::BCQFullyConnected(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } diff --git a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h index 0ff1f72a2..eb1775297 100644 --- a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h +++ b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h @@ -2155,9 +2155,8 @@ enum ActivationFunctionType inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] { static const ActivationFunctionType values[] = { - ActivationFunctionType_NONE, ActivationFunctionType_RELU, - ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6, - ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; + ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1, + ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; return values; } @@ -2218,9 +2217,8 @@ enum FullyConnectedOptionsWeightsFormat inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[3] { static const FullyConnectedOptionsWeightsFormat values[] = { - FullyConnectedOptionsWeightsFormat_DEFAULT, - FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8, - FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32}; + FullyConnectedOptionsWeightsFormat_DEFAULT, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8, + FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32}; return values; } @@ -2478,8 +2476,8 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab const circle::CustomQuantization *details_as_CustomQuantization() const { return details_type() == circle::QuantizationDetails_CustomQuantization - ? static_cast<const circle::CustomQuantization *>(details()) - : nullptr; + ? 
static_cast<const circle::CustomQuantization *>(details()) + : nullptr; } int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); } bool Verify(flatbuffers::Verifier &verifier) const @@ -2551,12 +2549,12 @@ struct QuantizationParametersBuilder }; inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters( - flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0, - flatbuffers::Offset<flatbuffers::Vector<float>> max = 0, - flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0, - flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0, - circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, - flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) + flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0, + flatbuffers::Offset<flatbuffers::Vector<float>> max = 0, + flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0, + flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0, + circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, + flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) { QuantizationParametersBuilder builder_(_fbb); builder_.add_quantized_dimension(quantized_dimension); @@ -2570,11 +2568,11 @@ inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters( } inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr, - const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr, - const std::vector<int64_t> *zero_point = nullptr, - circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, - flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr, + const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr, + const std::vector<int64_t> *zero_point = nullptr, + circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, + flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) { auto min__ = min ? _fbb.CreateVector<float>(*min) : 0; auto max__ = max ? _fbb.CreateVector<float>(*max) : 0; @@ -2789,20 +2787,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const circle::Int32Vector *array_segments_as_Int32Vector() const { return array_segments_type() == circle::SparseIndexVector_Int32Vector - ? static_cast<const circle::Int32Vector *>(array_segments()) - : nullptr; + ? static_cast<const circle::Int32Vector *>(array_segments()) + : nullptr; } const circle::Uint16Vector *array_segments_as_Uint16Vector() const { return array_segments_type() == circle::SparseIndexVector_Uint16Vector - ? static_cast<const circle::Uint16Vector *>(array_segments()) - : nullptr; + ? static_cast<const circle::Uint16Vector *>(array_segments()) + : nullptr; } const circle::Uint8Vector *array_segments_as_Uint8Vector() const { return array_segments_type() == circle::SparseIndexVector_Uint8Vector - ? static_cast<const circle::Uint8Vector *>(array_segments()) - : nullptr; + ? 
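// Hedged usage sketch of the CreateQuantizationParametersDirect convenience
// wrapper above; the per-channel values are illustrative, and the remaining
// arguments fall back to the defaults shown in the signature:
//
//   flatbuffers::FlatBufferBuilder fbb;
//   std::vector<float> scale{0.0078125f};
//   std::vector<int64_t> zero_point{128};
//   auto quant = circle::CreateQuantizationParametersDirect(
//       fbb, /*min=*/nullptr, /*max=*/nullptr, &scale, &zero_point);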
static_cast<const circle::Uint8Vector *>(array_segments()) + : nullptr; } circle::SparseIndexVector array_indices_type() const { @@ -2813,20 +2811,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const circle::Int32Vector *array_indices_as_Int32Vector() const { return array_indices_type() == circle::SparseIndexVector_Int32Vector - ? static_cast<const circle::Int32Vector *>(array_indices()) - : nullptr; + ? static_cast<const circle::Int32Vector *>(array_indices()) + : nullptr; } const circle::Uint16Vector *array_indices_as_Uint16Vector() const { return array_indices_type() == circle::SparseIndexVector_Uint16Vector - ? static_cast<const circle::Uint16Vector *>(array_indices()) - : nullptr; + ? static_cast<const circle::Uint16Vector *>(array_indices()) + : nullptr; } const circle::Uint8Vector *array_indices_as_Uint8Vector() const { return array_indices_type() == circle::SparseIndexVector_Uint8Vector - ? static_cast<const circle::Uint8Vector *>(array_indices()) - : nullptr; + ? static_cast<const circle::Uint8Vector *>(array_indices()) + : nullptr; } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2924,12 +2922,12 @@ struct DimensionMetadataBuilder }; inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata( - flatbuffers::FlatBufferBuilder &_fbb, - circle::DimensionType format = circle::DimensionType_DENSE, int32_t dense_size = 0, - circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE, - flatbuffers::Offset<void> array_segments = 0, - circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE, - flatbuffers::Offset<void> array_indices = 0) + flatbuffers::FlatBufferBuilder &_fbb, circle::DimensionType format = circle::DimensionType_DENSE, + int32_t dense_size = 0, + circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE, + flatbuffers::Offset<void> array_segments = 0, + circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE, + flatbuffers::Offset<void> array_indices = 0) { DimensionMetadataBuilder builder_(_fbb); builder_.add_array_indices(array_indices); @@ -2961,7 +2959,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *>( - VT_DIM_METADATA); + VT_DIM_METADATA); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2987,8 +2985,8 @@ struct SparsityParametersBuilder fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map); } void add_dim_metadata( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>> - dim_metadata) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>> + dim_metadata) { fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata); } @@ -3006,11 +3004,11 @@ struct SparsityParametersBuilder }; inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>> - dim_metadata = 0) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0, 
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>> + dim_metadata = 0) { SparsityParametersBuilder builder_(_fbb); builder_.add_dim_metadata(dim_metadata); @@ -3020,16 +3018,15 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters( } inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr, - const std::vector<int32_t> *block_map = nullptr, - const std::vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr, + const std::vector<int32_t> *block_map = nullptr, + const std::vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata = nullptr) { auto traversal_order__ = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0; auto block_map__ = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0; auto dim_metadata__ = - dim_metadata - ? _fbb.CreateVector<flatbuffers::Offset<circle::DimensionMetadata>>(*dim_metadata) - : 0; + dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::DimensionMetadata>>(*dim_metadata) + : 0; return circle::CreateSparsityParameters(_fbb, traversal_order__, block_map__, dim_metadata__); } @@ -3155,12 +3152,11 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, } inline flatbuffers::Offset<Tensor> CreateTensorDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr, - circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0, - const char *name = nullptr, - flatbuffers::Offset<circle::QuantizationParameters> quantization = 0, bool is_variable = false, - flatbuffers::Offset<circle::SparsityParameters> sparsity = 0, - const std::vector<int32_t> *shape_signature = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr, + circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0, + const char *name = nullptr, flatbuffers::Offset<circle::QuantizationParameters> quantization = 0, + bool is_variable = false, flatbuffers::Offset<circle::SparsityParameters> sparsity = 0, + const std::vector<int32_t> *shape_signature = nullptr) { auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0; auto name__ = name ? 
_fbb.CreateString(name) : 0; @@ -3190,7 +3186,7 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } @@ -3249,10 +3245,10 @@ struct Conv2DOptionsBuilder }; inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) + flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) { Conv2DOptionsBuilder builder_(_fbb); builder_.add_dilation_h_factor(dilation_h_factor); @@ -3287,7 +3283,7 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -3344,9 +3340,9 @@ struct Pool2DOptionsBuilder }; inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { Pool2DOptionsBuilder builder_(_fbb); builder_.add_filter_height(filter_height); @@ -3381,7 +3377,7 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } @@ -3445,10 +3441,10 @@ struct DepthwiseConv2DOptionsBuilder }; inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) + flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, + 
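// Hedged usage sketch of the generated option builders above; the argument
// values are illustrative:
//
//   flatbuffers::FlatBufferBuilder fbb;
//   auto conv_opts = circle::CreateConv2DOptions(
//       fbb, circle::Padding_SAME, /*stride_w=*/1, /*stride_h=*/1,
//       circle::ActivationFunctionType_RELU,
//       /*dilation_w_factor=*/1, /*dilation_h_factor=*/1);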
int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) { DepthwiseConv2DOptionsBuilder builder_(_fbb); builder_.add_dilation_h_factor(dilation_h_factor); @@ -3499,12 +3495,12 @@ struct ConcatEmbeddingsOptionsBuilder fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); } void add_num_columns_per_channel( - flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); } void add_embedding_dim_per_channel( - flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel); @@ -3523,9 +3519,9 @@ struct ConcatEmbeddingsOptionsBuilder }; inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) + flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) { ConcatEmbeddingsOptionsBuilder builder_(_fbb); builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); @@ -3540,9 +3536,9 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_ const std::vector<int32_t> *embedding_dim_per_channel = nullptr) { auto num_columns_per_channel__ = - num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0; + num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0; auto embedding_dim_per_channel__ = - embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0; + embedding_dim_per_channel ? 
_fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0; return circle::CreateConcatEmbeddingsOptions(_fbb, num_channels, num_columns_per_channel__, embedding_dim_per_channel__); } @@ -3609,7 +3605,7 @@ struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3653,9 +3649,9 @@ struct SVDFOptionsBuilder }; inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { SVDFOptionsBuilder builder_(_fbb); builder_.add_rank(rank); @@ -3675,7 +3671,7 @@ struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3718,9 +3714,9 @@ struct RNNOptionsBuilder }; inline flatbuffers::Offset<RNNOptions> CreateRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { RNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3741,7 +3737,7 @@ struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3789,9 +3785,9 @@ struct SequenceRNNOptionsBuilder }; inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { SequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3814,7 +3810,7 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; } bool asymmetric_quantize_inputs() const @@ -3869,9 
+3865,9 @@ struct BidirectionalSequenceRNNOptionsBuilder }; inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - bool merge_outputs = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool merge_outputs = false, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3894,12 +3890,12 @@ struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } circle::FullyConnectedOptionsWeightsFormat weights_format() const { return static_cast<circle::FullyConnectedOptionsWeightsFormat>( - GetField<int8_t>(VT_WEIGHTS_FORMAT, 0)); + GetField<int8_t>(VT_WEIGHTS_FORMAT, 0)); } bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; } bool asymmetric_quantize_inputs() const @@ -3955,11 +3951,11 @@ struct FullyConnectedOptionsBuilder }; inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - circle::FullyConnectedOptionsWeightsFormat weights_format = - circle::FullyConnectedOptionsWeightsFormat_DEFAULT, - bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + circle::FullyConnectedOptionsWeightsFormat weights_format = + circle::FullyConnectedOptionsWeightsFormat_DEFAULT, + bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) { FullyConnectedOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -4023,7 +4019,7 @@ struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4057,8 +4053,8 @@ struct ConcatenationOptionsBuilder }; inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { ConcatenationOptionsBuilder builder_(_fbb); builder_.add_axis(axis); @@ -4076,7 +4072,7 @@ struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + 
GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4109,8 +4105,8 @@ struct AddOptionsBuilder }; inline flatbuffers::Offset<AddOptions> CreateAddOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { AddOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -4127,7 +4123,7 @@ struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4160,8 +4156,8 @@ struct MulOptionsBuilder }; inline flatbuffers::Offset<MulOptions> CreateMulOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { MulOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -4178,7 +4174,7 @@ struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4211,8 +4207,8 @@ struct L2NormOptionsBuilder }; inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { L2NormOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -4263,7 +4259,7 @@ struct LocalResponseNormalizationOptionsBuilder fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); } explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -4303,7 +4299,7 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } @@ -4367,11 +4363,11 @@ struct LSTMOptionsBuilder }; inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, - circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + 
circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, + circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL, + bool asymmetric_quantize_inputs = false) { LSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -4396,7 +4392,7 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } @@ -4445,7 +4441,7 @@ struct UnidirectionalSequenceLSTMOptionsBuilder static_cast<uint8_t>(asymmetric_quantize_inputs), 0); } explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -4461,10 +4457,10 @@ struct UnidirectionalSequenceLSTMOptionsBuilder inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, + bool asymmetric_quantize_inputs = false) { UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -4490,7 +4486,7 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } @@ -4546,7 +4542,7 @@ struct BidirectionalSequenceLSTMOptionsBuilder static_cast<uint8_t>(asymmetric_quantize_inputs), 0); } explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -4561,10 +4557,10 @@ struct BidirectionalSequenceLSTMOptionsBuilder }; inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, - bool time_major = true, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, + bool time_major = true, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -5075,7 +5071,7 @@ struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table 
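// Hedged usage sketch: note the differing defaults in the two signatures
// above -- CreateUnidirectionalSequenceLSTMOptions defaults time_major to
// false, while CreateBidirectionalSequenceLSTMOptions defaults it to true:
//
//   flatbuffers::FlatBufferBuilder fbb;
//   auto lstm_opts = circle::CreateUnidirectionalSequenceLSTMOptions(
//       fbb, circle::ActivationFunctionType_TANH,
//       /*cell_clip=*/0.0f, /*proj_clip=*/0.0f, /*time_major=*/true);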
circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -5108,8 +5104,8 @@ struct SubOptionsBuilder }; inline flatbuffers::Offset<SubOptions> CreateSubOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { SubOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -5126,7 +5122,7 @@ struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -5159,8 +5155,8 @@ struct DivOptionsBuilder }; inline flatbuffers::Offset<DivOptions> CreateDivOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { DivOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -7976,7 +7972,7 @@ struct BCQFullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::T circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -8014,8 +8010,8 @@ struct BCQFullyConnectedOptionsBuilder }; inline flatbuffers::Offset<BCQFullyConnectedOptions> CreateBCQFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { BCQFullyConnectedOptionsBuilder builder_(_fbb); builder_.add_weights_hidden_size(weights_hidden_size); @@ -8035,7 +8031,7 @@ struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -8072,8 +8068,8 @@ struct InstanceNormOptionsBuilder }; inline flatbuffers::Offset<InstanceNormOptions> CreateInstanceNormOptions( - flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { InstanceNormOptionsBuilder builder_(_fbb); builder_.add_epsilon(epsilon); @@ -8191,632 +8187,632 @@ 
struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const circle::Conv2DOptions *builtin_options_as_Conv2DOptions() const { return builtin_options_type() == circle::BuiltinOptions_Conv2DOptions - ? static_cast<const circle::Conv2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::Conv2DOptions *>(builtin_options()) + : nullptr; } const circle::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const { return builtin_options_type() == circle::BuiltinOptions_DepthwiseConv2DOptions - ? static_cast<const circle::DepthwiseConv2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::DepthwiseConv2DOptions *>(builtin_options()) + : nullptr; } const circle::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const { return builtin_options_type() == circle::BuiltinOptions_ConcatEmbeddingsOptions - ? static_cast<const circle::ConcatEmbeddingsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ConcatEmbeddingsOptions *>(builtin_options()) + : nullptr; } const circle::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const { return builtin_options_type() == circle::BuiltinOptions_LSHProjectionOptions - ? static_cast<const circle::LSHProjectionOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LSHProjectionOptions *>(builtin_options()) + : nullptr; } const circle::Pool2DOptions *builtin_options_as_Pool2DOptions() const { return builtin_options_type() == circle::BuiltinOptions_Pool2DOptions - ? static_cast<const circle::Pool2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::Pool2DOptions *>(builtin_options()) + : nullptr; } const circle::SVDFOptions *builtin_options_as_SVDFOptions() const { return builtin_options_type() == circle::BuiltinOptions_SVDFOptions - ? static_cast<const circle::SVDFOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SVDFOptions *>(builtin_options()) + : nullptr; } const circle::RNNOptions *builtin_options_as_RNNOptions() const { return builtin_options_type() == circle::BuiltinOptions_RNNOptions - ? static_cast<const circle::RNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::RNNOptions *>(builtin_options()) + : nullptr; } const circle::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const { return builtin_options_type() == circle::BuiltinOptions_FullyConnectedOptions - ? static_cast<const circle::FullyConnectedOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::FullyConnectedOptions *>(builtin_options()) + : nullptr; } const circle::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const { return builtin_options_type() == circle::BuiltinOptions_SoftmaxOptions - ? static_cast<const circle::SoftmaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SoftmaxOptions *>(builtin_options()) + : nullptr; } const circle::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const { return builtin_options_type() == circle::BuiltinOptions_ConcatenationOptions - ? static_cast<const circle::ConcatenationOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ConcatenationOptions *>(builtin_options()) + : nullptr; } const circle::AddOptions *builtin_options_as_AddOptions() const { return builtin_options_type() == circle::BuiltinOptions_AddOptions - ? static_cast<const circle::AddOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::AddOptions *>(builtin_options()) + : nullptr; } const circle::L2NormOptions *builtin_options_as_L2NormOptions() const { return builtin_options_type() == circle::BuiltinOptions_L2NormOptions - ? static_cast<const circle::L2NormOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::L2NormOptions *>(builtin_options()) + : nullptr; } const circle::LocalResponseNormalizationOptions * builtin_options_as_LocalResponseNormalizationOptions() const { return builtin_options_type() == circle::BuiltinOptions_LocalResponseNormalizationOptions - ? static_cast<const circle::LocalResponseNormalizationOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LocalResponseNormalizationOptions *>(builtin_options()) + : nullptr; } const circle::LSTMOptions *builtin_options_as_LSTMOptions() const { return builtin_options_type() == circle::BuiltinOptions_LSTMOptions - ? static_cast<const circle::LSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LSTMOptions *>(builtin_options()) + : nullptr; } const circle::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const { return builtin_options_type() == circle::BuiltinOptions_ResizeBilinearOptions - ? static_cast<const circle::ResizeBilinearOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ResizeBilinearOptions *>(builtin_options()) + : nullptr; } const circle::CallOptions *builtin_options_as_CallOptions() const { return builtin_options_type() == circle::BuiltinOptions_CallOptions - ? static_cast<const circle::CallOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::CallOptions *>(builtin_options()) + : nullptr; } const circle::ReshapeOptions *builtin_options_as_ReshapeOptions() const { return builtin_options_type() == circle::BuiltinOptions_ReshapeOptions - ? static_cast<const circle::ReshapeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ReshapeOptions *>(builtin_options()) + : nullptr; } const circle::SkipGramOptions *builtin_options_as_SkipGramOptions() const { return builtin_options_type() == circle::BuiltinOptions_SkipGramOptions - ? static_cast<const circle::SkipGramOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SkipGramOptions *>(builtin_options()) + : nullptr; } const circle::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const { return builtin_options_type() == circle::BuiltinOptions_SpaceToDepthOptions - ? static_cast<const circle::SpaceToDepthOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SpaceToDepthOptions *>(builtin_options()) + : nullptr; } const circle::EmbeddingLookupSparseOptions * builtin_options_as_EmbeddingLookupSparseOptions() const { return builtin_options_type() == circle::BuiltinOptions_EmbeddingLookupSparseOptions - ? static_cast<const circle::EmbeddingLookupSparseOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::EmbeddingLookupSparseOptions *>(builtin_options()) + : nullptr; } const circle::MulOptions *builtin_options_as_MulOptions() const { return builtin_options_type() == circle::BuiltinOptions_MulOptions - ? static_cast<const circle::MulOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::MulOptions *>(builtin_options()) + : nullptr; } const circle::PadOptions *builtin_options_as_PadOptions() const { return builtin_options_type() == circle::BuiltinOptions_PadOptions - ? static_cast<const circle::PadOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::PadOptions *>(builtin_options()) + : nullptr; } const circle::GatherOptions *builtin_options_as_GatherOptions() const { return builtin_options_type() == circle::BuiltinOptions_GatherOptions - ? static_cast<const circle::GatherOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::GatherOptions *>(builtin_options()) + : nullptr; } const circle::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const { return builtin_options_type() == circle::BuiltinOptions_BatchToSpaceNDOptions - ? static_cast<const circle::BatchToSpaceNDOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BatchToSpaceNDOptions *>(builtin_options()) + : nullptr; } const circle::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const { return builtin_options_type() == circle::BuiltinOptions_SpaceToBatchNDOptions - ? static_cast<const circle::SpaceToBatchNDOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SpaceToBatchNDOptions *>(builtin_options()) + : nullptr; } const circle::TransposeOptions *builtin_options_as_TransposeOptions() const { return builtin_options_type() == circle::BuiltinOptions_TransposeOptions - ? static_cast<const circle::TransposeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::TransposeOptions *>(builtin_options()) + : nullptr; } const circle::ReducerOptions *builtin_options_as_ReducerOptions() const { return builtin_options_type() == circle::BuiltinOptions_ReducerOptions - ? static_cast<const circle::ReducerOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ReducerOptions *>(builtin_options()) + : nullptr; } const circle::SubOptions *builtin_options_as_SubOptions() const { return builtin_options_type() == circle::BuiltinOptions_SubOptions - ? static_cast<const circle::SubOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SubOptions *>(builtin_options()) + : nullptr; } const circle::DivOptions *builtin_options_as_DivOptions() const { return builtin_options_type() == circle::BuiltinOptions_DivOptions - ? static_cast<const circle::DivOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::DivOptions *>(builtin_options()) + : nullptr; } const circle::SqueezeOptions *builtin_options_as_SqueezeOptions() const { return builtin_options_type() == circle::BuiltinOptions_SqueezeOptions - ? static_cast<const circle::SqueezeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SqueezeOptions *>(builtin_options()) + : nullptr; } const circle::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const { return builtin_options_type() == circle::BuiltinOptions_SequenceRNNOptions - ? static_cast<const circle::SequenceRNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SequenceRNNOptions *>(builtin_options()) + : nullptr; } const circle::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const { return builtin_options_type() == circle::BuiltinOptions_StridedSliceOptions - ? static_cast<const circle::StridedSliceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::StridedSliceOptions *>(builtin_options()) + : nullptr; } const circle::ExpOptions *builtin_options_as_ExpOptions() const { return builtin_options_type() == circle::BuiltinOptions_ExpOptions - ? static_cast<const circle::ExpOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::ExpOptions *>(builtin_options()) + : nullptr; } const circle::TopKV2Options *builtin_options_as_TopKV2Options() const { return builtin_options_type() == circle::BuiltinOptions_TopKV2Options - ? static_cast<const circle::TopKV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::TopKV2Options *>(builtin_options()) + : nullptr; } const circle::SplitOptions *builtin_options_as_SplitOptions() const { return builtin_options_type() == circle::BuiltinOptions_SplitOptions - ? static_cast<const circle::SplitOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SplitOptions *>(builtin_options()) + : nullptr; } const circle::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogSoftmaxOptions - ? static_cast<const circle::LogSoftmaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LogSoftmaxOptions *>(builtin_options()) + : nullptr; } const circle::CastOptions *builtin_options_as_CastOptions() const { return builtin_options_type() == circle::BuiltinOptions_CastOptions - ? static_cast<const circle::CastOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::CastOptions *>(builtin_options()) + : nullptr; } const circle::DequantizeOptions *builtin_options_as_DequantizeOptions() const { return builtin_options_type() == circle::BuiltinOptions_DequantizeOptions - ? static_cast<const circle::DequantizeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::DequantizeOptions *>(builtin_options()) + : nullptr; } const circle::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const { return builtin_options_type() == circle::BuiltinOptions_MaximumMinimumOptions - ? static_cast<const circle::MaximumMinimumOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::MaximumMinimumOptions *>(builtin_options()) + : nullptr; } const circle::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const { return builtin_options_type() == circle::BuiltinOptions_ArgMaxOptions - ? static_cast<const circle::ArgMaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ArgMaxOptions *>(builtin_options()) + : nullptr; } const circle::LessOptions *builtin_options_as_LessOptions() const { return builtin_options_type() == circle::BuiltinOptions_LessOptions - ? static_cast<const circle::LessOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LessOptions *>(builtin_options()) + : nullptr; } const circle::NegOptions *builtin_options_as_NegOptions() const { return builtin_options_type() == circle::BuiltinOptions_NegOptions - ? static_cast<const circle::NegOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::NegOptions *>(builtin_options()) + : nullptr; } const circle::PadV2Options *builtin_options_as_PadV2Options() const { return builtin_options_type() == circle::BuiltinOptions_PadV2Options - ? static_cast<const circle::PadV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::PadV2Options *>(builtin_options()) + : nullptr; } const circle::GreaterOptions *builtin_options_as_GreaterOptions() const { return builtin_options_type() == circle::BuiltinOptions_GreaterOptions - ? static_cast<const circle::GreaterOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::GreaterOptions *>(builtin_options()) + : nullptr; } const circle::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_GreaterEqualOptions - ? static_cast<const circle::GreaterEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::GreaterEqualOptions *>(builtin_options()) + : nullptr; } const circle::LessEqualOptions *builtin_options_as_LessEqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_LessEqualOptions - ? static_cast<const circle::LessEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LessEqualOptions *>(builtin_options()) + : nullptr; } const circle::SelectOptions *builtin_options_as_SelectOptions() const { return builtin_options_type() == circle::BuiltinOptions_SelectOptions - ? static_cast<const circle::SelectOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SelectOptions *>(builtin_options()) + : nullptr; } const circle::SliceOptions *builtin_options_as_SliceOptions() const { return builtin_options_type() == circle::BuiltinOptions_SliceOptions - ? static_cast<const circle::SliceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SliceOptions *>(builtin_options()) + : nullptr; } const circle::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const { return builtin_options_type() == circle::BuiltinOptions_TransposeConvOptions - ? static_cast<const circle::TransposeConvOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::TransposeConvOptions *>(builtin_options()) + : nullptr; } const circle::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const { return builtin_options_type() == circle::BuiltinOptions_SparseToDenseOptions - ? static_cast<const circle::SparseToDenseOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SparseToDenseOptions *>(builtin_options()) + : nullptr; } const circle::TileOptions *builtin_options_as_TileOptions() const { return builtin_options_type() == circle::BuiltinOptions_TileOptions - ? static_cast<const circle::TileOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::TileOptions *>(builtin_options()) + : nullptr; } const circle::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const { return builtin_options_type() == circle::BuiltinOptions_ExpandDimsOptions - ? static_cast<const circle::ExpandDimsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ExpandDimsOptions *>(builtin_options()) + : nullptr; } const circle::EqualOptions *builtin_options_as_EqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_EqualOptions - ? static_cast<const circle::EqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::EqualOptions *>(builtin_options()) + : nullptr; } const circle::NotEqualOptions *builtin_options_as_NotEqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_NotEqualOptions - ? static_cast<const circle::NotEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::NotEqualOptions *>(builtin_options()) + : nullptr; } const circle::ShapeOptions *builtin_options_as_ShapeOptions() const { return builtin_options_type() == circle::BuiltinOptions_ShapeOptions - ? static_cast<const circle::ShapeOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::ShapeOptions *>(builtin_options()) + : nullptr; } const circle::PowOptions *builtin_options_as_PowOptions() const { return builtin_options_type() == circle::BuiltinOptions_PowOptions - ? static_cast<const circle::PowOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::PowOptions *>(builtin_options()) + : nullptr; } const circle::ArgMinOptions *builtin_options_as_ArgMinOptions() const { return builtin_options_type() == circle::BuiltinOptions_ArgMinOptions - ? static_cast<const circle::ArgMinOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ArgMinOptions *>(builtin_options()) + : nullptr; } const circle::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const { return builtin_options_type() == circle::BuiltinOptions_FakeQuantOptions - ? static_cast<const circle::FakeQuantOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::FakeQuantOptions *>(builtin_options()) + : nullptr; } const circle::PackOptions *builtin_options_as_PackOptions() const { return builtin_options_type() == circle::BuiltinOptions_PackOptions - ? static_cast<const circle::PackOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::PackOptions *>(builtin_options()) + : nullptr; } const circle::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogicalOrOptions - ? static_cast<const circle::LogicalOrOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LogicalOrOptions *>(builtin_options()) + : nullptr; } const circle::OneHotOptions *builtin_options_as_OneHotOptions() const { return builtin_options_type() == circle::BuiltinOptions_OneHotOptions - ? static_cast<const circle::OneHotOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::OneHotOptions *>(builtin_options()) + : nullptr; } const circle::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogicalAndOptions - ? static_cast<const circle::LogicalAndOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LogicalAndOptions *>(builtin_options()) + : nullptr; } const circle::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogicalNotOptions - ? static_cast<const circle::LogicalNotOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LogicalNotOptions *>(builtin_options()) + : nullptr; } const circle::UnpackOptions *builtin_options_as_UnpackOptions() const { return builtin_options_type() == circle::BuiltinOptions_UnpackOptions - ? static_cast<const circle::UnpackOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::UnpackOptions *>(builtin_options()) + : nullptr; } const circle::FloorDivOptions *builtin_options_as_FloorDivOptions() const { return builtin_options_type() == circle::BuiltinOptions_FloorDivOptions - ? static_cast<const circle::FloorDivOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::FloorDivOptions *>(builtin_options()) + : nullptr; } const circle::SquareOptions *builtin_options_as_SquareOptions() const { return builtin_options_type() == circle::BuiltinOptions_SquareOptions - ? static_cast<const circle::SquareOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::SquareOptions *>(builtin_options()) + : nullptr; } const circle::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const { return builtin_options_type() == circle::BuiltinOptions_ZerosLikeOptions - ? static_cast<const circle::ZerosLikeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ZerosLikeOptions *>(builtin_options()) + : nullptr; } const circle::FillOptions *builtin_options_as_FillOptions() const { return builtin_options_type() == circle::BuiltinOptions_FillOptions - ? static_cast<const circle::FillOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::FillOptions *>(builtin_options()) + : nullptr; } const circle::BidirectionalSequenceLSTMOptions * builtin_options_as_BidirectionalSequenceLSTMOptions() const { return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceLSTMOptions - ? static_cast<const circle::BidirectionalSequenceLSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; } const circle::BidirectionalSequenceRNNOptions * builtin_options_as_BidirectionalSequenceRNNOptions() const { return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceRNNOptions - ? static_cast<const circle::BidirectionalSequenceRNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BidirectionalSequenceRNNOptions *>(builtin_options()) + : nullptr; } const circle::UnidirectionalSequenceLSTMOptions * builtin_options_as_UnidirectionalSequenceLSTMOptions() const { return builtin_options_type() == circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions - ? static_cast<const circle::UnidirectionalSequenceLSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::UnidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; } const circle::FloorModOptions *builtin_options_as_FloorModOptions() const { return builtin_options_type() == circle::BuiltinOptions_FloorModOptions - ? static_cast<const circle::FloorModOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::FloorModOptions *>(builtin_options()) + : nullptr; } const circle::RangeOptions *builtin_options_as_RangeOptions() const { return builtin_options_type() == circle::BuiltinOptions_RangeOptions - ? static_cast<const circle::RangeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::RangeOptions *>(builtin_options()) + : nullptr; } const circle::ResizeNearestNeighborOptions * builtin_options_as_ResizeNearestNeighborOptions() const { return builtin_options_type() == circle::BuiltinOptions_ResizeNearestNeighborOptions - ? static_cast<const circle::ResizeNearestNeighborOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ResizeNearestNeighborOptions *>(builtin_options()) + : nullptr; } const circle::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const { return builtin_options_type() == circle::BuiltinOptions_LeakyReluOptions - ? static_cast<const circle::LeakyReluOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LeakyReluOptions *>(builtin_options()) + : nullptr; } const circle::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const { return builtin_options_type() == circle::BuiltinOptions_SquaredDifferenceOptions - ? static_cast<const circle::SquaredDifferenceOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::SquaredDifferenceOptions *>(builtin_options()) + : nullptr; } const circle::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const { return builtin_options_type() == circle::BuiltinOptions_MirrorPadOptions - ? static_cast<const circle::MirrorPadOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::MirrorPadOptions *>(builtin_options()) + : nullptr; } const circle::AbsOptions *builtin_options_as_AbsOptions() const { return builtin_options_type() == circle::BuiltinOptions_AbsOptions - ? static_cast<const circle::AbsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::AbsOptions *>(builtin_options()) + : nullptr; } const circle::SplitVOptions *builtin_options_as_SplitVOptions() const { return builtin_options_type() == circle::BuiltinOptions_SplitVOptions - ? static_cast<const circle::SplitVOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SplitVOptions *>(builtin_options()) + : nullptr; } const circle::UniqueOptions *builtin_options_as_UniqueOptions() const { return builtin_options_type() == circle::BuiltinOptions_UniqueOptions - ? static_cast<const circle::UniqueOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::UniqueOptions *>(builtin_options()) + : nullptr; } const circle::ReverseV2Options *builtin_options_as_ReverseV2Options() const { return builtin_options_type() == circle::BuiltinOptions_ReverseV2Options - ? static_cast<const circle::ReverseV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ReverseV2Options *>(builtin_options()) + : nullptr; } const circle::AddNOptions *builtin_options_as_AddNOptions() const { return builtin_options_type() == circle::BuiltinOptions_AddNOptions - ? static_cast<const circle::AddNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::AddNOptions *>(builtin_options()) + : nullptr; } const circle::GatherNdOptions *builtin_options_as_GatherNdOptions() const { return builtin_options_type() == circle::BuiltinOptions_GatherNdOptions - ? static_cast<const circle::GatherNdOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::GatherNdOptions *>(builtin_options()) + : nullptr; } const circle::CosOptions *builtin_options_as_CosOptions() const { return builtin_options_type() == circle::BuiltinOptions_CosOptions - ? static_cast<const circle::CosOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::CosOptions *>(builtin_options()) + : nullptr; } const circle::WhereOptions *builtin_options_as_WhereOptions() const { return builtin_options_type() == circle::BuiltinOptions_WhereOptions - ? static_cast<const circle::WhereOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::WhereOptions *>(builtin_options()) + : nullptr; } const circle::RankOptions *builtin_options_as_RankOptions() const { return builtin_options_type() == circle::BuiltinOptions_RankOptions - ? static_cast<const circle::RankOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::RankOptions *>(builtin_options()) + : nullptr; } const circle::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const { return builtin_options_type() == circle::BuiltinOptions_ReverseSequenceOptions - ? static_cast<const circle::ReverseSequenceOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::ReverseSequenceOptions *>(builtin_options()) + : nullptr; } const circle::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const { return builtin_options_type() == circle::BuiltinOptions_MatrixDiagOptions - ? static_cast<const circle::MatrixDiagOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::MatrixDiagOptions *>(builtin_options()) + : nullptr; } const circle::QuantizeOptions *builtin_options_as_QuantizeOptions() const { return builtin_options_type() == circle::BuiltinOptions_QuantizeOptions - ? static_cast<const circle::QuantizeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::QuantizeOptions *>(builtin_options()) + : nullptr; } const circle::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const { return builtin_options_type() == circle::BuiltinOptions_MatrixSetDiagOptions - ? static_cast<const circle::MatrixSetDiagOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::MatrixSetDiagOptions *>(builtin_options()) + : nullptr; } const circle::HardSwishOptions *builtin_options_as_HardSwishOptions() const { return builtin_options_type() == circle::BuiltinOptions_HardSwishOptions - ? static_cast<const circle::HardSwishOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::HardSwishOptions *>(builtin_options()) + : nullptr; } const circle::IfOptions *builtin_options_as_IfOptions() const { return builtin_options_type() == circle::BuiltinOptions_IfOptions - ? static_cast<const circle::IfOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::IfOptions *>(builtin_options()) + : nullptr; } const circle::WhileOptions *builtin_options_as_WhileOptions() const { return builtin_options_type() == circle::BuiltinOptions_WhileOptions - ? static_cast<const circle::WhileOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::WhileOptions *>(builtin_options()) + : nullptr; } const circle::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const { return builtin_options_type() == circle::BuiltinOptions_DepthToSpaceOptions - ? static_cast<const circle::DepthToSpaceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::DepthToSpaceOptions *>(builtin_options()) + : nullptr; } const circle::NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const { return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV4Options - ? static_cast<const circle::NonMaxSuppressionV4Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::NonMaxSuppressionV4Options *>(builtin_options()) + : nullptr; } const circle::NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const { return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV5Options - ? static_cast<const circle::NonMaxSuppressionV5Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::NonMaxSuppressionV5Options *>(builtin_options()) + : nullptr; } const circle::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const { return builtin_options_type() == circle::BuiltinOptions_ScatterNdOptions - ? static_cast<const circle::ScatterNdOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ScatterNdOptions *>(builtin_options()) + : nullptr; } const circle::SelectV2Options *builtin_options_as_SelectV2Options() const { return builtin_options_type() == circle::BuiltinOptions_SelectV2Options - ? 
static_cast<const circle::SelectV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SelectV2Options *>(builtin_options()) + : nullptr; } const circle::DensifyOptions *builtin_options_as_DensifyOptions() const { return builtin_options_type() == circle::BuiltinOptions_DensifyOptions - ? static_cast<const circle::DensifyOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::DensifyOptions *>(builtin_options()) + : nullptr; } const circle::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const { return builtin_options_type() == circle::BuiltinOptions_SegmentSumOptions - ? static_cast<const circle::SegmentSumOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SegmentSumOptions *>(builtin_options()) + : nullptr; } const circle::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const { return builtin_options_type() == circle::BuiltinOptions_BatchMatMulOptions - ? static_cast<const circle::BatchMatMulOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BatchMatMulOptions *>(builtin_options()) + : nullptr; } const circle::BCQGatherOptions *builtin_options_as_BCQGatherOptions() const { return builtin_options_type() == circle::BuiltinOptions_BCQGatherOptions - ? static_cast<const circle::BCQGatherOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BCQGatherOptions *>(builtin_options()) + : nullptr; } const circle::BCQFullyConnectedOptions *builtin_options_as_BCQFullyConnectedOptions() const { return builtin_options_type() == circle::BuiltinOptions_BCQFullyConnectedOptions - ? static_cast<const circle::BCQFullyConnectedOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BCQFullyConnectedOptions *>(builtin_options()) + : nullptr; } const circle::InstanceNormOptions *builtin_options_as_InstanceNormOptions() const { return builtin_options_type() == circle::BuiltinOptions_InstanceNormOptions - ? static_cast<const circle::InstanceNormOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::InstanceNormOptions *>(builtin_options()) + : nullptr; } const flatbuffers::Vector<uint8_t> *custom_options() const { @@ -9558,7 +9554,7 @@ struct OperatorBuilder static_cast<int8_t>(custom_options_format), 0); } void add_mutating_variable_inputs( - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) { fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs); } @@ -9580,15 +9576,15 @@ struct OperatorBuilder }; inline flatbuffers::Offset<Operator> CreateOperator( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, - circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, - flatbuffers::Offset<void> builtin_options = 0, - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0, - circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, + circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, + flatbuffers::Offset<void> builtin_options = 0, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0, + circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0) { OperatorBuilder builder_(_fbb); builder_.add_intermediates(intermediates); @@ -9604,20 +9600,20 @@ inline flatbuffers::Offset<Operator> CreateOperator( } inline flatbuffers::Offset<Operator> CreateOperatorDirect( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, - const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, - circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, - flatbuffers::Offset<void> builtin_options = 0, - const std::vector<uint8_t> *custom_options = nullptr, - circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, - const std::vector<uint8_t> *mutating_variable_inputs = nullptr, - const std::vector<int32_t> *intermediates = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, + circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, + flatbuffers::Offset<void> builtin_options = 0, + const std::vector<uint8_t> *custom_options = nullptr, + circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, + const std::vector<uint8_t> *mutating_variable_inputs = nullptr, + const std::vector<int32_t> *intermediates = nullptr) { auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0; auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0; auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0; auto mutating_variable_inputs__ = - mutating_variable_inputs ? 
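
CreateOperatorDirect below is the convenience form of CreateOperator: it copies plain std::vector arguments into FlatBuffers vectors (a null pointer becomes offset 0) and then forwards to the builder-based overload. A sketch of serializing a single operator with it; the opcode and tensor indices are placeholders for illustration only:

  #include <vector>
  #include "circle_schema_generated.h"

  // Sketch only: serialize one operator with the Direct convenience form.
  // Remaining arguments keep their defaults (no builtin or custom options).
  flatbuffers::Offset<circle::Operator> buildOperator(flatbuffers::FlatBufferBuilder &fbb)
  {
    std::vector<int32_t> inputs{0, 1}; // placeholder tensor indices
    std::vector<int32_t> outputs{2};
    return circle::CreateOperatorDirect(fbb, /*opcode_index=*/0, &inputs, &outputs);
  }
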
_fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0; + mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0; auto intermediates__ = intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0; return circle::CreateOperator(_fbb, opcode_index, inputs__, outputs__, builtin_options_type, builtin_options, custom_options__, custom_options_format, @@ -9651,7 +9647,7 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *operators() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *>( - VT_OPERATORS); + VT_OPERATORS); } const flatbuffers::String *name() const { @@ -9693,7 +9689,7 @@ struct SubGraphBuilder fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs); } void add_operators( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators) { fbb_.AddOffset(SubGraph::VT_OPERATORS, operators); } @@ -9719,13 +9715,13 @@ struct SubGraphBuilder }; inline flatbuffers::Offset<SubGraph> CreateSubGraph( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators = 0, - flatbuffers::Offset<flatbuffers::String> name = 0, - circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators = 0, + flatbuffers::Offset<flatbuffers::String> name = 0, + circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) { SubGraphBuilder builder_(_fbb); builder_.add_name(name); @@ -9738,17 +9734,17 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraph( } inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector<flatbuffers::Offset<circle::Tensor>> *tensors = nullptr, - const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, - const std::vector<flatbuffers::Offset<circle::Operator>> *operators = nullptr, - const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<flatbuffers::Offset<circle::Tensor>> *tensors = nullptr, + const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, + const std::vector<flatbuffers::Offset<circle::Operator>> *operators = nullptr, + const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) { auto tensors__ = tensors ? _fbb.CreateVector<flatbuffers::Offset<circle::Tensor>>(*tensors) : 0; auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0; auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0; auto operators__ = - operators ? _fbb.CreateVector<flatbuffers::Offset<circle::Operator>>(*operators) : 0; + operators ? 
_fbb.CreateVector<flatbuffers::Offset<circle::Operator>>(*operators) : 0; auto name__ = name ? _fbb.CreateString(name) : 0; return circle::CreateSubGraph(_fbb, tensors__, inputs__, outputs__, operators__, name__, data_format); @@ -9893,12 +9889,12 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *>( - VT_OPERATOR_CODES); + VT_OPERATOR_CODES); } const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *>( - VT_SUBGRAPHS); + VT_SUBGRAPHS); } const flatbuffers::String *description() const { @@ -9915,7 +9911,7 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *metadata() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *>( - VT_METADATA); + VT_METADATA); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -9939,13 +9935,13 @@ struct ModelBuilder flatbuffers::uoffset_t start_; void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); } void add_operator_codes( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>> - operator_codes) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>> + operator_codes) { fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); } void add_subgraphs( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs) { fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs); } @@ -9963,7 +9959,7 @@ struct ModelBuilder fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer); } void add_metadata( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata) { fbb_.AddOffset(Model::VT_METADATA, metadata); } @@ -9981,14 +9977,14 @@ struct ModelBuilder }; inline flatbuffers::Offset<Model> CreateModel( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>> - operator_codes = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs = 0, - flatbuffers::Offset<flatbuffers::String> description = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>> + operator_codes = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs = 0, + flatbuffers::Offset<flatbuffers::String> description = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0) { ModelBuilder builder_(_fbb); 
builder_.add_metadata(metadata); @@ -10002,24 +9998,24 @@ inline flatbuffers::Offset<Model> CreateModel( } inline flatbuffers::Offset<Model> CreateModelDirect( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - const std::vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes = nullptr, - const std::vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs = nullptr, - const char *description = nullptr, - const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr, - const std::vector<int32_t> *metadata_buffer = nullptr, - const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + const std::vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes = nullptr, + const std::vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs = nullptr, + const char *description = nullptr, + const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr, + const std::vector<int32_t> *metadata_buffer = nullptr, + const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr) { auto operator_codes__ = - operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes) - : 0; + operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes) + : 0; auto subgraphs__ = - subgraphs ? _fbb.CreateVector<flatbuffers::Offset<circle::SubGraph>>(*subgraphs) : 0; + subgraphs ? _fbb.CreateVector<flatbuffers::Offset<circle::SubGraph>>(*subgraphs) : 0; auto description__ = description ? _fbb.CreateString(description) : 0; auto buffers__ = buffers ? _fbb.CreateVector<flatbuffers::Offset<circle::Buffer>>(*buffers) : 0; auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0; auto metadata__ = - metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0; + metadata ? 
_fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0; return circle::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__, buffers__, metadata_buffer__, metadata__); } diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc index 81cd38f4f..63036a398 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc @@ -20,7 +20,9 @@ // TODO Support multiple subgraphs ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksModel *model) noexcept - : _subgraphs{model->getSubGraphs()}, _compiler{new onert::compiler::Compiler{_subgraphs}} + : _subgraphs{model->getSubGraphs()}, _tracing_ctx{std::make_unique<onert::util::TracingCtx>( + _subgraphs.get())}, + _compiler{new onert::compiler::Compiler{_subgraphs, _tracing_ctx.get()}} { if (model->allowedToFp16()) { diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h index 5f0650b9a..bd61f9d86 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h @@ -23,6 +23,7 @@ #include "ir/Graph.h" #include "ir/Subgraphs.h" #include "exec/IExecutor.h" +#include "util/TracingCtx.h" struct ANeuralNetworksCompilation { @@ -40,6 +41,14 @@ public: private: std::shared_ptr<onert::ir::Subgraphs> _subgraphs; + // TODO Refine the ownership of TracingCtx + // In case of nnfw API, nnfw_session has ownership of TracingCtx. + // In case of nnapi, there is no concept of session and primary model might have the ownership + // of TracingCtx. + // Since we don't support multiple models yet with nnapi in ONE, let's implement this later + // and let's make it work with one model for now. + std::unique_ptr<onert::util::TracingCtx> _tracing_ctx; + std::shared_ptr<onert::compiler::Compiler> _compiler; std::shared_ptr<onert::exec::ExecutorMap> _executors; }; diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc index 2bea729be..b0ea51917 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc @@ -20,7 +20,7 @@ #include "util/logging.h" ANeuralNetworksEvent::ANeuralNetworksEvent(const std::shared_ptr<onert::exec::Execution> &execution) - : _execution{execution} + : _execution{execution} { // DO NOTHING } diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc index 6114b74b0..21c7cdd6f 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc @@ -140,8 +140,8 @@ bool ANeuralNetworksExecution::setInput(uint32_t index, const ANeuralNetworksOpe const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo(); const auto shape = (type != nullptr) - ? NNAPIConvert::getShape(type) - : _execution->primary_subgraph().operands().at(operand_index).shape(); + ? NNAPIConvert::getShape(type) + : _execution->primary_subgraph().operands().at(operand_index).shape(); // NOTE The nnapi does not provide setting io_layout and not support changing layout. 
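
The ownership arrangement introduced in ANeuralNetworksCompilation above is worth spelling out: the compilation owns the TracingCtx through a unique_ptr and hands the Compiler only a raw, non-owning pointer, which is why the member declaration order in the header (_subgraphs, then _tracing_ctx, then _compiler) matters — members initialize top to bottom. A reduced sketch of the same pattern with hypothetical stand-in types, not the real onert classes:

  #include <memory>

  struct Subgraphs { };                                      // stand-in for onert::ir::Subgraphs
  struct TracingCtx { explicit TracingCtx(Subgraphs *) {} }; // stand-in for onert::util::TracingCtx
  struct Compiler { Compiler(std::shared_ptr<Subgraphs>, TracingCtx *) {} };

  class Compilation
  {
  public:
    explicit Compilation(std::shared_ptr<Subgraphs> subgraphs)
      : _subgraphs{std::move(subgraphs)},
        _tracing_ctx{std::make_unique<TracingCtx>(_subgraphs.get())},
        _compiler{std::make_shared<Compiler>(_subgraphs, _tracing_ctx.get())}
    {
    }

  private:
    // Declaration order matters: _tracing_ctx dereferences _subgraphs and
    // _compiler borrows _tracing_ctx, so each must be initialized first.
    std::shared_ptr<Subgraphs> _subgraphs;
    std::unique_ptr<TracingCtx> _tracing_ctx;
    std::shared_ptr<Compiler> _compiler;
  };
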
In other // words, we can assume that io_layout from nnapi always is the same as layout of the used @@ -173,8 +173,8 @@ bool ANeuralNetworksExecution::setOptionalInput(uint32_t index, const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo(); const auto shape = (type != nullptr) - ? NNAPIConvert::getShape(type) - : _execution->primary_subgraph().operands().at(operand_index).shape(); + ? NNAPIConvert::getShape(type) + : _execution->primary_subgraph().operands().at(operand_index).shape(); // ANeuralNetworksExecution::setInput() uses only shape information ANeuralNetworksOperandType optional_input_type; @@ -208,8 +208,8 @@ bool ANeuralNetworksExecution::setOutput(uint32_t index, const ANeuralNetworksOp const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo(); const auto shape = (type != nullptr) - ? NNAPIConvert::getShape(type) - : _execution->primary_subgraph().operands().at(operand_index).shape(); + ? NNAPIConvert::getShape(type) + : _execution->primary_subgraph().operands().at(operand_index).shape(); // NOTE The nnapi does not provide setting io_layout and not support changing layout. In other // words, we can assume that io_layout from nnapi always is the same as layout of the used diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h index 1f4b868f6..70c5d2a4b 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h @@ -27,7 +27,7 @@ struct ANeuralNetworksExecution { public: ANeuralNetworksExecution(const std::shared_ptr<onert::exec::ExecutorMap> &executors) - : _execution{std::make_shared<onert::exec::Execution>(executors)} + : _execution{std::make_shared<onert::exec::Execution>(executors)} { // DO NOTHING } diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc index 97b820aea..3e2bea114 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc @@ -27,7 +27,7 @@ // ANeuralNetworksModel // ANeuralNetworksModel::ANeuralNetworksModel() noexcept - : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false} + : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false} { _graph = std::make_shared<onert::ir::Graph>(); } @@ -72,12 +72,12 @@ bool ANeuralNetworksModel::setOperandValue(uint32_t index, const void *buffer, s if (copy) { _graph->operands().at(ind).data( - std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length)); + std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length)); } else { _graph->operands().at(ind).data( - std::make_unique<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length)); + std::make_unique<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length)); } } catch (const std::exception &e) @@ -111,9 +111,9 @@ bool ANeuralNetworksModel::addOperation(ANeuralNetworksOperationType type, uint3 if (type == ANEURALNETWORKS_FULLY_CONNECTED) { const auto &input_operand = - _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT)); + _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT)); auto &weights_operand = - _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT)); + 
_graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT)); if (input_operand.typeInfo().type() == onert::ir::DataType::FLOAT32 && weights_operand.typeInfo().type() == onert::ir::DataType::QUANT_UINT8_ASYMM) { diff --git a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc index 63d4e3c09..94b8f02f5 100644 --- a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc +++ b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc @@ -39,6 +39,13 @@ DataType NNAPIConvert::getDataType(OperandCode type) case ANEURALNETWORKS_BOOL: case ANEURALNETWORKS_TENSOR_BOOL8: return DataType::BOOL8; + case ANEURALNETWORKS_TENSOR_FLOAT16: + case ANEURALNETWORKS_FLOAT16: + return DataType::FLOAT16; + case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL: + return DataType::QUANT_INT8_SYMM_PER_CHANNEL; + case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED: + return DataType::QUANT_INT8_ASYMM; default: throw std::runtime_error("Unsupported type"); } diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc index a84ce1b8d..9ecb7d190 100644 --- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc +++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc @@ -107,7 +107,7 @@ getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivat } OperationFactory::Generator getElementwiseBinaryGenerator( - const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) + const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) { return [op_type](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 2); @@ -182,7 +182,7 @@ getBinaryArithmeticGenerator(const onert::ir::operation::BinaryArithmetic::Arith param.arithmetic_type = op_type; const auto activation_index = OperandIndex{init_param.inputs[2]}; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); return new operation::BinaryArithmetic{inputs, outputs, param}; }; @@ -221,12 +221,12 @@ getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type) const auto activation_index = OperandIndex{init_param.inputs[6]}; param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); param.stride = makeStride(operands, hstride_index, vstride_index); param.kw = getUint32Scalar(operands, kw_index); param.kh = operands.at(kh_index).asScalar<uint32_t>(); param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } else // support explicit padding { @@ -259,7 +259,7 @@ getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type) param.kw = getUint32Scalar(operands, kw_index); param.kh = getUint32Scalar(operands, kh_index); param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } return new operation::Pool2D{inputs, outputs, param}; }; @@ -382,11 +382,11 @@ OperationFactory::OperationFactory() const auto activation_index = OperandIndex{init_param.inputs[7]}; param.padding.type = - 
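
The new cases added to NNAPIConvert::getDataType above extend the NNAPI-to-onert type mapping: both the scalar and tensor FLOAT16 codes collapse to DataType::FLOAT16, the per-channel symmetric code maps to QUANT_INT8_SYMM_PER_CHANNEL, the signed asymmetric code to QUANT_INT8_ASYMM, and anything unlisted still throws. A hedged usage sketch (include path illustrative) that turns the throwing contract into a support probe:

  #include <stdexcept>
  #include "NNAPIConvert.h" // illustrative include path

  // Sketch: probe whether the runtime models a given NNAPI operand code.
  // getDataType() throws std::runtime_error for unlisted codes, as the
  // switch above shows.
  bool isSupportedOperandCode(OperandCode code)
  {
    try
    {
      (void)NNAPIConvert::getDataType(code);
      return true;
    }
    catch (const std::runtime_error &)
    {
      return false;
    }
  }
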
NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); param.stride = makeStride(operands, hstride_index, vstride_index); param.multiplier = getUint32Scalar(operands, multiplier_index); param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } else { @@ -417,7 +417,7 @@ OperationFactory::OperationFactory() param.stride = makeStride(operands, hstride_index, vstride_index); param.multiplier = getUint32Scalar(operands, multiplier_index); param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } // TODO set dilation @@ -490,7 +490,7 @@ OperationFactory::OperationFactory() operation::FullyConnected::Param param; const auto activation_index = OperandIndex{init_param.inputs[3]}; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); param.weights_format = FullyConnectedWeightsFormat::Default; return new operation::FullyConnected{inputs, outputs, param}; @@ -517,7 +517,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_CAST] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST); // ANEURALNETWORKS_CAST_EX is deprecated // TODO Remove ANEURALNETWORKS_CAST_EX @@ -557,14 +557,14 @@ OperationFactory::OperationFactory() const auto activation_index = OperandIndex{init_param.inputs[6]}; param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); param.stride = makeStride(operands, hstride_index, vstride_index); param.dilation.width_factor = 1; param.dilation.height_factor = 1; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } else if (init_param.input_count == 10) // support explicit padding { @@ -595,7 +595,7 @@ OperationFactory::OperationFactory() param.dilation.height_factor = 1; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } else if (init_param.input_count == 13) // support dilation { @@ -633,7 +633,7 @@ OperationFactory::OperationFactory() param.dilation.height_factor = height_factor; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } else { @@ -644,19 +644,19 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_ADD] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD); _map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD]; _map[ANEURALNETWORKS_REDUCE_SUM] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM); + 
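
The branches being reindented here implement NNAPI's two calling conventions for convolution-style ops: a short operand list where one input holds a PaddingCode for implicit padding, and a longer list where the four pad amounts are passed explicitly (plus, for CONV_2D, a thirteen-operand form carrying dilation factors). A stand-in sketch of that operand-count dispatch, with the counts taken from the branch comments above; the real generators read concrete indices out of init_param.inputs:

  #include <cstdint>
  #include <stdexcept>

  enum class PaddingScheme { Implicit, Explicit, ExplicitWithDilation };

  // Sketch: CONV_2D selects its parsing branch purely by operand count.
  PaddingScheme selectConvPaddingScheme(uint32_t input_count)
  {
    switch (input_count)
    {
      case 7:  return PaddingScheme::Implicit;             // PaddingCode operand
      case 10: return PaddingScheme::Explicit;             // four pad amounts
      case 13: return PaddingScheme::ExplicitWithDilation; // plus dilation factors
      default: throw std::invalid_argument{"unsupported CONV_2D operand count"};
    }
  }
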
getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM); // ANEURALNETWORKS_REDUCE_SUM_EX is deprecated // TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX _map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM]; _map[ANEURALNETWORKS_SUB] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB); _map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -708,7 +708,7 @@ OperationFactory::OperationFactory() param.begin_mask = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<std::int32_t>(); param.end_mask = operands.at(OperandIndex{init_param.inputs[5]}).asScalar<std::int32_t>(); param.shrink_axis_mask = - operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>(); + operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>(); return new operation::StridedSlice{inputs, outputs, param}; }; @@ -716,7 +716,7 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_TRANSPOSE] = createSimpleBinaryOp<operation::Transpose>; _map[ANEURALNETWORKS_MUL] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL); _map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -758,15 +758,15 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f); + onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f); _map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG); - _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::LOGISTIC); + _map[ANEURALNETWORKS_LOGISTIC] = + getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::LOGISTIC); _map[ANEURALNETWORKS_DIV] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV); _map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP); @@ -780,16 +780,16 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>; _map[ANEURALNETWORKS_GREATER] = - getComparisonGenerator(operation::Comparison::ComparisonType::Greater); + getComparisonGenerator(operation::Comparison::ComparisonType::Greater); _map[ANEURALNETWORKS_GREATER_EQUAL] = - getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual); + getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual); _map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less); _map[ANEURALNETWORKS_LESS_EQUAL] = - getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual); + getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual); _map[ANEURALNETWORKS_NOT_EQUAL] = - getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual); + getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual); _map[ANEURALNETWORKS_EQUAL] = - 
getComparisonGenerator(operation::Comparison::ComparisonType::Equal); + getComparisonGenerator(operation::Comparison::ComparisonType::Equal); // ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated // TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX @@ -838,13 +838,13 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_REDUCE_ALL] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL); _map[ANEURALNETWORKS_REDUCE_ANY] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY); _map[ANEURALNETWORKS_REDUCE_MAX] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX); // ANEURALNETWORKS_REDUCE_MAX_EX is deprecated // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX @@ -873,8 +873,8 @@ OperationFactory::OperationFactory() return new operation::Comparison{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator( - operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); + _map[ANEURALNETWORKS_LOGICAL_AND] = + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); // ANEURALNETWORKS_LOGICAL_AND_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX @@ -902,7 +902,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_RSQRT] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT); _map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -939,8 +939,8 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT]; _map[ANEURALNETWORKS_RELU] = - getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU, - onert::ir::operation::ElementwiseActivation::infinity, 0); + getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU, + onert::ir::operation::ElementwiseActivation::infinity, 0); _map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -986,10 +986,10 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f); + onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f); _map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f); + onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f); _map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 2 && init_param.output_count == 1); @@ -1031,13 +1031,13 @@ OperationFactory::OperationFactory() operation::RNN::Param param; const auto activation_index = OperandIndex{init_param.inputs[5]}; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); return new operation::RNN{inputs, outputs, param}; }; _map[ANEURALNETWORKS_FLOOR] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR); + 
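
Beyond the mechanical reflow, this file documents the factory's shape: _map keys NNAPI operation codes to Generator callables, and families of similar operations (comparisons, reductions, element-wise ops) share parameterized generator builders instead of hand-written lambdas. A reduced sketch of that registration pattern with hypothetical stand-in types, not the real onert IR:

  #include <functional>
  #include <map>

  struct Param { };     // stand-in for OperationFactory::Param
  struct Operands { };  // stand-in for onert::ir::Operands
  struct Operation { virtual ~Operation() = default; };

  enum class ComparisonType { Equal, NotEqual };
  using Generator = std::function<Operation *(const Param &, Operands &)>;

  // One parameterized builder serves every comparison opcode, mirroring
  // getComparisonGenerator(): the opcode-specific detail is captured once.
  Generator makeComparisonGenerator(ComparisonType type)
  {
    return [type](const Param &, Operands &) -> Operation * {
      (void)type; // the real factory stores this in the operation's Param
      return new Operation{};
    };
  }

  int main()
  {
    std::map<int, Generator> registry;
    registry[0 /* stands in for ANEURALNETWORKS_EQUAL */] =
      makeComparisonGenerator(ComparisonType::Equal);
    Param p;
    Operands ops;
    delete registry[0](p, ops); // the factory hands ownership to the caller
  }
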
getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR); _map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param, Operands &) { @@ -1169,21 +1169,21 @@ OperationFactory::OperationFactory() const auto vstride_index = OperandIndex{init_param.inputs[5]}; param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); param.stride = makeStride(operands, hstride_index, vstride_index); return new operation::TransposeConv{inputs, outputs, param}; }; _map[ANEURALNETWORKS_SQRT] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT); // ANEURALNETWORKS_SQRT_EX is deprecated // TODO Remove ANEURALNETWORKS_SQRT_EX _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT]; - _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator( - operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); + _map[ANEURALNETWORKS_LOGICAL_OR] = + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); // ANEURALNETWORKS_LOGICAL_OR_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX @@ -1211,7 +1211,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_LOGICAL_NOT] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT); // ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX @@ -1370,9 +1370,9 @@ OperationFactory::OperationFactory() // 2 -> Cell State Out Tensor Index const OperandIndex scratch_buffer_index; OperandIndex output_state_index = - init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex(); + init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex(); OperandIndex cell_state_index = - init_param.output_count >= 3 ? OperandIndex{init_param.outputs[2]} : OperandIndex(); + init_param.output_count >= 3 ? 
OperandIndex{init_param.outputs[2]} : OperandIndex(); const OperandIndex output_index = OperandIndex{init_param.outputs[0]}; OperandIndexSequence outputs{scratch_buffer_index, output_state_index, cell_state_index, output_index}; @@ -1519,19 +1519,39 @@ OperationFactory::OperationFactory() // 1 -> Axis Tensor Index OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - operation::ArgMax::Param param; + operation::ArgMinMax::Param param; // NNAPI ARGMAX output type is always int32 param.output_type = DataType::INT32; + param.is_arg_max = true; - return new operation::ArgMax{inputs, outputs, param}; + return new operation::ArgMinMax{inputs, outputs, param}; }; // ANEURALNETWORKS_ARGMAX_EX is deprecated // TODO Remove ANEURALNETWORKS_ARGMAX_EX _map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX]; + _map[ANEURALNETWORKS_ARGMIN] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + operation::ArgMinMax::Param param; + // NNAPI ARGMIN output type is always int32 + param.output_type = DataType::INT32; + param.is_arg_max = false; + + return new operation::ArgMinMax{inputs, outputs, param}; + }; + _map[ANEURALNETWORKS_DEQUANTIZE] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE); _map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -1608,7 +1628,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_REDUCE_MIN] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN); // ANEURALNETWORKS_REDUCE_MIN_EX is deprecated // TODO Remove ANEURALNETWORKS_REDUCE_MIN_EX @@ -1689,10 +1709,10 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD]; _map[ANEURALNETWORKS_MINIMUM] = - getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN); + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN); _map[ANEURALNETWORKS_MAXIMUM] = - getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX); + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX); _map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -1719,7 +1739,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_COS_EX] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS); _map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN); @@ -1733,10 +1753,10 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_REDUCE_PROD] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD); _map[ANEURALNETWORKS_ROUND_EX] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND); + 
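
The ARGMAX/ARGMIN change above is the one behavioral refactor in this file: the former operation::ArgMax node is replaced by a shared operation::ArgMinMax whose Param carries an is_arg_max flag, so both NNAPI codes build the same node and differ only in that flag, with the output type pinned to INT32 for both as NNAPI requires. A condensed sketch of the shared parameter setup, using stand-in types rather than the real IR:

  enum class DataType { INT32 }; // stand-in for onert::ir::DataType

  struct ArgMinMaxParam
  {
    DataType output_type;
    bool is_arg_max;
  };

  // Both NNAPI entry points reduce to one construction path; only the flag
  // differs, exactly as in the two generators registered above.
  ArgMinMaxParam makeArgMinMaxParam(bool is_arg_max)
  {
    ArgMinMaxParam param;
    param.output_type = DataType::INT32; // NNAPI ARGMAX/ARGMIN output is always int32
    param.is_arg_max = is_arg_max;
    return param;
  }
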
getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND); _map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -1764,7 +1784,7 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>; _map[ANEURALNETWORKS_ZEROS_LIKE_EX] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE); // Each input should be interpreted as follows: // 0 -> Input Tensor Index // 1 -> Multiple Tensor Index @@ -1904,7 +1924,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_QUANTIZE] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE); } Operation *OperationFactory::create(ANeuralNetworksOperationType type, diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h index 367cf74db..74e187421 100644 --- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h +++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h @@ -40,7 +40,7 @@ public: public: using Generator = - std::function<onert::ir::Operation *(const OperationFactory::Param &, onert::ir::Operands &)>; + std::function<onert::ir::Operation *(const OperationFactory::Param &, onert::ir::Operands &)>; public: static OperationFactory &get(); diff --git a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h index c6e9147cd..8e1b84e29 100644 --- a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h +++ b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h @@ -1710,9 +1710,8 @@ enum ActivationFunctionType inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] { static const ActivationFunctionType values[] = { - ActivationFunctionType_NONE, ActivationFunctionType_RELU, - ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6, - ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; + ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1, + ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; return values; } @@ -1768,8 +1767,8 @@ enum FullyConnectedOptionsWeightsFormat inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2] { static const FullyConnectedOptionsWeightsFormat values[] = { - FullyConnectedOptionsWeightsFormat_DEFAULT, - FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8}; + FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8}; return values; } @@ -1981,8 +1980,8 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab const CustomQuantization *details_as_CustomQuantization() const { return details_type() == QuantizationDetails_CustomQuantization - ? static_cast<const CustomQuantization *>(details()) - : nullptr; + ? 
static_cast<const CustomQuantization *>(details()) + : nullptr; } int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); } bool Verify(flatbuffers::Verifier &verifier) const @@ -2072,17 +2071,17 @@ CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, } inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr, - const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr, - const std::vector<int64_t> *zero_point = nullptr, - QuantizationDetails details_type = QuantizationDetails_NONE, - flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr, + const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr, + const std::vector<int64_t> *zero_point = nullptr, + QuantizationDetails details_type = QuantizationDetails_NONE, + flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) { return onert_tflite::CreateQuantizationParameters( - _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0, - scale ? _fbb.CreateVector<float>(*scale) : 0, - zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details, - quantized_dimension); + _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0, + scale ? _fbb.CreateVector<float>(*scale) : 0, + zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details, + quantized_dimension); } struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -2272,20 +2271,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const Int32Vector *array_segments_as_Int32Vector() const { return array_segments_type() == SparseIndexVector_Int32Vector - ? static_cast<const Int32Vector *>(array_segments()) - : nullptr; + ? static_cast<const Int32Vector *>(array_segments()) + : nullptr; } const Uint16Vector *array_segments_as_Uint16Vector() const { return array_segments_type() == SparseIndexVector_Uint16Vector - ? static_cast<const Uint16Vector *>(array_segments()) - : nullptr; + ? static_cast<const Uint16Vector *>(array_segments()) + : nullptr; } const Uint8Vector *array_segments_as_Uint8Vector() const { return array_segments_type() == SparseIndexVector_Uint8Vector - ? static_cast<const Uint8Vector *>(array_segments()) - : nullptr; + ? static_cast<const Uint8Vector *>(array_segments()) + : nullptr; } SparseIndexVector array_indices_type() const { @@ -2296,20 +2295,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const Int32Vector *array_indices_as_Int32Vector() const { return array_indices_type() == SparseIndexVector_Int32Vector - ? static_cast<const Int32Vector *>(array_indices()) - : nullptr; + ? static_cast<const Int32Vector *>(array_indices()) + : nullptr; } const Uint16Vector *array_indices_as_Uint16Vector() const { return array_indices_type() == SparseIndexVector_Uint16Vector - ? static_cast<const Uint16Vector *>(array_indices()) - : nullptr; + ? static_cast<const Uint16Vector *>(array_indices()) + : nullptr; } const Uint8Vector *array_indices_as_Uint8Vector() const { return array_indices_type() == SparseIndexVector_Uint8Vector - ? static_cast<const Uint8Vector *>(array_indices()) - : nullptr; + ? 
static_cast<const Uint8Vector *>(array_indices()) + : nullptr; } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2435,7 +2434,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *>( - VT_DIM_METADATA); + VT_DIM_METADATA); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2460,7 +2459,7 @@ struct SparsityParametersBuilder fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map); } void add_dim_metadata( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata) { fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata); } @@ -2478,11 +2477,10 @@ struct SparsityParametersBuilder }; inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata = - 0) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata = 0) { SparsityParametersBuilder builder_(_fbb); builder_.add_dim_metadata(dim_metadata); @@ -2492,14 +2490,14 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters( } inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr, - const std::vector<int32_t> *block_map = nullptr, - const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr, + const std::vector<int32_t> *block_map = nullptr, + const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr) { return onert_tflite::CreateSparsityParameters( - _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0, - block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0, - dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0); + _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0, + block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0, + dim_metadata ? 
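
DimensionMetadata stores its segment and index arrays behind the SparseIndexVector union, so a reader has to probe the three width-specific accessors shown above. A sketch of widening whichever variant is present to int32_t; it assumes the generated Int32Vector/Uint16Vector/Uint8Vector tables expose their payload via values(), as in the upstream TFLite schema:

  #include <cstdint>
  #include "tflite_schema_generated.h"

  // Hypothetical reader: fetch array_segments[i] regardless of the storage
  // width chosen by the converter. Each accessor returns nullptr unless its
  // union tag matches, so at most one branch is taken.
  int32_t segmentAt(const onert_tflite::DimensionMetadata *dm, flatbuffers::uoffset_t i)
  {
    if (const auto *v = dm->array_segments_as_Int32Vector())
      return v->values()->Get(i);
    if (const auto *v = dm->array_segments_as_Uint16Vector())
      return static_cast<int32_t>(v->values()->Get(i));
    if (const auto *v = dm->array_segments_as_Uint8Vector())
      return static_cast<int32_t>(v->values()->Get(i));
    return -1; // no segments stored
  }
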
_fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0); } struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -2619,16 +2617,16 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, } inline flatbuffers::Offset<Tensor> CreateTensorDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr, - TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr, - flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false, - flatbuffers::Offset<SparsityParameters> sparsity = 0, - const std::vector<int32_t> *shape_signature = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr, + TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr, + flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false, + flatbuffers::Offset<SparsityParameters> sparsity = 0, + const std::vector<int32_t> *shape_signature = nullptr) { return onert_tflite::CreateTensor( - _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer, - name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity, - shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0); + _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer, + name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity, + shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0); } struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -2890,10 +2888,10 @@ struct DepthwiseConv2DOptionsBuilder }; inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0, - int32_t stride_h = 0, int32_t depth_multiplier = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) + flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0, + int32_t stride_h = 0, int32_t depth_multiplier = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) { DepthwiseConv2DOptionsBuilder builder_(_fbb); builder_.add_dilation_h_factor(dilation_h_factor); @@ -2942,12 +2940,12 @@ struct ConcatEmbeddingsOptionsBuilder fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); } void add_num_columns_per_channel( - flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); } void add_embedding_dim_per_channel( - flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel); @@ -2966,9 +2964,9 @@ struct ConcatEmbeddingsOptionsBuilder }; inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) + flatbuffers::FlatBufferBuilder 
&_fbb, int32_t num_channels = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) { ConcatEmbeddingsOptionsBuilder builder_(_fbb); builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); @@ -2983,9 +2981,9 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_ const std::vector<int32_t> *embedding_dim_per_channel = nullptr) { return onert_tflite::CreateConcatEmbeddingsOptions( - _fbb, num_channels, - num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0, - embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0); + _fbb, num_channels, + num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0, + embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0); } struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -3219,9 +3217,9 @@ struct SequenceRNNOptionsBuilder }; inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { SequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3296,9 +3294,9 @@ struct BidirectionalSequenceRNNOptionsBuilder }; inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - bool merge_outputs = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + bool merge_outputs = false, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3378,10 +3376,10 @@ struct FullyConnectedOptionsBuilder }; inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT, - bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT, + bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) { FullyConnectedOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3474,8 +3472,8 @@ struct ConcatenationOptionsBuilder }; inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, + 
ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { ConcatenationOptionsBuilder builder_(_fbb); builder_.add_axis(axis); @@ -3669,7 +3667,7 @@ struct LocalResponseNormalizationOptionsBuilder fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); } explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -3845,7 +3843,7 @@ struct UnidirectionalSequenceLSTMOptionsBuilder static_cast<uint8_t>(asymmetric_quantize_inputs), 0); } explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -3861,10 +3859,10 @@ struct UnidirectionalSequenceLSTMOptionsBuilder inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, + bool asymmetric_quantize_inputs = false) { UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -3943,7 +3941,7 @@ struct BidirectionalSequenceLSTMOptionsBuilder static_cast<uint8_t>(asymmetric_quantize_inputs), 0); } explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -3958,10 +3956,10 @@ struct BidirectionalSequenceLSTMOptionsBuilder }; inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, - bool time_major = true, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, + bool time_major = true, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -4844,7 +4842,7 @@ CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *squeeze_dims = nullptr) { return onert_tflite::CreateSqueezeOptions( - _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0); + _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0); } struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -7206,7 +7204,7 @@ CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *custom_code = nullptr, int32_t version = 1) { return onert_tflite::CreateOperatorCode( - _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version); + _fbb, builtin_code, custom_code ? 
_fbb.CreateString(custom_code) : 0, version); } struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -7241,611 +7239,611 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const Conv2DOptions *builtin_options_as_Conv2DOptions() const { return builtin_options_type() == BuiltinOptions_Conv2DOptions - ? static_cast<const Conv2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const Conv2DOptions *>(builtin_options()) + : nullptr; } const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const { return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions - ? static_cast<const DepthwiseConv2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const DepthwiseConv2DOptions *>(builtin_options()) + : nullptr; } const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const { return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions - ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options()) + : nullptr; } const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const { return builtin_options_type() == BuiltinOptions_LSHProjectionOptions - ? static_cast<const LSHProjectionOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LSHProjectionOptions *>(builtin_options()) + : nullptr; } const Pool2DOptions *builtin_options_as_Pool2DOptions() const { return builtin_options_type() == BuiltinOptions_Pool2DOptions - ? static_cast<const Pool2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const Pool2DOptions *>(builtin_options()) + : nullptr; } const SVDFOptions *builtin_options_as_SVDFOptions() const { return builtin_options_type() == BuiltinOptions_SVDFOptions - ? static_cast<const SVDFOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SVDFOptions *>(builtin_options()) + : nullptr; } const RNNOptions *builtin_options_as_RNNOptions() const { return builtin_options_type() == BuiltinOptions_RNNOptions - ? static_cast<const RNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const RNNOptions *>(builtin_options()) + : nullptr; } const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const { return builtin_options_type() == BuiltinOptions_FullyConnectedOptions - ? static_cast<const FullyConnectedOptions *>(builtin_options()) - : nullptr; + ? static_cast<const FullyConnectedOptions *>(builtin_options()) + : nullptr; } const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const { return builtin_options_type() == BuiltinOptions_SoftmaxOptions - ? static_cast<const SoftmaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SoftmaxOptions *>(builtin_options()) + : nullptr; } const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const { return builtin_options_type() == BuiltinOptions_ConcatenationOptions - ? static_cast<const ConcatenationOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ConcatenationOptions *>(builtin_options()) + : nullptr; } const AddOptions *builtin_options_as_AddOptions() const { return builtin_options_type() == BuiltinOptions_AddOptions - ? static_cast<const AddOptions *>(builtin_options()) - : nullptr; + ? static_cast<const AddOptions *>(builtin_options()) + : nullptr; } const L2NormOptions *builtin_options_as_L2NormOptions() const { return builtin_options_type() == BuiltinOptions_L2NormOptions - ? static_cast<const L2NormOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const L2NormOptions *>(builtin_options()) + : nullptr; } const LocalResponseNormalizationOptions * builtin_options_as_LocalResponseNormalizationOptions() const { return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions - ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options()) + : nullptr; } const LSTMOptions *builtin_options_as_LSTMOptions() const { return builtin_options_type() == BuiltinOptions_LSTMOptions - ? static_cast<const LSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LSTMOptions *>(builtin_options()) + : nullptr; } const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const { return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions - ? static_cast<const ResizeBilinearOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ResizeBilinearOptions *>(builtin_options()) + : nullptr; } const CallOptions *builtin_options_as_CallOptions() const { return builtin_options_type() == BuiltinOptions_CallOptions - ? static_cast<const CallOptions *>(builtin_options()) - : nullptr; + ? static_cast<const CallOptions *>(builtin_options()) + : nullptr; } const ReshapeOptions *builtin_options_as_ReshapeOptions() const { return builtin_options_type() == BuiltinOptions_ReshapeOptions - ? static_cast<const ReshapeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ReshapeOptions *>(builtin_options()) + : nullptr; } const SkipGramOptions *builtin_options_as_SkipGramOptions() const { return builtin_options_type() == BuiltinOptions_SkipGramOptions - ? static_cast<const SkipGramOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SkipGramOptions *>(builtin_options()) + : nullptr; } const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const { return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions - ? static_cast<const SpaceToDepthOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SpaceToDepthOptions *>(builtin_options()) + : nullptr; } const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const { return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions - ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options()) - : nullptr; + ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options()) + : nullptr; } const MulOptions *builtin_options_as_MulOptions() const { return builtin_options_type() == BuiltinOptions_MulOptions - ? static_cast<const MulOptions *>(builtin_options()) - : nullptr; + ? static_cast<const MulOptions *>(builtin_options()) + : nullptr; } const PadOptions *builtin_options_as_PadOptions() const { return builtin_options_type() == BuiltinOptions_PadOptions - ? static_cast<const PadOptions *>(builtin_options()) - : nullptr; + ? static_cast<const PadOptions *>(builtin_options()) + : nullptr; } const GatherOptions *builtin_options_as_GatherOptions() const { return builtin_options_type() == BuiltinOptions_GatherOptions - ? static_cast<const GatherOptions *>(builtin_options()) - : nullptr; + ? static_cast<const GatherOptions *>(builtin_options()) + : nullptr; } const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const { return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions - ? static_cast<const BatchToSpaceNDOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const BatchToSpaceNDOptions *>(builtin_options()) + : nullptr; } const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const { return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions - ? static_cast<const SpaceToBatchNDOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SpaceToBatchNDOptions *>(builtin_options()) + : nullptr; } const TransposeOptions *builtin_options_as_TransposeOptions() const { return builtin_options_type() == BuiltinOptions_TransposeOptions - ? static_cast<const TransposeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const TransposeOptions *>(builtin_options()) + : nullptr; } const ReducerOptions *builtin_options_as_ReducerOptions() const { return builtin_options_type() == BuiltinOptions_ReducerOptions - ? static_cast<const ReducerOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ReducerOptions *>(builtin_options()) + : nullptr; } const SubOptions *builtin_options_as_SubOptions() const { return builtin_options_type() == BuiltinOptions_SubOptions - ? static_cast<const SubOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SubOptions *>(builtin_options()) + : nullptr; } const DivOptions *builtin_options_as_DivOptions() const { return builtin_options_type() == BuiltinOptions_DivOptions - ? static_cast<const DivOptions *>(builtin_options()) - : nullptr; + ? static_cast<const DivOptions *>(builtin_options()) + : nullptr; } const SqueezeOptions *builtin_options_as_SqueezeOptions() const { return builtin_options_type() == BuiltinOptions_SqueezeOptions - ? static_cast<const SqueezeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SqueezeOptions *>(builtin_options()) + : nullptr; } const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const { return builtin_options_type() == BuiltinOptions_SequenceRNNOptions - ? static_cast<const SequenceRNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SequenceRNNOptions *>(builtin_options()) + : nullptr; } const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const { return builtin_options_type() == BuiltinOptions_StridedSliceOptions - ? static_cast<const StridedSliceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const StridedSliceOptions *>(builtin_options()) + : nullptr; } const ExpOptions *builtin_options_as_ExpOptions() const { return builtin_options_type() == BuiltinOptions_ExpOptions - ? static_cast<const ExpOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ExpOptions *>(builtin_options()) + : nullptr; } const TopKV2Options *builtin_options_as_TopKV2Options() const { return builtin_options_type() == BuiltinOptions_TopKV2Options - ? static_cast<const TopKV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const TopKV2Options *>(builtin_options()) + : nullptr; } const SplitOptions *builtin_options_as_SplitOptions() const { return builtin_options_type() == BuiltinOptions_SplitOptions - ? static_cast<const SplitOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SplitOptions *>(builtin_options()) + : nullptr; } const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const { return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions - ? static_cast<const LogSoftmaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LogSoftmaxOptions *>(builtin_options()) + : nullptr; } const CastOptions *builtin_options_as_CastOptions() const { return builtin_options_type() == BuiltinOptions_CastOptions - ? 
static_cast<const CastOptions *>(builtin_options()) - : nullptr; + ? static_cast<const CastOptions *>(builtin_options()) + : nullptr; } const DequantizeOptions *builtin_options_as_DequantizeOptions() const { return builtin_options_type() == BuiltinOptions_DequantizeOptions - ? static_cast<const DequantizeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const DequantizeOptions *>(builtin_options()) + : nullptr; } const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const { return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions - ? static_cast<const MaximumMinimumOptions *>(builtin_options()) - : nullptr; + ? static_cast<const MaximumMinimumOptions *>(builtin_options()) + : nullptr; } const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const { return builtin_options_type() == BuiltinOptions_ArgMaxOptions - ? static_cast<const ArgMaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ArgMaxOptions *>(builtin_options()) + : nullptr; } const LessOptions *builtin_options_as_LessOptions() const { return builtin_options_type() == BuiltinOptions_LessOptions - ? static_cast<const LessOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LessOptions *>(builtin_options()) + : nullptr; } const NegOptions *builtin_options_as_NegOptions() const { return builtin_options_type() == BuiltinOptions_NegOptions - ? static_cast<const NegOptions *>(builtin_options()) - : nullptr; + ? static_cast<const NegOptions *>(builtin_options()) + : nullptr; } const PadV2Options *builtin_options_as_PadV2Options() const { return builtin_options_type() == BuiltinOptions_PadV2Options - ? static_cast<const PadV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const PadV2Options *>(builtin_options()) + : nullptr; } const GreaterOptions *builtin_options_as_GreaterOptions() const { return builtin_options_type() == BuiltinOptions_GreaterOptions - ? static_cast<const GreaterOptions *>(builtin_options()) - : nullptr; + ? static_cast<const GreaterOptions *>(builtin_options()) + : nullptr; } const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const { return builtin_options_type() == BuiltinOptions_GreaterEqualOptions - ? static_cast<const GreaterEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const GreaterEqualOptions *>(builtin_options()) + : nullptr; } const LessEqualOptions *builtin_options_as_LessEqualOptions() const { return builtin_options_type() == BuiltinOptions_LessEqualOptions - ? static_cast<const LessEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LessEqualOptions *>(builtin_options()) + : nullptr; } const SelectOptions *builtin_options_as_SelectOptions() const { return builtin_options_type() == BuiltinOptions_SelectOptions - ? static_cast<const SelectOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SelectOptions *>(builtin_options()) + : nullptr; } const SliceOptions *builtin_options_as_SliceOptions() const { return builtin_options_type() == BuiltinOptions_SliceOptions - ? static_cast<const SliceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SliceOptions *>(builtin_options()) + : nullptr; } const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const { return builtin_options_type() == BuiltinOptions_TransposeConvOptions - ? static_cast<const TransposeConvOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const TransposeConvOptions *>(builtin_options()) + : nullptr; } const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const { return builtin_options_type() == BuiltinOptions_SparseToDenseOptions - ? static_cast<const SparseToDenseOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SparseToDenseOptions *>(builtin_options()) + : nullptr; } const TileOptions *builtin_options_as_TileOptions() const { return builtin_options_type() == BuiltinOptions_TileOptions - ? static_cast<const TileOptions *>(builtin_options()) - : nullptr; + ? static_cast<const TileOptions *>(builtin_options()) + : nullptr; } const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const { return builtin_options_type() == BuiltinOptions_ExpandDimsOptions - ? static_cast<const ExpandDimsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ExpandDimsOptions *>(builtin_options()) + : nullptr; } const EqualOptions *builtin_options_as_EqualOptions() const { return builtin_options_type() == BuiltinOptions_EqualOptions - ? static_cast<const EqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const EqualOptions *>(builtin_options()) + : nullptr; } const NotEqualOptions *builtin_options_as_NotEqualOptions() const { return builtin_options_type() == BuiltinOptions_NotEqualOptions - ? static_cast<const NotEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const NotEqualOptions *>(builtin_options()) + : nullptr; } const ShapeOptions *builtin_options_as_ShapeOptions() const { return builtin_options_type() == BuiltinOptions_ShapeOptions - ? static_cast<const ShapeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ShapeOptions *>(builtin_options()) + : nullptr; } const PowOptions *builtin_options_as_PowOptions() const { return builtin_options_type() == BuiltinOptions_PowOptions - ? static_cast<const PowOptions *>(builtin_options()) - : nullptr; + ? static_cast<const PowOptions *>(builtin_options()) + : nullptr; } const ArgMinOptions *builtin_options_as_ArgMinOptions() const { return builtin_options_type() == BuiltinOptions_ArgMinOptions - ? static_cast<const ArgMinOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ArgMinOptions *>(builtin_options()) + : nullptr; } const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const { return builtin_options_type() == BuiltinOptions_FakeQuantOptions - ? static_cast<const FakeQuantOptions *>(builtin_options()) - : nullptr; + ? static_cast<const FakeQuantOptions *>(builtin_options()) + : nullptr; } const PackOptions *builtin_options_as_PackOptions() const { return builtin_options_type() == BuiltinOptions_PackOptions - ? static_cast<const PackOptions *>(builtin_options()) - : nullptr; + ? static_cast<const PackOptions *>(builtin_options()) + : nullptr; } const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const { return builtin_options_type() == BuiltinOptions_LogicalOrOptions - ? static_cast<const LogicalOrOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LogicalOrOptions *>(builtin_options()) + : nullptr; } const OneHotOptions *builtin_options_as_OneHotOptions() const { return builtin_options_type() == BuiltinOptions_OneHotOptions - ? static_cast<const OneHotOptions *>(builtin_options()) - : nullptr; + ? static_cast<const OneHotOptions *>(builtin_options()) + : nullptr; } const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const { return builtin_options_type() == BuiltinOptions_LogicalAndOptions - ? 
static_cast<const LogicalAndOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LogicalAndOptions *>(builtin_options()) + : nullptr; } const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const { return builtin_options_type() == BuiltinOptions_LogicalNotOptions - ? static_cast<const LogicalNotOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LogicalNotOptions *>(builtin_options()) + : nullptr; } const UnpackOptions *builtin_options_as_UnpackOptions() const { return builtin_options_type() == BuiltinOptions_UnpackOptions - ? static_cast<const UnpackOptions *>(builtin_options()) - : nullptr; + ? static_cast<const UnpackOptions *>(builtin_options()) + : nullptr; } const FloorDivOptions *builtin_options_as_FloorDivOptions() const { return builtin_options_type() == BuiltinOptions_FloorDivOptions - ? static_cast<const FloorDivOptions *>(builtin_options()) - : nullptr; + ? static_cast<const FloorDivOptions *>(builtin_options()) + : nullptr; } const SquareOptions *builtin_options_as_SquareOptions() const { return builtin_options_type() == BuiltinOptions_SquareOptions - ? static_cast<const SquareOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SquareOptions *>(builtin_options()) + : nullptr; } const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const { return builtin_options_type() == BuiltinOptions_ZerosLikeOptions - ? static_cast<const ZerosLikeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ZerosLikeOptions *>(builtin_options()) + : nullptr; } const FillOptions *builtin_options_as_FillOptions() const { return builtin_options_type() == BuiltinOptions_FillOptions - ? static_cast<const FillOptions *>(builtin_options()) - : nullptr; + ? static_cast<const FillOptions *>(builtin_options()) + : nullptr; } const BidirectionalSequenceLSTMOptions * builtin_options_as_BidirectionalSequenceLSTMOptions() const { return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions - ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; } const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const { return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions - ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options()) + : nullptr; } const UnidirectionalSequenceLSTMOptions * builtin_options_as_UnidirectionalSequenceLSTMOptions() const { return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions - ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; } const FloorModOptions *builtin_options_as_FloorModOptions() const { return builtin_options_type() == BuiltinOptions_FloorModOptions - ? static_cast<const FloorModOptions *>(builtin_options()) - : nullptr; + ? static_cast<const FloorModOptions *>(builtin_options()) + : nullptr; } const RangeOptions *builtin_options_as_RangeOptions() const { return builtin_options_type() == BuiltinOptions_RangeOptions - ? static_cast<const RangeOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const RangeOptions *>(builtin_options()) + : nullptr; } const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const { return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions - ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options()) + : nullptr; } const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const { return builtin_options_type() == BuiltinOptions_LeakyReluOptions - ? static_cast<const LeakyReluOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LeakyReluOptions *>(builtin_options()) + : nullptr; } const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const { return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions - ? static_cast<const SquaredDifferenceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SquaredDifferenceOptions *>(builtin_options()) + : nullptr; } const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const { return builtin_options_type() == BuiltinOptions_MirrorPadOptions - ? static_cast<const MirrorPadOptions *>(builtin_options()) - : nullptr; + ? static_cast<const MirrorPadOptions *>(builtin_options()) + : nullptr; } const AbsOptions *builtin_options_as_AbsOptions() const { return builtin_options_type() == BuiltinOptions_AbsOptions - ? static_cast<const AbsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const AbsOptions *>(builtin_options()) + : nullptr; } const SplitVOptions *builtin_options_as_SplitVOptions() const { return builtin_options_type() == BuiltinOptions_SplitVOptions - ? static_cast<const SplitVOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SplitVOptions *>(builtin_options()) + : nullptr; } const UniqueOptions *builtin_options_as_UniqueOptions() const { return builtin_options_type() == BuiltinOptions_UniqueOptions - ? static_cast<const UniqueOptions *>(builtin_options()) - : nullptr; + ? static_cast<const UniqueOptions *>(builtin_options()) + : nullptr; } const ReverseV2Options *builtin_options_as_ReverseV2Options() const { return builtin_options_type() == BuiltinOptions_ReverseV2Options - ? static_cast<const ReverseV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const ReverseV2Options *>(builtin_options()) + : nullptr; } const AddNOptions *builtin_options_as_AddNOptions() const { return builtin_options_type() == BuiltinOptions_AddNOptions - ? static_cast<const AddNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const AddNOptions *>(builtin_options()) + : nullptr; } const GatherNdOptions *builtin_options_as_GatherNdOptions() const { return builtin_options_type() == BuiltinOptions_GatherNdOptions - ? static_cast<const GatherNdOptions *>(builtin_options()) - : nullptr; + ? static_cast<const GatherNdOptions *>(builtin_options()) + : nullptr; } const CosOptions *builtin_options_as_CosOptions() const { return builtin_options_type() == BuiltinOptions_CosOptions - ? static_cast<const CosOptions *>(builtin_options()) - : nullptr; + ? static_cast<const CosOptions *>(builtin_options()) + : nullptr; } const WhereOptions *builtin_options_as_WhereOptions() const { return builtin_options_type() == BuiltinOptions_WhereOptions - ? static_cast<const WhereOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const WhereOptions *>(builtin_options()) + : nullptr; } const RankOptions *builtin_options_as_RankOptions() const { return builtin_options_type() == BuiltinOptions_RankOptions - ? static_cast<const RankOptions *>(builtin_options()) - : nullptr; + ? static_cast<const RankOptions *>(builtin_options()) + : nullptr; } const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const { return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions - ? static_cast<const ReverseSequenceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ReverseSequenceOptions *>(builtin_options()) + : nullptr; } const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const { return builtin_options_type() == BuiltinOptions_MatrixDiagOptions - ? static_cast<const MatrixDiagOptions *>(builtin_options()) - : nullptr; + ? static_cast<const MatrixDiagOptions *>(builtin_options()) + : nullptr; } const QuantizeOptions *builtin_options_as_QuantizeOptions() const { return builtin_options_type() == BuiltinOptions_QuantizeOptions - ? static_cast<const QuantizeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const QuantizeOptions *>(builtin_options()) + : nullptr; } const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const { return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions - ? static_cast<const MatrixSetDiagOptions *>(builtin_options()) - : nullptr; + ? static_cast<const MatrixSetDiagOptions *>(builtin_options()) + : nullptr; } const HardSwishOptions *builtin_options_as_HardSwishOptions() const { return builtin_options_type() == BuiltinOptions_HardSwishOptions - ? static_cast<const HardSwishOptions *>(builtin_options()) - : nullptr; + ? static_cast<const HardSwishOptions *>(builtin_options()) + : nullptr; } const IfOptions *builtin_options_as_IfOptions() const { return builtin_options_type() == BuiltinOptions_IfOptions - ? static_cast<const IfOptions *>(builtin_options()) - : nullptr; + ? static_cast<const IfOptions *>(builtin_options()) + : nullptr; } const WhileOptions *builtin_options_as_WhileOptions() const { return builtin_options_type() == BuiltinOptions_WhileOptions - ? static_cast<const WhileOptions *>(builtin_options()) - : nullptr; + ? static_cast<const WhileOptions *>(builtin_options()) + : nullptr; } const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const { return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions - ? static_cast<const DepthToSpaceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const DepthToSpaceOptions *>(builtin_options()) + : nullptr; } const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const { return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options - ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options()) - : nullptr; + ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options()) + : nullptr; } const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const { return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options - ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options()) - : nullptr; + ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options()) + : nullptr; } const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const { return builtin_options_type() == BuiltinOptions_ScatterNdOptions - ? static_cast<const ScatterNdOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const ScatterNdOptions *>(builtin_options()) + : nullptr; } const SelectV2Options *builtin_options_as_SelectV2Options() const { return builtin_options_type() == BuiltinOptions_SelectV2Options - ? static_cast<const SelectV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const SelectV2Options *>(builtin_options()) + : nullptr; } const DensifyOptions *builtin_options_as_DensifyOptions() const { return builtin_options_type() == BuiltinOptions_DensifyOptions - ? static_cast<const DensifyOptions *>(builtin_options()) - : nullptr; + ? static_cast<const DensifyOptions *>(builtin_options()) + : nullptr; } const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const { return builtin_options_type() == BuiltinOptions_SegmentSumOptions - ? static_cast<const SegmentSumOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SegmentSumOptions *>(builtin_options()) + : nullptr; } const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const { return builtin_options_type() == BuiltinOptions_BatchMatMulOptions - ? static_cast<const BatchMatMulOptions *>(builtin_options()) - : nullptr; + ? static_cast<const BatchMatMulOptions *>(builtin_options()) + : nullptr; } const flatbuffers::Vector<uint8_t> *custom_options() const { @@ -8457,7 +8455,7 @@ struct OperatorBuilder static_cast<int8_t>(custom_options_format), 0); } void add_mutating_variable_inputs( - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) { fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs); } @@ -8514,11 +8512,11 @@ CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index const std::vector<int32_t> *intermediates = nullptr) { return onert_tflite::CreateOperator( - _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, - outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options, - custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format, - mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0, - intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0); + _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, + outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options, + custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format, + mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0, + intermediates ? 
_fbb.CreateVector<int32_t>(*intermediates) : 0); } struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -8602,12 +8600,12 @@ struct SubGraphBuilder }; inline flatbuffers::Offset<SubGraph> CreateSubGraph( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0, - flatbuffers::Offset<flatbuffers::String> name = 0) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0, + flatbuffers::Offset<flatbuffers::String> name = 0) { SubGraphBuilder builder_(_fbb); builder_.add_name(name); @@ -8618,20 +8616,18 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraph( return builder_.Finish(); } -inline flatbuffers::Offset<SubGraph> -CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb, - const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr, - const std::vector<int32_t> *inputs = nullptr, - const std::vector<int32_t> *outputs = nullptr, - const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr, - const char *name = nullptr) +inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr, + const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, + const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr, const char *name = nullptr) { return onert_tflite::CreateSubGraph( - _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0, - inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, - outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, - operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0, - name ? _fbb.CreateString(name) : 0); + _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0, + inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, + outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, + operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0, + name ? 
_fbb.CreateString(name) : 0); } struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -8762,7 +8758,7 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>( - VT_OPERATOR_CODES); + VT_OPERATOR_CODES); } const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const { @@ -8805,7 +8801,7 @@ struct ModelBuilder flatbuffers::uoffset_t start_; void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); } void add_operator_codes( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes) { fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); } @@ -8845,13 +8841,13 @@ struct ModelBuilder }; inline flatbuffers::Offset<Model> CreateModel( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0, - flatbuffers::Offset<flatbuffers::String> description = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0, + flatbuffers::Offset<flatbuffers::String> description = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0) { ModelBuilder builder_(_fbb); builder_.add_metadata(metadata); @@ -8874,13 +8870,13 @@ CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, const std::vector<flatbuffers::Offset<Metadata>> *metadata = nullptr) { return onert_tflite::CreateModel( - _fbb, version, - operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0, - subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0, - description ? _fbb.CreateString(description) : 0, - buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0, - metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0, - metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0); + _fbb, version, + operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0, + subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0, + description ? _fbb.CreateString(description) : 0, + buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0, + metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0, + metadata ? 
_fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0); } inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, diff --git a/runtime/onert/sample/.clang-format b/runtime/onert/sample/.clang-format new file mode 120000 index 000000000..83185fee3 --- /dev/null +++ b/runtime/onert/sample/.clang-format @@ -0,0 +1 @@ +../../../.clang-format.8
\ No newline at end of file diff --git a/runtime/onert/test/.clang-format b/runtime/onert/test/.clang-format new file mode 120000 index 000000000..83185fee3 --- /dev/null +++ b/runtime/onert/test/.clang-format @@ -0,0 +1 @@ +../../../.clang-format.8
\ No newline at end of file diff --git a/runtime/onert/test/core/compiler/Scheduler.cc b/runtime/onert/test/core/compiler/HEScheduler.cc index 50f3964db..c77ebb895 100644 --- a/runtime/onert/test/core/compiler/Scheduler.cc +++ b/runtime/onert/test/core/compiler/HEScheduler.cc @@ -55,8 +55,7 @@ struct MockBackendCPU : public Backend std::unique_ptr<BackendContext> newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override { - return std::unique_ptr<BackendContext>( - new BackendContext{this, nullptr, nullptr, nullptr, nullptr}); + return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr}); } }; @@ -79,8 +78,7 @@ struct MockBackendGPU : public Backend std::unique_ptr<BackendContext> newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override { - return std::unique_ptr<BackendContext>( - new BackendContext{this, nullptr, nullptr, nullptr, nullptr}); + return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr}); } }; @@ -103,8 +101,7 @@ struct MockBackendNPU : public Backend std::unique_ptr<BackendContext> newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override { - return std::unique_ptr<BackendContext>( - new BackendContext{this, nullptr, nullptr, nullptr, nullptr}); + return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr}); } }; @@ -165,7 +162,7 @@ void setOperationsExecutionTime(const std::vector<const Backend *> &backends, for (auto &backend : backends) setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time); } - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // Set permute time from one backend to another. This method is needed since ExecutionTime has only @@ -195,7 +192,7 @@ void setPermutationsExecutionTime(const std::vector<const Backend *> &backends, setPermutationTime(et, backend, other_backend, false, operand_size, exec_time); } } - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // @@ -304,7 +301,7 @@ std::shared_ptr<Graph> createBranchedGraph() // // SetUp/TearDown methods runs before/after each test and performs actions common for each test -class SchedulerTest : public ::testing::Test +class HESchedulerTest : public ::testing::Test { protected: void SetUp() override @@ -359,8 +356,8 @@ protected: std::string _original_profiling_mode; }; -class SchedulerTestWithExecutorParam : public SchedulerTest, - public testing::WithParamInterface<std::string> +class HESchedulerTestWithExecutorParam : public HESchedulerTest, + public testing::WithParamInterface<std::string> { }; @@ -369,7 +366,7 @@ class SchedulerTestWithExecutorParam : public SchedulerTest, // // Test scheduler behavior for straight graph with known execution time of all nodes and permutes. 
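(For context: the hunks in this test file exercise two renames introduced by this import, the HESchedulerTest* fixtures and ExecTime::storeOperationsExecTime(), which replaces uploadOperationsExecTime(). Below is a minimal sketch of the record-then-persist pattern these tests rely on; the include path, namespaces, and backend setup are assumptions, while the call signatures and the exec_time.json side effect are taken from the hunks themselves.)

#include <vector>
#include "exec/ExecTime.h" // include path assumed

// Record measured execution times for an op on one backend, then persist them
// to exec_time.json -- the file the ExecTime tests remove() during cleanup.
void recordAndPersist(const std::vector<const onert::backend::Backend *> &backends,
                      const onert::backend::Backend *b)
{
  onert::exec::ExecTime et(backends);
  et.updateOperationExecTime(b, "op1", /*quantized=*/true, /*op_size=*/100, /*time_us=*/100);
  et.updateOperationExecTime(b, "op1", /*quantized=*/true, /*op_size=*/200, /*time_us=*/200);
  et.storeOperationsExecTime(); // was uploadOperationsExecTime() before 1.12.0
}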
-TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time) +TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time) { setExecutor(GetParam()); @@ -392,7 +389,7 @@ TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time) setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1); setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1); setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -422,7 +419,7 @@ TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time) } // Test scheduler behavior for branched graph with known execution time of all nodes and permutes -TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) +TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time) { const int64_t NPU_ET = 5000; setExecutor(GetParam()); @@ -432,7 +429,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) auto graph(createBranchedGraph()); subgs.push(ir::SubgraphIndex{0}, graph); OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), - sub_op_idx(5); + sub_op_idx(5); // Set default execution and transfer time setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000); @@ -451,7 +448,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET); setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000); setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -463,7 +460,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) if (GetParam() == PARALLEL) { branch1_expected_backend = - br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu"; + br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu"; branch2_expected_backend = branch1_expected_backend == "npu" ? 
"gpu" : "npu"; } @@ -486,7 +483,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) * branching or scheduler assigns another backend to a node*/ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1); setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -504,11 +501,11 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) // SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times - // one time for each executor -INSTANTIATE_TEST_CASE_P(AllExecutors, SchedulerTestWithExecutorParam, +INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam, testing::Values(LINEAR, DATAFLOW, PARALLEL)); // Test scheduler behavior for branched graph and enabled profiling mode -TEST_F(SchedulerTest, branched_graph_profiling_mode) +TEST_F(HESchedulerTest, branched_graph_profiling_mode) { const int ET = 1e5; @@ -521,7 +518,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode) auto graph(createBranchedGraph()); subgs.push(ir::SubgraphIndex{0}, graph); OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), - sub_op_idx(5); + sub_op_idx(5); // Test 1 // Expected behaviour: scheduler assigns backends to nodes with unknown execution time @@ -537,7 +534,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode) setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET); setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1); setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -560,7 +557,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode) setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET); setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1); setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc index 806b47ecc..0e742e1e4 100644 --- a/runtime/onert/test/core/exec/ExecInstance.cc +++ b/runtime/onert/test/core/exec/ExecInstance.cc @@ -21,6 +21,7 @@ #include "compiler/Compiler.h" #include "exec/Execution.h" #include "ir/operation/BinaryArithmetic.h" +#include "util/TracingCtx.h" namespace { @@ -51,8 +52,8 @@ public: auto operand_rhs2 = graph->addOperand(shape, type); auto operand_result2 = graph->addOperand(shape, type); graph->operands() - .at(operand_rhs2) - .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16)); + .at(operand_rhs2) + .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16)); // 2nd add operations (result2 <= result1 + rhs2) operation::BinaryArithmetic::Param param1; param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; @@ -60,14 +61,14 @@ public: auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1}; auto output_set1 = OperandIndexSequence{operand_result1}; graph->addOperation( - std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, 
param1)); + std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1)); operation::BinaryArithmetic::Param param2; param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; param2.activation = Activation::NONE; auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2}; auto output_set2 = OperandIndexSequence{operand_result2}; graph->addOperation( - std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2)); + std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2)); // Identify model inputs and outputs graph->addInput(operand_lhs); graph->addInput(operand_rhs1); @@ -77,13 +78,15 @@ public: // Compile auto subgs = std::make_shared<onert::ir::Subgraphs>(); subgs->push(onert::ir::SubgraphIndex{0}, graph); - onert::compiler::Compiler compiler{subgs}; + tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get()); + onert::compiler::Compiler compiler{subgs, tracing_ctx.get()}; executors = compiler.compile(); } public: std::shared_ptr<Graph> graph; std::shared_ptr<onert::exec::ExecutorMap> executors; + std::unique_ptr<onert::util::TracingCtx> tracing_ctx; }; TEST(ExecInstance, simple) @@ -137,7 +140,8 @@ TEST(ExecInstance, twoCompile) // Make new executor: compile again auto subgs = std::make_shared<onert::ir::Subgraphs>(); subgs->push(onert::ir::SubgraphIndex{0}, graph); - onert::compiler::Compiler compiler{subgs}; + auto tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get()); + onert::compiler::Compiler compiler{subgs, tracing_ctx.get()}; std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler.compile(); onert::exec::Execution execution2{executors2}; @@ -205,7 +209,7 @@ class Inference public: Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4], std::shared_ptr<onert::exec::ExecutorMap> &executors) - : _input1{input1}, _input2{input2}, _output{output}, _executors{executors} + : _input1{input1}, _input2{input2}, _output{output}, _executors{executors} { // DO NOTHING } diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/test/core/exec/ExecTime.test.cc index 8c2e34df8..6b0c35a79 100644 --- a/runtime/onert/test/core/exec/ExecTime.test.cc +++ b/runtime/onert/test/core/exec/ExecTime.test.cc @@ -62,7 +62,7 @@ TEST(ExecTime, roundtrip_ok) et.updateOperationExecTime(b, "op1", true, 100, 100); et.updateOperationExecTime(b, "op1", true, 200, 200); et.updateOperationExecTime(b, "op1", false, 100, 888); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } { ExecTime et(bs); @@ -73,7 +73,7 @@ TEST(ExecTime, roundtrip_ok) ASSERT_EQ(time, 150); time = et.getOperationExecTime(b, "op1", false, 100); ASSERT_EQ(time, 888); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // clean up EXPECT_EQ(remove("exec_time.json"), 0); @@ -88,7 +88,7 @@ TEST(ExecTime, structure) ExecTime et(bs); et.updateOperationExecTime(b, "op1", true, 100, 100); et.updateOperationExecTime(b, "op1", true, 200, 200); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } { ExecTime et(bs); @@ -97,7 +97,7 @@ TEST(ExecTime, structure) // Check interpolation time = et.getOperationExecTime(b, "op1", true, 200); ASSERT_EQ(time, 200); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // clean up EXPECT_EQ(remove("exec_time.json"), 0); diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc index 0c7b1b762..327c38f79 100644 --- 
diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/test/core/exec/ExecTime.test.cc
index 8c2e34df8..6b0c35a79 100644
--- a/runtime/onert/test/core/exec/ExecTime.test.cc
+++ b/runtime/onert/test/core/exec/ExecTime.test.cc
@@ -62,7 +62,7 @@ TEST(ExecTime, roundtrip_ok)
     et.updateOperationExecTime(b, "op1", true, 100, 100);
     et.updateOperationExecTime(b, "op1", true, 200, 200);
     et.updateOperationExecTime(b, "op1", false, 100, 888);
-    et.uploadOperationsExecTime();
+    et.storeOperationsExecTime();
   }
   {
     ExecTime et(bs);
@@ -73,7 +73,7 @@
     ASSERT_EQ(time, 150);
     time = et.getOperationExecTime(b, "op1", false, 100);
     ASSERT_EQ(time, 888);
-    et.uploadOperationsExecTime();
+    et.storeOperationsExecTime();
   }
   // clean up
   EXPECT_EQ(remove("exec_time.json"), 0);
@@ -88,7 +88,7 @@ TEST(ExecTime, structure)
     ExecTime et(bs);
     et.updateOperationExecTime(b, "op1", true, 100, 100);
     et.updateOperationExecTime(b, "op1", true, 200, 200);
-    et.uploadOperationsExecTime();
+    et.storeOperationsExecTime();
   }
   {
     ExecTime et(bs);
@@ -97,7 +97,7 @@
     // Check interpolation
     time = et.getOperationExecTime(b, "op1", true, 200);
     ASSERT_EQ(time, 200);
-    et.uploadOperationsExecTime();
+    et.storeOperationsExecTime();
   }
   // clean up
   EXPECT_EQ(remove("exec_time.json"), 0);
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc
index 0c7b1b762..327c38f79 100644
--- a/runtime/onert/test/core/interp/ExecManager.cc
+++ b/runtime/onert/test/core/interp/ExecManager.cc
@@ -63,7 +63,7 @@ protected:
     auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
     auto output_set = OperandIndexSequence{operand_result};
     _graph->addOperation(
-        std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+      std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
 
     // Identify model inputs and outputs
 
@@ -79,7 +79,7 @@ protected:
 
     _executors = std::make_shared<ExecutorMap>();
     _executors->insert(
-        std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+      std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
   }
 
   void CreateTwoStepModel()
@@ -109,8 +109,8 @@ protected:
     auto operand_rhs2 = _graph->addOperand(shape, type);
     auto operand_result2 = _graph->addOperand(shape, type);
     _graph->operands()
-        .at(operand_rhs2)
-        .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+      .at(operand_rhs2)
+      .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
 
     // 2nd add operations (result2 <= result1 + rhs2)
 
@@ -120,7 +120,7 @@
     auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
     auto output_set1 = OperandIndexSequence{operand_result1};
     _graph->addOperation(
-        std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+      std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
 
     operation::BinaryArithmetic::Param param2;
     param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
@@ -128,7 +128,7 @@
     auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
     auto output_set2 = OperandIndexSequence{operand_result2};
     _graph->addOperation(
-        std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+      std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
 
     // Identify model inputs and outputs
 
@@ -144,7 +144,7 @@
 
     _executors = std::make_shared<ExecutorMap>();
     _executors->insert(
-        std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+      std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
   }
 
   void CreateUnspecifiedDimensionsModel()
@@ -168,9 +168,8 @@
     auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
     _graph->operands()
-        .at(operand_activation)
-        .data(
-            std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
+      .at(operand_activation)
+      .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
 
     auto operand_result = _graph->addOperand(shape, type);
 
@@ -182,7 +181,7 @@
     auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
     auto output_set = OperandIndexSequence{operand_result};
     _graph->addOperation(
-        std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+      std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
 
     // Identify model inputs and outputs
 
@@ -198,7 +197,7 @@
 
     _executors = std::make_shared<ExecutorMap>();
     _executors->insert(
-        std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+      std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
   }
 
   void createExecution() { _execution = std::make_unique<Execution>(_executors); }
diff --git a/runtime/onert/test/graph/MockNode.h b/runtime/onert/test/graph/MockNode.h
index 60b4719ed..0e7ed977b 100644
--- a/runtime/onert/test/graph/MockNode.h
+++ b/runtime/onert/test/graph/MockNode.h
@@ -30,7 +30,7 @@ class SimpleMock : public onert::ir::Operation
 public:
   SimpleMock(const onert::ir::OperandIndexSequence &inputs,
              const onert::ir::OperandIndexSequence &outputs)
-      : Operation{onert::ir::OperandConstraint::createAny()}
+    : Operation{onert::ir::OperandConstraint::createAny()}
   {
     setInputs(inputs);
     setOutputs(outputs);
diff --git a/runtime/onert/test/graph/operand/UseDef.cc b/runtime/onert/test/graph/operand/UseDef.cc
index 206e402ed..5ef10027e 100644
--- a/runtime/onert/test/graph/operand/UseDef.cc
+++ b/runtime/onert/test/graph/operand/UseDef.cc
@@ -49,16 +49,16 @@ TEST(ir_Operand, neg_usedef)
   // MockNode1
   auto operand_index1 = graph.addOperand(shape, type);
   auto mocknode_index1 =
-      graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
+    graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
 
   // MockNode2
   auto operand_index2 = graph.addOperand(shape, type);
   auto mocknode_index2 =
-      graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
+    graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
 
   // MockNode3(two input)
   auto multiinput_index = graph.addOperation(
-      std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
+    std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
 
   graph.finishBuilding();
diff --git a/runtime/onert/test/util/ShapeInference.cc b/runtime/onert/test/util/ShapeInference.cc
index f1cbfd692..2ecaa2885 100644
--- a/runtime/onert/test/util/ShapeInference.cc
+++ b/runtime/onert/test/util/ShapeInference.cc
@@ -48,7 +48,7 @@ TEST(ShapeInference, Pool2DNodeSame)
   Padding padding{PaddingType::SAME};
 
   operation::Pool2D::Param avg_pool_param{
-      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
   auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -58,7 +58,7 @@ TEST(ShapeInference, Pool2DNodeSame)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
 
   operation::Pool2D::Param max_pool_param{
-      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
   infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -75,7 +75,7 @@ TEST(ShapeInference, Pool2DNodeValid)
   Padding padding{PaddingType::VALID};
 
   operation::Pool2D::Param avg_pool_param{
-      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
   auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -85,7 +85,7 @@ TEST(ShapeInference, Pool2DNodeValid)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
 
   operation::Pool2D::Param max_pool_param{
-      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
   infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -103,7 +103,7 @@ TEST(ShapeInference, Pool2DNodeExplicit)
   Padding padding{4, 3, 2, 1};
 
   operation::Pool2D::Param avg_pool_param{
-      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
   auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -113,7 +113,7 @@ TEST(ShapeInference, Pool2DNodeExplicit)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
 
   operation::Pool2D::Param max_pool_param{
-      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
   infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -130,7 +130,7 @@ TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
   Padding padding{PaddingType::SAME};
 
   operation::Pool2D::Param avg_pool_param{
-      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
 
   ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
                std::runtime_error);
 }
@@ -161,7 +161,7 @@ TEST(ShapeInference, Conv2D)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
 
   param =
-      operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
+    operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
   infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -190,7 +190,7 @@ TEST(ShapeInference, DepthwiseConv2D)
   operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
                                           Activation::NONE, Dilation{1, 1}};
   auto infered_out_shape =
-      onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+    onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -364,7 +364,7 @@ TEST(ShapeInference, Transpose)
     ASSERT_EQ(in_shape.rank(), perm.size());
     ASSERT_EQ(expected.rank(), perm.size());
     auto inferred_out_shape =
-        onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
+      onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
     // post-conditions
     ASSERT_EQ(inferred_out_shape.rank(), perm.size());
     for (int32_t dim = 0; dim < expected.rank(); dim++)
@@ -479,8 +479,8 @@ TEST(ShapeInference, BCQFullyConnected)
 {
   auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
                    Shape &expected) {
-    auto actual = onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape,
-                                                                      cluster.data());
+    auto actual =
+      onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
     ASSERT_EQ(actual.rank(), expected.rank());
     for (int32_t dim = 0; dim < expected.rank(); dim++)
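
The Pool2D cases above pin down the padding arithmetic that inferPoolShape is expected to implement. Assuming the usual TensorFlow-style SAME/VALID conventions (which the padding type names suggest; the onert internals themselves are not shown in this diff), the H/W extents the tests assert can be reproduced per spatial axis with:

    #include <cstdint>

    // Output extent for one spatial axis under SAME padding: ceil(in / stride).
    int32_t outSame(int32_t in, int32_t stride) { return (in + stride - 1) / stride; }

    // VALID padding: ceil((in - kernel + 1) / stride). The kernel sizes in the
    // tests are the 3 and 6 passed to Pool2D::Param.
    int32_t outValid(int32_t in, int32_t kernel, int32_t stride)
    {
      return (in - kernel + stride) / stride;
    }

    // Explicit padding, as exercised by Padding{4, 3, 2, 1} above (the mapping of
    // those four values to front/back per axis is an assumption here).
    int32_t outExplicit(int32_t in, int32_t kernel, int32_t stride, int32_t pad_front,
                        int32_t pad_back)
    {
      return (in + pad_front + pad_back - kernel) / stride + 1;
    }
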