author | Chunseok Lee <chunseok.lee@samsung.com> | 2022-09-07 19:04:21 +0900
---|---|---
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2022-09-07 19:04:21 +0900
commit | c690d52bdd137ed6a17353aa7af35e8141ece77b | (patch)
tree | dbb7dd99133132dfbffcb8c9e9af4f1ffc2f4808 | /runtime/onert
parent | 3ad689f0803519e343c36d5700646e86059df961 | (diff)
Imported Upstream version 1.21.0

Tags: upstream/1.21.0, tizen_7.0_m2_release, accepted/tizen/unified/20220912.170817, accepted/tizen/unified/20220912.164738, accepted/tizen/7.0/unified/hotfix/20221116.105341, accepted/tizen/7.0/unified/20221110.060236, tizen_7.0_hotfix, tizen_7.0, accepted/tizen_7.0_unified_hotfix, accepted/tizen_7.0_unified
Diffstat (limited to 'runtime/onert')
155 files changed, 3149 insertions, 2050 deletions
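Headline changes in this import: the runtime version moves to 1.21.0, `nnfw_create_session` now delegates to a new `nnfw_session::create` factory so initialization failures surface as status codes, and a session holds a multi-model `ir::NNPkg` instead of a single `ir::Subgraphs`. For orientation before the diff body, here is a minimal sketch of the public C API flow these internals sit behind. It uses only functions declared in `include/nnfw.h`; the nnpackage path and tensor shapes are hypothetical.

```cpp
// Minimal sketch of the public nnfw C API whose internals this commit reworks.
// The nnpackage path and tensor sizes below are made up for illustration.
#include <nnfw.h>
#include <cstdio>
#include <vector>

int main()
{
  nnfw_session *session = nullptr;

  // Since this commit, nnfw_create_session() forwards to the new
  // nnfw_session::create() factory, so allocation/initialization failures
  // are reported as a status code instead of leaking a half-built session.
  if (nnfw_create_session(&session) != NNFW_STATUS_NO_ERROR)
    return 1;

  // Load an nnpackage directory. The MANIFEST may now list several models;
  // the runtime wraps them all in a single ir::NNPkg.
  if (nnfw_load_model_from_file(session, "./my_nnpackage") != NNFW_STATUS_NO_ERROR ||
      nnfw_prepare(session) != NNFW_STATUS_NO_ERROR) // compile the loaded package
  {
    nnfw_close_session(session);
    return 1;
  }

  std::vector<float> input(1 * 224 * 224 * 3, 0.f), output(1000, 0.f);
  nnfw_set_input(session, 0, NNFW_TYPE_TENSOR_FLOAT32, input.data(),
                 input.size() * sizeof(float));
  nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, output.data(),
                  output.size() * sizeof(float));

  NNFW_STATUS status = nnfw_run(session);
  std::printf("run: %s\n", status == NNFW_STATUS_NO_ERROR ? "ok" : "failed");

  nnfw_close_session(session);
  return status == NNFW_STATUS_NO_ERROR ? 0 : 1;
}
```

The factory method introduced for `nnfw_session::create` (see `nnfw_api_internal.cc` below) is what lets the first call above return `NNFW_STATUS_OUT_OF_MEMORY` or `NNFW_STATUS_ERROR` cleanly rather than throwing across the C boundary.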
diff --git a/runtime/onert/CMakeLists.txt b/runtime/onert/CMakeLists.txt index 88d52a5bd..3c9ca99da 100644 --- a/runtime/onert/CMakeLists.txt +++ b/runtime/onert/CMakeLists.txt @@ -7,9 +7,3 @@ add_subdirectory(frontend) add_subdirectory(core) add_subdirectory(api) add_subdirectory(sample) - -if(NOT ENABLE_TEST) - return() -endif(NOT ENABLE_TEST) - -add_subdirectory(test) diff --git a/runtime/onert/api/CMakeLists.txt b/runtime/onert/api/CMakeLists.txt index beb243a4d..badd5d133 100644 --- a/runtime/onert/api/CMakeLists.txt +++ b/runtime/onert/api/CMakeLists.txt @@ -10,6 +10,7 @@ set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h) target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header) target_link_libraries(${ONERT_DEV} PRIVATE onert_core) +target_link_libraries(${ONERT_DEV} PRIVATE nnfw_lib_misc) target_link_libraries(${ONERT_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD}) target_link_libraries(${ONERT_DEV} PRIVATE trix_loader) target_link_libraries(${ONERT_DEV} PRIVATE nnfw_common) diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h index 6f296a931..658cba4d5 100644 --- a/runtime/onert/api/include/nnfw.h +++ b/runtime/onert/api/include/nnfw.h @@ -193,7 +193,7 @@ typedef struct nnfw_tensorinfo * And inference is performed after {@link nnfw_run} is invoked. * * <p>{@link nnfw_close_session} should be called once - * if session is no longer need + * if session is no longer needed * * @param[out] session The session to be created * @return NNFW_STATUS_NO_ERROR if successful @@ -213,7 +213,7 @@ NNFW_STATUS nnfw_close_session(nnfw_session *session); /** * @brief Load model from nnpackage file or directory * - * The length of \p package_file_path must not execeed 1024 bytes including zero at the end. + * The length of \p package_file_path must not exceed 1024 bytes including zero at the end. 
* * @param[in] session nnfw_session loading the given nnpackage file/dir * @param[in] package_file_path Path to the nnpackage file or unzipped directory to be loaded diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h index 45b34716a..2fbb96f31 100644 --- a/runtime/onert/api/include/nnfw_version.h +++ b/runtime/onert/api/include/nnfw_version.h @@ -21,6 +21,6 @@ * NNFW_VERSION is a uint32 value representing nnfw runtime version * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch */ -#define NNFW_VERSION 0x01001400 +#define NNFW_VERSION 0x01001500 #endif // __NNFW_VERSION_H__ diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc index 0ebd385e9..a0e6ee094 100644 --- a/runtime/onert/api/src/nnfw_api.cc +++ b/runtime/onert/api/src/nnfw_api.cc @@ -58,15 +58,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_INFO_ID_VERSION, 0); * @param session the session to be created * @return NNFW_STATUS_NO_ERROR if successful */ -NNFW_STATUS nnfw_create_session(nnfw_session **session) -{ - NNFW_RETURN_ERROR_IF_NULL(session); - - *session = new (std::nothrow) nnfw_session(); - if (*session == nullptr) - return NNFW_STATUS_OUT_OF_MEMORY; - return NNFW_STATUS_NO_ERROR; -} +NNFW_STATUS nnfw_create_session(nnfw_session **session) { return nnfw_session::create(session); } /* * Close a session instance diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc index 62a043921..9b43dd381 100644 --- a/runtime/onert/api/src/nnfw_api_internal.cc +++ b/runtime/onert/api/src/nnfw_api_internal.cc @@ -25,6 +25,7 @@ #include "tflite_loader.h" #include "trix_loader.h" #include "json/json.h" +#include "ir/NNPkg.h" #include "ir/OpCode.h" #include "util/TracingCtx.h" @@ -110,9 +111,7 @@ std::string trim(const std::string &value) return value.substr(begin, range); } -using CfgKeyValues = std::unordered_map<std::string, std::string>; - -bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues) +bool loadConfigure(const std::string cfgfile, onert::util::CfgKeyValues &keyValues) { std::ifstream ifs(cfgfile); if (ifs.is_open()) @@ -143,19 +142,6 @@ bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues) return false; } -void setConfigKeyValues(const CfgKeyValues &keyValues) -{ - auto configsrc = std::make_unique<onert::util::GeneralConfigSource>(); - - for (auto it = keyValues.begin(); it != keyValues.end(); ++it) - { - VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl; - configsrc->set(it->first, it->second); - } - - onert::util::config_source_ext(std::move(configsrc)); -} - NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt) { using onert::ir::DataType; @@ -195,15 +181,59 @@ void fillTensorInfo(nnfw_tensorinfo *ti, const onert::ir::Shape &shape, ti->dtype = datatype_to_nnfw_dtype(dtype); } +std::unique_ptr<onert::ir::Model> loadModel(const std::string filename, + const std::string model_type) +{ + if (model_type == "tflite") + return onert::tflite_loader::loadModel(filename.c_str()); + if (model_type == "circle") + return onert::circle_loader::loadModel(filename.c_str()); + if (model_type == "tvn") + return onert::trix_loader::loadModel(filename.c_str()); + + std::cerr << "Unsupported model type" << std::endl; + return std::unique_ptr<onert::ir::Model>(nullptr); +} + } // namespace nnfw_session::nnfw_session() - : _subgraphs{nullptr}, _compiler{nullptr}, _execution{nullptr}, - _kernel_registry{std::make_shared<onert::api::CustomKernelRegistry>()}, 
_tracing_ctx{nullptr} + : _nnpkg{nullptr}, _coptions{}, _compiler_artifact{nullptr}, _execution{nullptr}, + _kernel_registry{nullptr} { // DO NOTHING } +NNFW_STATUS nnfw_session::create(nnfw_session **session) +{ + if (session == nullptr) + return NNFW_STATUS_UNEXPECTED_NULL; + + // Create session + *session = new (std::nothrow) nnfw_session(); + if (*session == nullptr) + { + std::cerr << "Error during session creation" << std::endl; + return NNFW_STATUS_OUT_OF_MEMORY; + } + + // Initialize fields + try + { + (*session)->_kernel_registry = std::make_shared<onert::api::CustomKernelRegistry>(); + } + catch (const std::exception &e) + { + std::cerr << "Error during session initialization : " << e.what() << std::endl; + delete *session; + *session = nullptr; + + return NNFW_STATUS_ERROR; + } + + return NNFW_STATUS_NO_ERROR; +} + nnfw_session::~nnfw_session() = default; NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size) @@ -219,19 +249,16 @@ NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size) try { - _subgraphs = onert::circle_loader::loadModel(buffer, size); + auto model = onert::circle_loader::loadModel(buffer, size); + _nnpkg = std::make_shared<onert::ir::NNPkg>(std::move(model)); + _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig()); + _state = State::MODEL_LOADED; } catch (const std::exception &e) { std::cerr << "Error during model loading : " << e.what() << std::endl; return NNFW_STATUS_ERROR; } - - _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get()); - - _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get()); - - _state = State::MODEL_LOADED; return NNFW_STATUS_NO_ERROR; } @@ -247,45 +274,28 @@ NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path) } std::string filename{model_file_path}; - if (filename.size() < 8) // .tflite or .circle + // TODO: Use std::filesystem::path when we can use c++17. + auto dotidx = filename.find_last_of('.'); + if (dotidx == std::string::npos) { - std::cerr << "Invalid model file path." << std::endl; + std::cerr << "Invalid model file path. Please use file with extension." 
<< std::endl; return NNFW_STATUS_ERROR; } - - std::string model_type = filename.substr(filename.size() - 7, 7); - + std::string model_type = filename.substr(dotidx + 1); // + 1 to exclude dot try { - if (model_type == ".tflite") - { - _subgraphs = onert::tflite_loader::loadModel(filename.c_str()); - } - else if (model_type == ".circle") - { - _subgraphs = onert::circle_loader::loadModel(filename.c_str()); - } - else if (model_type == ".tvn") - { - _subgraphs = onert::trix_loader::loadModel(filename.c_str()); - } - else - { - std::cerr << "Unsupported model type" << std::endl; + auto model = loadModel(filename, model_type); + if (model == nullptr) return NNFW_STATUS_ERROR; - } + _nnpkg = std::make_shared<onert::ir::NNPkg>(std::move(model)); + _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig()); + _state = State::MODEL_LOADED; } catch (const std::exception &e) { std::cerr << "Error during model loading : " << e.what() << std::endl; return NNFW_STATUS_ERROR; } - - _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get()); - - _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get()); - - _state = State::MODEL_LOADED; return NNFW_STATUS_NO_ERROR; } @@ -334,45 +344,59 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir) { auto filepath = package_path + std::string("/metadata/") + configs[0].asString(); - CfgKeyValues keyValues; + onert::util::CfgKeyValues keyValues; if (loadConfigure(filepath, keyValues)) { - setConfigKeyValues(keyValues); + onert::util::setConfigKeyValues(keyValues); } } - - auto model_file_path = package_path + std::string("/") + models[0].asString(); // first model - auto model_type = model_types[0].asString(); // first model's type - if (model_type == "tflite") + _nnpkg = std::make_shared<onert::ir::NNPkg>(); + for (uint32_t i = 0; i < models.size(); ++i) { - _subgraphs = onert::tflite_loader::loadModel(model_file_path); - } - else if (model_type == "circle") - { - _subgraphs = onert::circle_loader::loadModel(model_file_path); - } - else if (model_type == "tvn") - { - _subgraphs = onert::trix_loader::loadModel(model_file_path); + auto model_file_path = package_path + std::string("/") + models[i].asString(); + auto model_type = model_types[i].asString(); + auto model = loadModel(model_file_path, model_type); + if (model == nullptr) + return NNFW_STATUS_ERROR; + model->primary_subgraph()->bindKernelBuilder(_kernel_registry->getBuilder()); + _nnpkg->push(onert::ir::ModelIndex{i}, std::move(model)); + _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig()); } - else + + auto toIODesc = [](std::string str) { + auto indices = nnfw::misc::split(str, ':'); + if (indices.size() != 3) + { + std::cerr << "IODesc should be 3-tuple." 
<< std::endl; + return onert::ir::IODesc{}; + } + auto model_idx = static_cast<uint32_t>(std::stoi(indices.at(0))); + auto subgraph_idx = static_cast<uint32_t>(std::stoi(indices.at(1))); + auto operand_idx = static_cast<uint32_t>(std::stoi(indices.at(2))); + return onert::ir::IODesc{model_idx, subgraph_idx, operand_idx}; + }; + // read pkg-inputs and pkg-outputs + const Json::Value &pkg_inputs = root["pkg-inputs"]; + for (uint32_t i = 0; i < pkg_inputs.size(); ++i) + _nnpkg->addInput(toIODesc(pkg_inputs[i].asString())); + const Json::Value &pkg_outputs = root["pkg-outputs"]; + for (uint32_t i = 0; i < pkg_outputs.size(); ++i) + _nnpkg->addOutput(toIODesc(pkg_outputs[i].asString())); + // read model-connect + const Json::Value &fromtos = root["model-connect"]; + for (uint32_t i = 0; i < fromtos.size(); ++i) { - std::cerr << "Unsupported model type in MANIFEST" << std::endl; - return NNFW_STATUS_ERROR; + const Json::Value &tos = fromtos[i]["to"]; + for (uint32_t j = 0; j < tos.size(); ++j) + _nnpkg->addEdge(toIODesc(fromtos[i]["from"].asString()), toIODesc(tos[j].asString())); } - _subgraphs->primary()->bindKernelBuilder(_kernel_registry->getBuilder()); + _state = State::MODEL_LOADED; } catch (const std::exception &e) { std::cerr << "Error during model loading : " << e.what() << std::endl; return NNFW_STATUS_ERROR; } - - _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get()); - - _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get()); - - _state = State::MODEL_LOADED; return NNFW_STATUS_NO_ERROR; } @@ -396,9 +420,17 @@ NNFW_STATUS nnfw_session::prepare() try { - _subgraphs.reset(); - std::shared_ptr<onert::exec::ExecutorMap> executors = _compiler->compile(); - _execution = std::make_unique<onert::exec::Execution>(executors); + // TODO: Compile all models in case of multiple models + if (_nnpkg->model_count() > 2) + { + std::cerr << "Error during model prepare : more than 3 multiple models are not supported yet." 
+ << std::endl; + return NNFW_STATUS_ERROR; + } + auto compiler = std::make_unique<onert::compiler::Compiler>(_nnpkg, _coptions); + _nnpkg.reset(); + _compiler_artifact = compiler->compile(); + _execution = std::make_unique<onert::exec::Execution>(_compiler_artifact->_executors); } catch (const std::exception &e) { @@ -430,13 +462,14 @@ NNFW_STATUS nnfw_session::prepare_pipeline(const char *map_file_path) try { - _subgraphs.reset(); - std::vector<std::shared_ptr<onert::exec::ExecutorMap>> executor_maps = - _compiler->compile(_package_file_path.c_str(), map_file_path); + auto model = _nnpkg->primary_model(); + auto compiler = std::make_unique<onert::compiler::Compiler>(model, *_coptions[0]); + _nnpkg.reset(); + auto artifacts = compiler->compile(_package_file_path.c_str(), map_file_path); - for (auto it = executor_maps.begin(); it != executor_maps.end(); ++it) + for (auto it = artifacts.begin(); it != artifacts.end(); ++it) { - _executions.push_back(std::make_shared<onert::exec::Execution>(*it)); + _executions.push_back(std::make_shared<onert::exec::Execution>(it->get()->_executors)); } make_dependency(); _threads.resize(_executions.size()); @@ -740,7 +773,8 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti) { // In this case, if we apply input shape in primary_subgraph, it will propagate after // compilation and excution - auto primary_subgraph = _subgraphs->primary(); + auto model = _nnpkg->primary_model(); + auto primary_subgraph = model->primary_subgraph(); auto ind = primary_subgraph->getInputs().at(index); auto &input = primary_subgraph->operands().at(ind); @@ -851,12 +885,12 @@ void nnfw_session::make_dependency() { for (uint32_t out_exe = 0; out_exe < _executions.size(); out_exe++) { - auto out_graph = _executions[out_exe]->primary_subgraph(); + auto &out_graph = _executions[out_exe]->primary_subgraph(); for (uint32_t in_exe = 0; in_exe < _executions.size(); in_exe++) { if (out_exe == in_exe) continue; - auto in_graph = _executions[in_exe]->primary_subgraph(); + auto &in_graph = _executions[in_exe]->primary_subgraph(); for (auto out = out_graph._name_to_output_begin(); out != out_graph._name_to_output_end(); out++) { @@ -971,7 +1005,7 @@ NNFW_STATUS nnfw_session::set_available_backends(const char *backends) if (null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false) return NNFW_STATUS_ERROR; - auto &options = _compiler->options(); + auto &options = *_coptions[0]; using namespace onert::util; @@ -1005,7 +1039,7 @@ NNFW_STATUS nnfw_session::set_op_backend(const char *op, const char *backend) return NNFW_STATUS_ERROR; } - auto &opcode_to_backend = _compiler->options().manual_scheduler_options.opcode_to_backend; + auto &opcode_to_backend = _coptions[0]->manual_scheduler_options.opcode_to_backend; opcode_to_backend.emplace(onert::ir::toOpCode(key), backend); } catch (const std::exception &e) @@ -1024,7 +1058,7 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value) if (!key || !value) return NNFW_STATUS_UNEXPECTED_NULL; - auto &options = _compiler->options(); + auto &options = *_coptions[0]; using namespace onert::util; @@ -1067,14 +1101,14 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value) const onert::ir::Graph *nnfw_session::primary_subgraph() { - if (_subgraphs) + if (_nnpkg != nullptr) { - assert(!_execution && _executions.empty()); - return _subgraphs->primary().get(); + assert(_execution == nullptr && _executions.empty()); + return _nnpkg->primary_model()->primary_subgraph().get(); } else { - 
assert(_execution || !_executions.empty()); + assert(_execution != nullptr || !_executions.empty()); // TODO Remove const_cast // We assumed the graph will not change after compilation, but shape could change if (!_executions.empty()) @@ -1094,7 +1128,7 @@ NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_ if (!key || !value) return NNFW_STATUS_UNEXPECTED_NULL; - auto &options = _compiler->options(); + auto &options = *_coptions[0]; auto check_boundary = [](size_t dest_size, std::string &src) { if (dest_size < src.length() + 1 /* for '\0' */) @@ -1138,9 +1172,9 @@ bool nnfw_session::isStateInitialized() { if (_state == State::INITIALIZED) { - assert(!_subgraphs); - assert(!_compiler); - assert(!_execution && _executions.empty()); + assert(_nnpkg == nullptr); + assert(_coptions.empty()); + assert(_execution == nullptr && _executions.empty()); return true; } else @@ -1153,9 +1187,9 @@ bool nnfw_session::isStateModelLoaded() { if (_state == State::MODEL_LOADED) { - assert(_subgraphs); - assert(_compiler); - assert(!_execution && _executions.empty()); + assert(_nnpkg != nullptr); + assert(!_coptions.empty()); + assert(_execution == nullptr && _executions.empty()); return true; } else @@ -1168,9 +1202,9 @@ bool nnfw_session::isStatePrepared() { if (_state == State::PREPARED) { - assert(!_subgraphs); - assert(_compiler); - assert(_execution || !_executions.empty()); + assert(_nnpkg == nullptr); + assert(!_coptions.empty()); + assert(_execution != nullptr || !_executions.empty()); return true; } else @@ -1183,9 +1217,9 @@ bool nnfw_session::isStateRunning() { if (_state == State::RUNNING) { - assert(!_subgraphs); - assert(_compiler); - assert(_execution || !_executions.empty()); + assert(_nnpkg == nullptr); + assert(!_coptions.empty()); + assert(_execution != nullptr || !_executions.empty()); return true; } return false; @@ -1195,9 +1229,9 @@ bool nnfw_session::isStateFinishedRun() { if (_state == State::FINISHED_RUN) { - assert(!_subgraphs); - assert(_compiler); - assert(_execution || !_executions.empty()); + assert(_nnpkg == nullptr); + assert(!_coptions.empty()); + assert(_execution != nullptr || !_executions.empty()); return true; } else @@ -1224,9 +1258,14 @@ NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *i NNFW_STATUS nnfw_session::set_backends_per_operation(const char *backend_settings) { if (backend_settings == NULL) - { return NNFW_STATUS_ERROR; - } - _compiler->set_backend_from_str(backend_settings); + + if (!isStateModelLoaded()) + return NNFW_STATUS_INVALID_STATE; + + // Backend for all + auto &ms_options = _coptions[0]->manual_scheduler_options; + ms_options.setBackendMap(std::string{backend_settings}); + return NNFW_STATUS_NO_ERROR; } diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h index 6d75d894f..9b729fd5f 100644 --- a/runtime/onert/api/src/nnfw_api_internal.h +++ b/runtime/onert/api/src/nnfw_api_internal.h @@ -20,7 +20,6 @@ #include "nnfw.h" #include "nnfw_experimental.h" -#include <util/GeneralConfigSource.h> #include <util/TracingCtx.h> #include <string> @@ -41,11 +40,13 @@ class Execution; namespace ir { class Graph; -class Subgraphs; +class Model; +class NNPkg; } // namespace ir namespace compiler { -class Compiler; +struct CompilerArtifact; +class CompilerOptions; } // namespace compiler } // namespace onert @@ -97,9 +98,18 @@ private: }; public: + /** + * @brief Factory method. 
It creates and initialize nnfw_session + * + * @note Use factory instead of constructor to get status + */ + static NNFW_STATUS create(nnfw_session **session); + +private: nnfw_session(); - ~nnfw_session(); +public: + ~nnfw_session(); NNFW_STATUS load_model_from_nnpackage(const char *package_file_path); NNFW_STATUS prepare(); NNFW_STATUS prepare_pipeline(const char *map_file_path); @@ -148,6 +158,10 @@ public: NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func); NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index); NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index); + /** + * @brief Set backends with string-encoded mapping from operation index to backend type + * (cpu, acl_cl) + */ NNFW_STATUS set_backends_per_operation(const char *backend_settings); private: @@ -161,15 +175,14 @@ private: private: State _state{State::INITIALIZED}; - std::shared_ptr<onert::ir::Subgraphs> _subgraphs; - std::unique_ptr<onert::compiler::Compiler> _compiler; + std::shared_ptr<onert::ir::NNPkg> _nnpkg; + std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> _coptions; + std::shared_ptr<onert::compiler::CompilerArtifact> _compiler_artifact; std::unique_ptr<onert::exec::Execution> _execution; std::shared_ptr<onert::api::CustomKernelRegistry> _kernel_registry; std::vector<std::thread> _threads; std::vector<std::shared_ptr<onert::exec::Execution>> _executions; std::string _package_file_path; - - std::unique_ptr<onert::util::TracingCtx> _tracing_ctx; }; #endif // __API_NNFW_API_INTERNAL_H__ diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h index 945ad83bb..301ded01f 100644 --- a/runtime/onert/backend/acl_cl/Backend.h +++ b/runtime/onert/backend/acl_cl/Backend.h @@ -46,8 +46,10 @@ public: { const auto &graph = *data.graph; const auto &operands = data.graph->operands(); + const auto is_linear_executor = data.is_linear_executor; + auto context = std::make_unique<acl_cl::BackendContext>(this, std::move(data)); - auto tm = createTensorManager(data.is_linear_executor); + auto tm = createTensorManager(is_linear_executor); auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); auto tb = std::make_shared<TensorBuilder>(operands, tm); context->tensor_registry = tr; diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h index 62b163b11..1c7713055 100644 --- a/runtime/onert/backend/acl_neon/Backend.h +++ b/runtime/onert/backend/acl_neon/Backend.h @@ -46,8 +46,10 @@ public: { const auto &graph = *data.graph; const auto &operands = data.graph->operands(); + const auto is_linear_executor = data.is_linear_executor; + auto context = std::make_unique<acl_neon::BackendContext>(this, std::move(data)); - auto tm = createTensorManager(data.is_linear_executor); + auto tm = createTensorManager(is_linear_executor); auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); auto tb = std::make_shared<TensorBuilder>(operands, tm); context->tensor_registry = tr; diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt index b61e58251..99643b983 100644 --- a/runtime/onert/backend/cpu/CMakeLists.txt +++ b/runtime/onert/backend/cpu/CMakeLists.txt @@ -6,7 +6,7 @@ file(GLOB_RECURSE SOURCES "*.cc") add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES}) -target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker) +target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker 
nnfw_lib_misc) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE onert_core) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage) diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h index ab0bb5f10..6ed4799a8 100644 --- a/runtime/onert/backend/cpu/ExternalContext.h +++ b/runtime/onert/backend/cpu/ExternalContext.h @@ -20,6 +20,8 @@ #include <util/ConfigSource.h> #include <ruy/context.h> +#include <memory> + namespace onert { namespace backend diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc index 75274dc88..762ee7392 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.cc +++ b/runtime/onert/backend/cpu/KernelGenerator.cc @@ -244,17 +244,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI assert(_tensor_builder->dynamicTensorManager()); assert(_tensor_reg); - auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); - // Prepare to handle dynamic tensors later auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); { - dyn_ctx->op_ind = ind; - dyn_ctx->operations = &_operations_ctx; - dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); - - ret->dynamic_tensor_ctx(dyn_ctx); + dyn_ctx->op = &_operations_ctx.at(ind); + dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); } + ret->dynamic_tensor_ctx(dyn_ctx); auto &op = _graph.operations().at(ind); op.accept(*this); diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc index 2255d5e9f..4672fe406 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc @@ -63,7 +63,7 @@ void ConvolutionLayer::convFloat32() getBuffer<float>(_output)); } -void ConvolutionLayer::convQuant8() +void ConvolutionLayer::convQ8uPerTensor() { int32_t output_activation_min = 0; int32_t output_activation_max = 0; @@ -99,7 +99,33 @@ void ConvolutionLayer::convQuant8() getBuffer<uint8_t>(_output)); } -void ConvolutionLayer::convQuant8PerChannel() +void ConvolutionLayer::convQ8uPerChannel() +{ + nnfw::cker::ConvParams op_params; + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = _dilationWidthFactor; + op_params.dilation_height_factor = _dilationHeightFactor; + op_params.input_offset = -_input->data_zero_point(); + op_params.output_offset = _output->data_zero_point(); + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeQuantized(_activation, _output, &output_activation_min, + &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + // NOTE: The following fields of ConvParams are not used: + // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max} + + nnfw::cker::Conv &kernel = *_conv_kernel; + kernel(op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel), + getBuffer<uint8_t>(_kernel), _kernel->data_zero_points().data(), getShape(_bias), + getBuffer<int32_t>(_bias), getShape(_output), getBuffer<uint8_t>(_output)); +} + +void ConvolutionLayer::convQ8i() { int32_t 
output_activation_min = 0; int32_t output_activation_max = 0; @@ -189,11 +215,15 @@ void ConvolutionLayer::run() } else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - convQuant8(); + const bool per_channel_quantized = _kernel->data_scales().size() > 1; + if (per_channel_quantized) + convQ8uPerChannel(); + else + convQ8uPerTensor(); } else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM) { - convQuant8PerChannel(); + convQ8i(); } else { @@ -210,8 +240,8 @@ void ConvolutionLayer::prepare() if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant()) { bool is_transposed = false; - kernel.prepare(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType), - is_transposed, _dilationWidthFactor, _dilationHeightFactor); + kernel.prepareF32(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType), + is_transposed, _dilationWidthFactor, _dilationHeightFactor); // Decrease reference of _kernel(weights) only when _kernel is constant if (is_transposed) @@ -225,8 +255,20 @@ void ConvolutionLayer::prepare() else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic()) { - kernel.prepareQuant(getShape(_input), getShape(_kernel), getShape(_output), _strideWidth, - _strideHeight, _dilationWidthFactor, _dilationHeightFactor); + const bool per_channel_quantized = _kernel->data_scales().size() > 1; + if (per_channel_quantized) + { + GetQuantizedConvolutionMultipliersAndShifts( + _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(), + _kernel->data_scales().size(), getShape(_kernel).Dims(0), + kernel.per_channel_output_multiplier(), kernel.per_channel_output_shift()); + } + else + { + kernel.prepareQ8uPerTensor(getShape(_input), getShape(_kernel), getShape(_output), + _strideWidth, _strideHeight, _dilationWidthFactor, + _dilationHeightFactor); + } } else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM) { diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h index 5d7f7c296..9f5253c8e 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h @@ -50,9 +50,10 @@ public: public: void convFloat32(); - void convQuant8(); + void convQ8uPerTensor(); + void convQ8uPerChannel(); - void convQuant8PerChannel(); + void convQ8i(); void configure(const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias, ir::PaddingType _paddingType, diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc index 30641ecae..8a48497d5 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc @@ -49,7 +49,7 @@ void DepthwiseConvolutionLayer::convFloat32() getBuffer<float>(_output), _external_context->ruy_context()); } -void DepthwiseConvolutionLayer::convQuant8() +void DepthwiseConvolutionLayer::convQ8uPerTensor() { int32_t output_activation_min = 0; int32_t output_activation_max = 0; @@ -84,11 +84,39 @@ void DepthwiseConvolutionLayer::convQuant8() getBuffer<uint8_t>(_output), _external_context->ruy_context()); } -void DepthwiseConvolutionLayer::convQuant8PerChannel() +void DepthwiseConvolutionLayer::convQ8uPerChannel() +{ + nnfw::cker::DepthwiseConvParams op_params; + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + 
op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = _dilationWidth; + op_params.dilation_height_factor = _dilationHeight; + op_params.depth_multiplier = _multiplier; + op_params.input_offset = -_input->data_zero_point(); + op_params.output_offset = _output->data_zero_point(); + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeQuantized(_activation, _output, &output_activation_min, + &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + // NOTE: The following fields of ConvParams are not used: + // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max} + + nnfw::cker::reference_integer_ops::DepthwiseConvPerChannel( + op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(), + getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel), getBuffer<uint8_t>(_kernel), + _kernel->data_zero_points().data(), getShape(_bias), getBuffer<int32_t>(_bias), + getShape(_output), getBuffer<uint8_t>(_output)); +} + +void DepthwiseConvolutionLayer::convQ8i() { if (!_prepared) { - prepareQuant8PerChannel(); + prepareQ8i(); _prepared = true; } @@ -119,7 +147,15 @@ void DepthwiseConvolutionLayer::convQuant8PerChannel() _external_context->ruy_context()); } -void DepthwiseConvolutionLayer::prepareQuant8PerChannel() +void DepthwiseConvolutionLayer::prepareQ8i() +{ + GetQuantizedConvolutionMultipliersAndShifts( + _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(), + _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier, + _per_channel_output_shift); +} + +void DepthwiseConvolutionLayer::prepareQ8uPerChannel() { GetQuantizedConvolutionMultipliersAndShifts( _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(), @@ -155,7 +191,17 @@ void DepthwiseConvolutionLayer::configure( { if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic()) { - prepareQuant8PerChannel(); + prepareQ8i(); + _prepared = true; + } + } + else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() && + !_input->is_dynamic() && !_output->is_dynamic()) + { + const bool per_channel_quantized = _kernel->data_scales().size() > 1; + if (per_channel_quantized) + { + prepareQ8uPerChannel(); _prepared = true; } } @@ -169,11 +215,15 @@ void DepthwiseConvolutionLayer::run() } else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - convQuant8(); + const bool per_channel_quantized = _kernel->data_scales().size() > 1; + if (per_channel_quantized) + convQ8uPerChannel(); + else + convQ8uPerTensor(); } else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM) { - convQuant8PerChannel(); + convQ8i(); } else { diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h index 720550636..5c910109a 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h +++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h @@ -40,9 +40,10 @@ public: public: void convFloat32(); - void convQuant8(); + void convQ8uPerTensor(); + void convQ8uPerChannel(); - void convQuant8PerChannel(); + void convQ8i(); void configure(const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias, const uint32_t paddingLeft, @@ -55,7 +56,8 @@ public: void run() override; 
private: - void prepareQuant8PerChannel(); + void prepareQ8i(); + void prepareQ8uPerChannel(); private: const IPortableTensor *_input{nullptr}; diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc index 8a6fe6504..d89741c86 100644 --- a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc +++ b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc @@ -121,7 +121,9 @@ Array<const CornerBox> decodeBoxes(const Array<float> &raw_boxes, const Array<fl assert(box.y2 > box.y1); } - return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a.shape()); + auto decoded_boxes_a_shape = decoded_boxes_a.shape(); + + return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a_shape); } } diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h index 3cc4eaa5a..c73ae636e 100644 --- a/runtime/onert/backend/ruy/ExternalContext.h +++ b/runtime/onert/backend/ruy/ExternalContext.h @@ -20,6 +20,8 @@ #include <util/ConfigSource.h> #include <ruy/context.h> +#include <memory> + namespace onert { namespace backend diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc index c2f6a1f79..b2bbf9bfc 100644 --- a/runtime/onert/backend/ruy/KernelGenerator.cc +++ b/runtime/onert/backend/ruy/KernelGenerator.cc @@ -42,17 +42,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI assert(_tensor_builder->dynamicTensorManager()); assert(_tensor_reg); - auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); - // Prepare to handle dynamic tensors later auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); { - dyn_ctx->op_ind = ind; - dyn_ctx->operations = &_operations_ctx; - dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); - - ret->dynamic_tensor_ctx(dyn_ctx); + dyn_ctx->op = &_operations_ctx.at(ind); + dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); } + ret->dynamic_tensor_ctx(dyn_ctx); auto &op = _graph.operations().at(ind); op.accept(*this); diff --git a/runtime/onert/backend/trix/CMakeLists.txt b/runtime/onert/backend/trix/CMakeLists.txt index 5455757ca..a94be247d 100644 --- a/runtime/onert/backend/trix/CMakeLists.txt +++ b/runtime/onert/backend/trix/CMakeLists.txt @@ -1,6 +1,6 @@ set(LIB_ONERT_BACKEND_TRIX onert_backend_trix) -nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET) +nnfw_find_package(TRIXEngine QUIET 2.5.0) if(NOT TRIXEngine_FOUND) return() endif(NOT TRIXEngine_FOUND) diff --git a/runtime/onert/backend/trix/DevContext.h b/runtime/onert/backend/trix/DevContext.h index 482932fd4..a7dbd7a59 100644 --- a/runtime/onert/backend/trix/DevContext.h +++ b/runtime/onert/backend/trix/DevContext.h @@ -32,28 +32,42 @@ public: DevContext() { auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP); + // TODO: x64 platform has 3 cores. We do not support more that 2 cores for now. 
+ if (device_count > 2) + { + device_count = 2; + } + if (device_count <= 0) { - throw std::runtime_error("Unable to find TRIV2 NPU device"); + throw std::runtime_error("Unable to find TRIX NPU device"); } - // Use NPU 0 device - if (getNPUdeviceByType(&_dev_handle, NPUCOND_TRIV2_CONN_SOCIP, 0) < 0) + for (int i = 0; i < device_count; i++) { - throw std::runtime_error("Failed to get TRIV2 NPU device handle"); + npudev_h h; + if (getNPUdeviceByType(&h, NPUCOND_TRIV2_CONN_SOCIP, i) < 0) + { + throw std::runtime_error("Failed to get TRIX NPU device handle"); + } + _dev_handles.push_back(h); } } ~DevContext() { - if (_dev_handle != nullptr) + for (auto h : _dev_handles) { - unregisterNPUmodel_all(_dev_handle); - putNPUdevice(_dev_handle); + if (h != nullptr) + { + unregisterNPUmodel_all(h); + putNPUdevice(h); + } } } - npudev_h getDev() { return _dev_handle; } + npudev_h getDev(int i) { return _dev_handles[i]; } + int getDevSize() { return _dev_handles.size(); } template <typename T> void setDataInfo(tensors_data_info *info, std::vector<T *> &tensors) { @@ -66,14 +80,15 @@ public: } } - template <typename T> void setBuffer(generic_buffers *buf, std::vector<T *> &tensors) + template <typename T> + void setBuffer(generic_buffers *buf, std::vector<T *> &tensors, int batch_size, int batch_index) { buf->num_buffers = static_cast<uint32_t>(tensors.size()); for (uint32_t idx = 0; idx < buf->num_buffers; ++idx) { - buf->bufs[idx].addr = tensors[idx]->buffer(); - buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size()); + buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size() / batch_size); + buf->bufs[idx].addr = tensors[idx]->buffer() + (batch_index * buf->bufs[idx].size); buf->bufs[idx].type = BUFFER_MAPPED; } } @@ -106,9 +121,8 @@ private: } private: - // NPU device handle - // TODO Support multicore npu device - npudev_h _dev_handle; + // NPU device handles + std::vector<npudev_h> _dev_handles; }; } // namespace trix diff --git a/runtime/onert/backend/trix/ops/BulkLayer.cc b/runtime/onert/backend/trix/ops/BulkLayer.cc index 71fdf3f0d..3c49da9a3 100644 --- a/runtime/onert/backend/trix/ops/BulkLayer.cc +++ b/runtime/onert/backend/trix/ops/BulkLayer.cc @@ -18,6 +18,7 @@ #include <util/logging.h> #include <libnpuhost.h> +#include <future> namespace onert { @@ -49,24 +50,56 @@ void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs, throw std::runtime_error("Unable to extract the model metadata"); } + _model_id.resize(_dev_context->getDevSize()); + generic_buffer model_file; model_file.type = BUFFER_FILE; model_file.filepath = binary_path.c_str(); model_file.size = _meta->size; - if (registerNPUmodel(dev_context->getDev(), &model_file, &_model_id) < 0) + for (int i = 0; i < _dev_context->getDevSize(); i++) + { + if (registerNPUmodel(dev_context->getDev(i), &model_file, &_model_id[i]) < 0) + { + throw std::runtime_error("Failed to register npu model"); + } + } +} + +void single_job(npudev_h dev, int req_id, input_buffers *input_buf, tensors_data_info *in_info, + output_buffers *output_buf, tensors_data_info *out_info) +{ + if (setNPU_requestData(dev, req_id, input_buf, in_info, output_buf, out_info)) + { + throw std::runtime_error("Unable to create NPU request for red_id (" + std::to_string(req_id) + + ")"); + } + + if (submitNPU_request(dev, req_id)) { - throw std::runtime_error("Failed to register npu model"); + throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) + + ")"); } } void BulkLayer::run() { - int 
req_id; - if (createNPU_request(_dev_context->getDev(), _model_id, &req_id)) + // TODO: Remove too many assumption + // We assume user wants batch execution if user's input size is multiples of model's input size + int user_input_batch = (_inputs[0]->get_info().shape()).dim(0); + int model_input_batch = _meta->input_seg_dims[0][0]; + int batch_size = user_input_batch / model_input_batch; + bool is_batch_execution = (batch_size != 1 ? true : false); + + std::vector<int> req_id(_dev_context->getDevSize()); + + for (int i = 0; i < _dev_context->getDevSize(); i++) { - throw std::runtime_error("Unable to create NPU request with model id (" + - std::to_string(_model_id) + ")"); + if (createNPU_request(_dev_context->getDev(i), _model_id[i], &req_id[i])) + { + throw std::runtime_error("Unable to create NPU request with model id (" + + std::to_string(_model_id[i]) + ")"); + } } if (_meta->input_seg_num != _inputs.size()) @@ -84,28 +117,58 @@ void BulkLayer::run() _dev_context->setDataInfo<const IPortableTensor>(&in_info, _inputs); _dev_context->setDataInfo<IPortableTensor>(&out_info, _outputs); - input_buffers input_buf; - output_buffers output_buf; - _dev_context->setBuffer<const IPortableTensor>(&input_buf, _inputs); - _dev_context->setBuffer<IPortableTensor>(&output_buf, _outputs); + std::vector<input_buffers> input_buf; + std::vector<output_buffers> output_buf; + input_buf.resize(_dev_context->getDevSize()); + output_buf.resize(_dev_context->getDevSize()); + + std::vector<std::future<void>> f(_dev_context->getDevSize()); - if (setNPU_requestData(_dev_context->getDev(), req_id, &input_buf, &in_info, &output_buf, - &out_info)) + const int num_cores = _dev_context->getDevSize(); + if (is_batch_execution) { - throw std::runtime_error("Unable to create NPU request for model id (" + - std::to_string(_model_id) + ")"); + // TODO: Support for general number of cores(>2) + // Here we assume that 2 trix cores + for (int i = 0; i < (batch_size); i = i + num_cores) + { + for (int core = 0; core < num_cores; core++) + { + _dev_context->setBuffer<const IPortableTensor>(&input_buf[core], _inputs, batch_size, + i + core); + _dev_context->setBuffer<IPortableTensor>(&output_buf[core], _outputs, batch_size, i + core); + } + for (int core = 0; core < num_cores; core++) + { + + if (i + core < batch_size) + { + f[core] = + std::async(std::launch::async, &single_job, _dev_context->getDev(core), req_id[core], + &input_buf[core], &in_info, &output_buf[core], &out_info); + } + } + for (int core = 0; core < num_cores; core++) + { + f[core].wait(); + } + } } - - if (submitNPU_request(_dev_context->getDev(), req_id)) + else { - throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) + - ")"); + _dev_context->setBuffer<const IPortableTensor>(&input_buf[0], _inputs, batch_size, 0); + _dev_context->setBuffer<IPortableTensor>(&output_buf[0], _outputs, batch_size, 0); + + single_job(_dev_context->getDev(0), req_id[0], &input_buf[0], &in_info, &output_buf[0], + &out_info); } - if (removeNPU_request(_dev_context->getDev(), req_id)) + for (int i = 0; i < _dev_context->getDevSize(); i++) { - throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) + - ")"); + if (removeNPU_request(_dev_context->getDev(i), req_id[i])) + { + throw std::runtime_error("Unable to remove NPU request with req id (" + + std::to_string(req_id[i]) + ")"); + } } } diff --git a/runtime/onert/backend/trix/ops/BulkLayer.h b/runtime/onert/backend/trix/ops/BulkLayer.h index 
f7080ccad..614c0f728 100644 --- a/runtime/onert/backend/trix/ops/BulkLayer.h +++ b/runtime/onert/backend/trix/ops/BulkLayer.h @@ -50,7 +50,7 @@ private: std::vector<const IPortableTensor *> _inputs; std::vector<IPortableTensor *> _outputs; - uint32_t _model_id; + std::vector<uint32_t> _model_id; npubin_meta *_meta; std::shared_ptr<DevContext> _dev_context; }; diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc index 28f729d77..9580bec8c 100644 --- a/runtime/onert/backend/xnnpack/KernelGenerator.cc +++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc @@ -56,17 +56,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI assert(_tensor_builder->dynamicTensorManager()); assert(_tensor_reg); - auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); - // Prepare to handle dynamic tensors later auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); { - dyn_ctx->op_ind = ind; - dyn_ctx->operations = &_operations_ctx; - dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); - - ret->dynamic_tensor_ctx(dyn_ctx); + dyn_ctx->op = &_operations_ctx.at(ind); + dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); } + ret->dynamic_tensor_ctx(dyn_ctx); auto &op = _graph.operations().at(ind); op.accept(*this); diff --git a/runtime/onert/core/CMakeLists.txt b/runtime/onert/core/CMakeLists.txt index 6dbadf80b..87c7a13e4 100644 --- a/runtime/onert/core/CMakeLists.txt +++ b/runtime/onert/core/CMakeLists.txt @@ -6,14 +6,18 @@ nnfw_find_package(Ruy REQUIRED) add_library(onert_core SHARED ${SOURCES}) set_target_properties(onert_core PROPERTIES POSITION_INDEPENDENT_CODE ON) + +# NOTE +# We publish public headers into developer package. +# To avoid mistake using private header in public header, do not define +# private target_include_directories scope for src/ directory. 
target_include_directories(onert_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) -target_include_directories(onert_core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) -target_link_libraries(onert_core PUBLIC nnfw_lib_misc half) -target_link_libraries(onert_core PRIVATE nnfw_lib_cker) + +target_link_libraries(onert_core PRIVATE jsoncpp half) +target_link_libraries(onert_core PRIVATE nnfw_lib_misc nnfw_lib_cker) target_link_libraries(onert_core PRIVATE nnfw_common) target_link_libraries(onert_core PRIVATE nnfw_coverage) target_link_libraries(onert_core PRIVATE dl ${LIB_PTHREAD}) -target_link_libraries(onert_core PRIVATE jsoncpp) target_link_libraries(onert_core PRIVATE ruy) target_link_libraries(onert_core INTERFACE ruy_instrumentation) @@ -48,6 +52,8 @@ set(TEST_ONERT_CORE test_onert_core) add_executable(${TEST_ONERT_CORE} ${TESTS}) target_link_libraries(${TEST_ONERT_CORE} onert_core) +# Requires linking nnfw_coverage: check header coverage +target_link_libraries(${TEST_ONERT_CORE} nnfw_coverage) target_link_libraries(${TEST_ONERT_CORE} gtest gtest_main dl ${LIB_PTHREAD}) add_test(${TEST_ONERT_CORE} ${TEST_ONERT_CORE}) diff --git a/runtime/onert/core/include/backend/ITensor.h b/runtime/onert/core/include/backend/ITensor.h index 0a4d9c814..560416264 100644 --- a/runtime/onert/core/include/backend/ITensor.h +++ b/runtime/onert/core/include/backend/ITensor.h @@ -20,6 +20,7 @@ #include <cstring> #include <cstdint> #include <functional> +#include <stdexcept> #include "ir/DataType.h" #include "ir/Layout.h" diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index 58bfe3406..cf2da4c34 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -103,7 +103,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct // 1. Scan DEF of outputs. If the DEF, allocate it // 2. Scan DEF of inputs. If variable tensor, allocate it // 3. Scan USE of inputs. 
Decrease the USE and deallocate if the USE is 0 - for (const auto op_ind : order) + for (const auto &op_ind : order) { const auto &op = graph.operations().at(op_ind); auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; @@ -161,7 +161,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct } } - for (auto ind : operands_last_until_end) + for (auto &ind : operands_last_until_end) { tensor_builder->notifyLastUse(ind); } diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h index befe40022..b44fcf836 100644 --- a/runtime/onert/core/include/compiler/BackendManager.h +++ b/runtime/onert/core/include/compiler/BackendManager.h @@ -17,12 +17,11 @@ #ifndef __ONERT_COMPILER_BACKEND_MANAGER_H__ #define __ONERT_COMPILER_BACKEND_MANAGER_H__ -#include <memory> -#include <map> - -#include "ir/Operands.h" #include "backend/Backend.h" -#include "backend/builtin/Backend.h" +#include "ir/Operands.h" + +#include <map> +#include <memory> namespace onert { @@ -41,7 +40,7 @@ public: public: backend::Backend *get(const std::string &key); const backend::Backend *get(const std::string &key) const; - const backend::builtin::Backend *getBuiltin() const; + const backend::Backend *getBuiltin() const; const std::vector<const backend::Backend *> getAll() const { std::vector<const backend::Backend *> v; @@ -65,7 +64,7 @@ private: private: std::map<std::string, std::unique_ptr<void, dlhandle_destroy_t>> _handle_map; std::map<std::string, std::unique_ptr<backend::Backend, backend_destroy_t>> _gen_map; - backend::builtin::Backend *_builtin{nullptr}; + backend::Backend *_builtin{nullptr}; /** * @brief load builtin backend * diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h index 292de4b12..f05d63c66 100644 --- a/runtime/onert/core/include/compiler/Compiler.h +++ b/runtime/onert/core/include/compiler/Compiler.h @@ -22,8 +22,8 @@ #ifndef __ONERT_COMPILER_COMPILE_H_ #define __ONERT_COMPILER_COMPILE_H_ -#include "ir/Graph.h" -#include "exec/IExecutor.h" +#include "ir/NNPkg.h" +#include "exec/Executors.h" #include "util/TracingCtx.h" namespace onert @@ -40,6 +40,10 @@ enum class State struct ManualSchedulerOptions { +public: + void setBackendMap(const std::string &str); + +public: std::string backend_for_all; std::unordered_map<ir::OpCode, std::string> opcode_to_backend; std::unordered_map<ir::OperationIndex, std::string> index_to_backend; @@ -50,8 +54,14 @@ struct PartialGraphOptions std::unordered_map<ir::OperationIndex, ir::SubgraphIndex> index_to_graph; }; -struct CompilerOptions +class CompilerOptions { +public: + // Set default values for CompilerOptions + // All these default values should not be fetched from Env, when we stop supporting Android NNAPI. 
+ static std::unique_ptr<CompilerOptions> fromGlobalConfig(); + +public: // GENERAL OPTIONS std::vector<std::string> backend_list; @@ -65,75 +75,85 @@ struct CompilerOptions bool disable_compile; //< Run with Interpreter if true, try compilation otherwise bool fp16_enable; //< Whether fp16 mode ON/OFF PartialGraphOptions partial_graph_options; - - util::TracingCtx *tracing_ctx; //< Profiling information }; -CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs); +struct CompilerArtifact +{ + CompilerArtifact(void) = delete; + CompilerArtifact(std::shared_ptr<exec::Executors> executors, + std::unique_ptr<const util::TracingCtx> tracing_ctx) + : _executors{executors}, _tracing_ctx{std::move(tracing_ctx)} {}; + + std::shared_ptr<exec::Executors> _executors; + std::unique_ptr<const util::TracingCtx> _tracing_ctx; +}; /** - * @brief Class to compile graph model + * @brief Class to compile NN package */ class Compiler { public: /** - * @brief Construct a new Compiler object - * @param[in] subgs All subgraphs of a model - * @param[in] tracing_ctx Profiling information + * @brief Construct a new Compiler object for single model + * @param[in] model model to compile + * @param[in] coptions Compiler Options + */ + Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt); + + /** + * @brief Construct a new Compiler object for NN package + * @param[in] nnpkg NN package to compile + * @param[in] coptions Compiler option vector for each model in package */ - Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx); + Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg, + std::vector<std::unique_ptr<CompilerOptions>> &copts); public: /** * @brief Do compilation with the options * - * @return std::shared_ptr<exec::ExecutorMap> Executors as a result of compilation + * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation */ - std::shared_ptr<exec::ExecutorMap> compile(void); + std::shared_ptr<CompilerArtifact> compile(void); /** * @brief Do compilation with the options * - * @return std::vector<std::shared_ptr<exec::ExecutorMap>> Executors as a result of compilation + * @return std::vector<std::shared_ptr<CompilerArtifact>> Executors as a result of compilation * for pipeline */ - std::vector<std::shared_ptr<exec::ExecutorMap>> compile(const char *package_file_path, - const char *map_file_path); + std::vector<std::shared_ptr<CompilerArtifact>> compile(const char *package_file_path, + const char *map_file_path); State state(void) const { return _state; } - CompilerOptions &options() { return _options; } - /** * @brief Allow to compute float32 using float16 data type */ void enableToFp16(); /** - * @brief Set backends from string-encoded mappings from operation index to backend type (cpu, - * acl_cl) - */ - void set_backend_from_str(const char *backend_settings); - - /** * @brief Build the partial graphs to compile with original graph */ bool buildPartialGraph(uint32_t num_graphs); private: void checkProfilerConditions(); - std::shared_ptr<ir::Graph> &primary_subgraph() { return _subgraphs->at(ir::SubgraphIndex{0}); } + std::shared_ptr<ir::Graph> &primary_subgraph() + { + return _nnpkg->primary_model()->at(ir::SubgraphIndex{0}); + } private: - std::shared_ptr<ir::Subgraphs> _subgraphs; + std::shared_ptr<ir::NNPkg> _nnpkg; // NOTE These executors does not have duplicated subgraph. 
This mean they do not allow support // subgraphs being called recursively because data of non-constant tensor of parent executor will // be updated by child executor. If you want to support subgraphs being called recursively, you // have to add allocate non-constant tensor memory of executors in execution time when each // subgraph is called. State _state; - CompilerOptions _options; + std::vector<CompilerOptions *> _voptions; }; } // namespace compiler diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h index 10ca8e9fc..7264f2a10 100644 --- a/runtime/onert/core/include/compiler/LoweredGraph.h +++ b/runtime/onert/core/include/compiler/LoweredGraph.h @@ -60,9 +60,14 @@ public: private: void makeLowerInfo(const compiler::BackendResolver &backend_resolver); void dumpLowerInfo(); - void lowerGraph(const ir::Graph &graph, const compiler::CompilerOptions &options); + void lowerGraph(const compiler::CompilerOptions &options); private: + /** + * @brief Copy of target graph for lowering + * @note It uses copy of graph, not reference. + * It allows the original graph can be compiled multiple times. + */ ir::Graph _graph; ir::Graph _parent_graph; std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h index b2272e262..f701dc207 100644 --- a/runtime/onert/core/include/compiler/StaticShapeInferer.h +++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h @@ -28,6 +28,36 @@ namespace onert { namespace compiler { +/** + * @brief Class that observe and update operands. + */ +class OperandObserver +{ +public: + /** + * @brief Constructor of OperandObserver + * + * @param operands Operands to be updated + */ + OperandObserver(const std::vector<ir::Operand *> &operands) : _operands{operands} {} + /** + * @brief Destructor of OperandObserver + */ + virtual ~OperandObserver() = default; + +public: + /** + * @brief Update Shape and some OperandInfo of operands + * + * @param operands Operands to be updated + * @param unpredictable Whether runtime can predict shapes of operands in compilation time + */ + void updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info, + bool unpredictable = false); + +private: + std::vector<ir::Operand *> _operands; +}; /** * @brief Class to infer shape before running kernels. 
It does the following: @@ -38,32 +68,42 @@ namespace compiler class StaticShapeInferer : public ir::OperationVisitor { public: - StaticShapeInferer( - const ir::SubgraphIndex &subg_idx, - const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> - &lowered_subgs) - : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()), - _operations(lowered_subgs.at(subg_idx)->graph().operations()), - _return_has_dynamic_tensor(false) - { /* empty */ + StaticShapeInferer(compiler::LoweredGraph *lowered_subg) + : _lowered_subg{lowered_subg}, _subg_input_observers{}, _controlflow_output_observer{nullptr}, + _child_inferers{} + { } virtual ~StaticShapeInferer() = default; public: + void appendSubgInputObserver(const ir::SubgraphIndex &subg_idx, + std::unique_ptr<OperandObserver> &&subg_input_observer) noexcept + { + _subg_input_observers[subg_idx] = std::move(subg_input_observer); + } + + void setControlflowOutputObserver(std::unique_ptr<OperandObserver> &&output_observer) noexcept + { + _controlflow_output_observer = std::move(output_observer); + } + + void appendChildInferer(const ir::SubgraphIndex &subg_idx, compiler::StaticShapeInferer *inferer) + { + _child_inferers[subg_idx] = inferer; + } + /** - * @brief Infer shape of operands beloning to ops and set the output shape. + * @brief Infer shape of operands belonging to ops and set the output shape. * If output shape cannot be known without running op, mark it so that it can be allocated * when running kernel. - * @param op Operation - * @return @c true if op's input or output has any dynamic tensor; @c false otherwise. */ - bool infer(const ir::Operation &op); + void infer(void); void dump(); private: - void inferSubgraph(ir::SubgraphIndex subg_ind); bool checkDynamicInput(const ir::Operation &op); + bool checkDynamicOutput(const ir::Operation &op); void setDynamicOutput(const ir::Operation &op); private: @@ -113,6 +153,7 @@ private: void visit(const ir::operation::Unpack &op) override; void visit(const ir::operation::While &op) override; void visit(const ir::operation::DetectionPostProcess &op) override; + void visit(const ir::operation::Bulk &op) override; private: /** @@ -128,12 +169,11 @@ private: void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx); private: - const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> - &_lowered_subgs; - // _operands and _operations can be changed by controlflow operation - ir::Operands &_operands; // operands of current subgraph - ir::Operations &_operations; // operations of current subgraph - bool _return_has_dynamic_tensor; + compiler::LoweredGraph *_lowered_subg; + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<OperandObserver>> + _subg_input_observers; // child subg input + std::unique_ptr<OperandObserver> _controlflow_output_observer; // parent controlflow op output + std::unordered_map<ir::SubgraphIndex, compiler::StaticShapeInferer *> _child_inferers; }; } // namespace compiler diff --git a/runtime/onert/core/include/exec/Execution.h b/runtime/onert/core/include/exec/Execution.h index b0a5cced3..1e8083c4c 100644 --- a/runtime/onert/core/include/exec/Execution.h +++ b/runtime/onert/core/include/exec/Execution.h @@ -22,7 +22,7 @@ #define __ONERT_EXEC_EXECUTION_H__ #include "ir/Layout.h" -#include "exec/IExecutor.h" +#include "exec/Executors.h" #include "IODescription.h" #include <thread> @@ -46,7 +46,7 @@ public: * @brief Construct a new Execution object * @param[in] executor Model 
executor */ - Execution(const std::shared_ptr<ExecutorMap> &executors); + Execution(const std::shared_ptr<Executors> &executors); public: /** @@ -250,7 +250,7 @@ private: std::unique_ptr<IExecutor> &primary_executor() { return _executors->at(ir::SubgraphIndex{0}); }; private: - const std::shared_ptr<ExecutorMap> _executors; + const std::shared_ptr<Executors> _executors; IODescription _io_desc; std::deque<std::pair<IODescription *, uint32_t>> _async_io_descs; sem_t _async_io_descs_sem; diff --git a/runtime/onert/core/include/exec/Executors.h b/runtime/onert/core/include/exec/Executors.h new file mode 100644 index 000000000..5adb0eda4 --- /dev/null +++ b/runtime/onert/core/include/exec/Executors.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_EXEC_EXECUTORS_H__ +#define __ONERT_EXEC_EXECUTORS_H__ + +#include "IExecutor.h" +#include "ir/NNPkg.h" + +namespace onert +{ +namespace exec +{ + +/** + * @brief Class to gather executors + */ +class Executors +{ +public: + Executors(void) = default; + Executors(std::unique_ptr<ir::ModelEdges> model_edges) { _model_edges = std::move(model_edges); } + Executors(const Executors &) = delete; + Executors(Executors &&) = default; + + // TODO Use Executor index + void emplace(ir::SubgraphIndex idx, std::unique_ptr<IExecutor> exec) + { + _executors.emplace(idx, std::move(exec)); + } + + std::unique_ptr<IExecutor> &at(ir::SubgraphIndex idx) { return _executors.at(idx); } + + uint32_t inputSize() const; + + uint32_t outputSize() const; + + const ir::OperandInfo inputInfo(const ir::IOIndex &index); + + const ir::OperandInfo outputInfo(const ir::IOIndex &index); + + void execute(const IODescription &desc); + +private: + void executeEntries(const IODescription &desc); + +private: + // TODO Use Executor index + // Changing index will affect if/while compile and kernel implementation + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors; + // NOTE _model_edges may use different struct type for executor implementation + std::unique_ptr<ir::ModelEdges> _model_edges; +}; + +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_EXECUTORS_H__ diff --git a/runtime/onert/core/include/exec/FunctionSequence.h b/runtime/onert/core/include/exec/FunctionSequence.h index cf3f2a882..7ff6d8b8c 100644 --- a/runtime/onert/core/include/exec/FunctionSequence.h +++ b/runtime/onert/core/include/exec/FunctionSequence.h @@ -75,8 +75,7 @@ public: public: // methods related to dynamic tensor struct DynamicTensorCtx { - ir::OperationIndex op_ind; - const ir::Operations *operations = nullptr; + const ir::Operation *op = nullptr; std::shared_ptr<exec::DynamicShapeInferer> dynamic_shape_inferer = nullptr; };
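The slimmed-down DynamicTensorCtx carries the operation pointer directly instead of an index plus a lookup table. A sketch of how a kernel generator is expected to populate it (it mirrors the builtin KernelGenerator change later in this patch; variable names are illustrative, not normative):

auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
dyn_ctx->op = &graph.operations().at(op_index); // the operation itself, no table lookup at run time
dyn_ctx->dynamic_shape_inferer = std::make_unique<exec::DynamicShapeInferer>(graph.operands(), tensor_registry);
fn_seq->dynamic_tensor_ctx(dyn_ctx);

diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h index adc68074f..bb5b5af98 100644 --- a/runtime/onert/core/include/exec/IExecutor.h +++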
b/runtime/onert/core/include/exec/IExecutor.h @@ -107,8 +107,6 @@ struct IExecutor virtual const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const = 0; }; -using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>; - } // namespace exec } // namespace onert diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h index 7a7688334..286caf72f 100644 --- a/runtime/onert/core/include/ir/Graph.h +++ b/runtime/onert/core/include/ir/Graph.h @@ -20,9 +20,9 @@ #include <functional> #include <unordered_map> +#include "ir/Model.h" #include "ir/Operands.h" #include "ir/Operations.h" -#include "ir/Subgraphs.h" namespace onert { @@ -50,7 +50,9 @@ private: }; public: - Graph(void); + explicit Graph(void); + explicit Graph(const Graph &); + ~Graph(void); // Graph Building @@ -87,10 +89,9 @@ public: void verify(void); void removeOperand(const OperandIndex &ind) { _operands.remove(ind); } void setLayout(Layout layout) { _layout = layout; } - void setSubgraphs(const std::shared_ptr<Subgraphs> &subgs) { _subgraphs = subgs; } - void setPartialgraphs(const std::shared_ptr<Subgraphs> &partialgraphs) + void setPartialModel(const std::shared_ptr<Model> &partial_model) { - _partialgraphs = partialgraphs; + _partialgraphs = partial_model; } void setTensorName(std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names) @@ -134,27 +135,25 @@ public: Operands &operands() { return _operands; } // TODO Remove this non-const accessor const Operations &operations() const { return _operations; } Operations &operations() { return _operations; } - const std::shared_ptr<Subgraphs> &subgraphs() const { return _subgraphs; } - std::shared_ptr<Subgraphs> &subgraphs() { return _subgraphs; } Layout layout() const { return _layout; } - std::shared_ptr<Subgraphs> &partialgraphs() { return _partialgraphs; } + std::shared_ptr<Model> &partialgraphs() { return _partialgraphs; } std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names() { return _tensor_names; } - std::unordered_map<std::string, IOIndex>::iterator _name_to_input_begin() + std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_begin() const { return _name_to_input.begin(); } - std::unordered_map<std::string, IOIndex>::iterator _name_to_input_end() + std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_end() const { return _name_to_input.end(); } - std::unordered_map<std::string, IOIndex>::iterator _name_to_output_begin() + std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_begin() const { return _name_to_output.begin(); } - std::unordered_map<std::string, IOIndex>::iterator _name_to_output_end() + std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_end() const { return _name_to_output.end(); } @@ -172,13 +171,11 @@ private: OperandIndexSequence _outputs; std::unordered_map<std::string, IOIndex> _name_to_input; std::unordered_map<std::string, IOIndex> _name_to_output; - // Child subgraphs - std::shared_ptr<Subgraphs> _subgraphs; // TFLite and circle's default layout is NHWC; Layout _layout{Layout::NHWC}; - // Partial Graphs - std::shared_ptr<ir::Subgraphs> _partialgraphs; + // model for partial graphs + std::shared_ptr<ir::Model> _partialgraphs; std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names; };
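Graph's new explicit copy constructor pairs with the LoweredGraph note above: lowering now works on a private copy rather than mutating the caller's graph. A hypothetical sketch of what this enables (constructor shape taken from its use in Compiler.cc later in this patch; variable names are illustrative):

ir::Graph &subg = *model->at(ir::SubgraphIndex{0}); // graph owned by the Model
compiler::LoweredGraph first{subg, options_a};  // lowers a private copy
compiler::LoweredGraph second{subg, options_b}; // original subg is still intact

diff --git a/runtime/onert/core/include/ir/Index.h b/runtime/onert/core/include/ir/Index.h index e01b090f3..f01a4c84d 100644 ---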
a/runtime/onert/core/include/ir/Index.h +++ b/runtime/onert/core/include/ir/Index.h @@ -38,6 +38,9 @@ using IOIndex = ::onert::util::Index<uint32_t, IOIndexTag>; struct SubgraphIndexTag; using SubgraphIndex = ::onert::util::Index<uint32_t, SubgraphIndexTag>; +struct ModelIndexTag; +using ModelIndex = ::onert::util::Index<uint32_t, ModelIndexTag>; + template <typename IndexType> std::ostream &_index_print_impl(std::ostream &o, const std::string &prefix, IndexType index) { @@ -64,7 +67,12 @@ inline std::ostream &operator<<(std::ostream &o, const IOIndex &i) inline std::ostream &operator<<(std::ostream &o, const SubgraphIndex &i) { - return _index_print_impl(o, "SUBGRAPH", i); // $ubgraph + return _index_print_impl(o, "SUBGRAPH", i); +} + +inline std::ostream &operator<<(std::ostream &o, const ModelIndex &i) +{ + return _index_print_impl(o, "MODEL", i); } } // namespace ir diff --git a/runtime/onert/core/include/ir/Layout.h b/runtime/onert/core/include/ir/Layout.h index 082810172..0cdbcc2c8 100644 --- a/runtime/onert/core/include/ir/Layout.h +++ b/runtime/onert/core/include/ir/Layout.h @@ -18,6 +18,7 @@ #define __ONERT_IR_LAYOUT_H__ #include <functional> +#include <stdexcept> #include <string> namespace onert diff --git a/runtime/onert/core/include/ir/Subgraphs.h b/runtime/onert/core/include/ir/Model.h index 6cb369447..c3c0d87b8 100644 --- a/runtime/onert/core/include/ir/Subgraphs.h +++ b/runtime/onert/core/include/ir/Model.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_IR_SUBGRAPHS_H__ -#define __ONERT_IR_SUBGRAPHS_H__ +#ifndef __ONERT_IR_MODEL_H__ +#define __ONERT_IR_MODEL_H__ #include <memory> #include <unordered_map> @@ -30,15 +30,15 @@ namespace ir class Graph; -class Subgraphs +class Model { public: - Subgraphs() = default; - Subgraphs(const Subgraphs &obj) = default; - Subgraphs(Subgraphs &&) = default; - Subgraphs &operator=(const Subgraphs &) = default; - Subgraphs &operator=(Subgraphs &&) = default; - ~Subgraphs() = default; + Model() = default; + Model(const Model &obj) = default; + Model(Model &&) = default; + Model &operator=(const Model &) = default; + Model &operator=(Model &&) = default; + ~Model() = default; /** * @brief Put subgraph in the container with a new Index for that @@ -120,14 +120,14 @@ public: * * @return count of Subgraphs */ - size_t count() const { return _subgraphs.size(); } + size_t subgraphs_count() const { return _subgraphs.size(); } /** * @brief Return the primary subgraph * - * @return std::shared_ptr<Graph> Primary sugraph + * @return std::shared_ptr<Graph> Primary subgraph */ - std::shared_ptr<Graph> primary() const { return _subgraphs.at(SubgraphIndex{0}); } + std::shared_ptr<Graph> primary_subgraph() const { return _subgraphs.at(SubgraphIndex{0}); } private: std::unordered_map<SubgraphIndex, std::shared_ptr<Graph>> _subgraphs; @@ -136,4 +136,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_SUBGRAPHS_H__ +#endif // __ONERT_IR_MODEL_H__ diff --git a/runtime/onert/core/include/ir/NNPkg.h b/runtime/onert/core/include/ir/NNPkg.h new file mode 100644 index 000000000..d9f825e85 --- /dev/null +++ b/runtime/onert/core/include/ir/NNPkg.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_IR_NNPKG_H__ +#define __ONERT_IR_NNPKG_H__ + +#include <memory> +#include <unordered_set> +#include <vector> + +#include "ir/Index.h" +#include "ir/Model.h" + +namespace onert +{ +namespace ir +{ + +using IODesc = std::tuple<ModelIndex, SubgraphIndex, IOIndex>; + +struct ModelEdge +{ + IODesc from; + IODesc to; +}; + +struct ModelEdgeEqual +{ + bool operator()(const onert::ir::ModelEdge &lhs, const onert::ir::ModelEdge &rhs) const + { + return lhs.from == rhs.from && lhs.to == rhs.to; + } +}; + +struct ModelEdgeHash +{ + size_t operator()(const ::onert::ir::ModelEdge &edge) const noexcept + { + unsigned long long h1 = (std::get<0>(edge.from).value() << 24) | + (std::get<1>(edge.from).value() << 16) | std::get<2>(edge.from).value(); + unsigned long long h2 = (std::get<0>(edge.to).value() << 24) | + (std::get<1>(edge.to).value() << 16) | std::get<2>(edge.to).value(); + return h1 + h2; + } +}; + +inline std::ostream &operator<<(std::ostream &o, const IODesc &od) +{ + o << std::get<0>(od).value() << ":" << std::get<1>(od).value() << ":" << std::get<2>(od).value(); + return o; +} + +using ModelEdgeSet = std::unordered_set<ir::ModelEdge, ir::ModelEdgeHash, ir::ModelEdgeEqual>; + +/** + * @brief Struct to gather model I/O information in multimodel NN package + * Each model I/O plays one of the roles below + * - Package input/output + * - Start/finish point of an edge between models + */ +struct ModelEdges +{ + std::vector<ir::IODesc> pkg_inputs; + std::vector<ir::IODesc> pkg_outputs; + ModelEdgeSet edges; +}; + +class NNPkg +{ +public: + NNPkg() = default; + NNPkg(const NNPkg &obj) = default; + NNPkg(NNPkg &&) = default; + NNPkg &operator=(const NNPkg &) = default; + NNPkg &operator=(NNPkg &&) = default; + ~NNPkg() = default; + + NNPkg(std::shared_ptr<Model> model) { _models[ModelIndex{0}] = model; } + std::shared_ptr<Model> primary_model() { return _models.at(onert::ir::ModelIndex{0}); } + + /** + * @brief Put model at index + * + * @param[in] index Index where Model is to be pushed + * @param[in] model Model to be pushed + */ + void push(ModelIndex index, const std::shared_ptr<Model> &model) { _models[index] = model; } + + /** + * @brief Get the count of models + * + * @return the count of models + */ + size_t model_count() const { return _models.size(); } + + /** + * @brief Get model at index + * + * @param[in] index Index of the model to be returned + * @return Model at index + */ + const std::shared_ptr<Model> &model(const ModelIndex &index) const { return _models.at(index); } + /** + * @brief Get model at index + * + * @param[in] index Index of the model to be returned + * @return Model at index + */ + std::shared_ptr<Model> &model(const ModelIndex &index) { return _models.at(index); } + + /** + * @brief Get pkg_input at index + * + * @param[in] index Index of pkg_input to be returned + * @return IODesc at index + */ + const IODesc &input(uint32_t index) const { return _edges.pkg_inputs[index]; } + /** + * @brief Get pkg_input at index + * + * @param[in] index Index of pkg_input to be returned + * @return IODesc at index + */ + IODesc &input(uint32_t index) {
return _edges.pkg_inputs[index]; } + /** + * @brief Add input at the end + * + * @param[in] input Input IODesc to be pushed + */ + void addInput(const IODesc &input) { _edges.pkg_inputs.push_back(input); } + + /** + * @brief Get pkg_output at index + * + * @param[in] index Index of pkg_output to be returned + * @return IODesc at index + */ + const IODesc &output(uint32_t index) const { return _edges.pkg_outputs[index]; } + /** + * @brief Get pkg_output at index + * + * @param[in] index Index of pkg_output to be returned + * @return IODesc at index + */ + IODesc &output(uint32_t index) { return _edges.pkg_outputs[index]; } + /** + * @brief Add output at the end + * + * @param[in] output Output IODesc to be pushed + */ + void addOutput(const IODesc &output) { _edges.pkg_outputs.push_back(output); } + + /** + * @brief Add edge between models at the end + * + * @param[in] from from IODesc + * @param[in] to to IODesc + */ + void addEdge(const IODesc &from, const IODesc &to) + { + std::cout << from << " -> " << to << std::endl; + _edges.edges.insert(ModelEdge{from, to}); + } + /** + * @brief Get model edge set + * @return Edge set reference + */ + const ModelEdges &model_edges() { return _edges; } + + // TODO: Add iterate() or getter for edges + +private: + std::unordered_map<ModelIndex, std::shared_ptr<Model>> _models; + ModelEdges _edges; +}; + +} // namespace ir +} // namespace onert + +#endif // __ONERT_IR_NNPKG_H__ diff --git a/runtime/onert/core/include/ir/TypeInfo.h b/runtime/onert/core/include/ir/TypeInfo.h index 0a00da5fd..3c5062795 100644 --- a/runtime/onert/core/include/ir/TypeInfo.h +++ b/runtime/onert/core/include/ir/TypeInfo.h @@ -50,11 +50,7 @@ public: public: DataType type() const { return _type; } - float scale() const - { - assert(_quant.scales.size() == 1); - return _quant.scales[0]; - } + float scale() const { return _quant.scales[0]; } const std::vector<float> &scales() const { return _quant.scales; } int32_t zero_point() const { diff --git a/runtime/onert/core/include/ir/operation/Bulk.h b/runtime/onert/core/include/ir/operation/Bulk.h index 1825f7fad..3c20f392f 100644 --- a/runtime/onert/core/include/ir/operation/Bulk.h +++ b/runtime/onert/core/include/ir/operation/Bulk.h @@ -32,6 +32,8 @@ public: struct Param { std::string binary_path; + std::vector<ir::Shape> origin_input_shapes; + std::vector<ir::Shape> origin_output_shapes; }; public: diff --git a/runtime/onert/core/include/util/CalculateActivationRange.h b/runtime/onert/core/include/util/CalculateActivationRange.h index db76f9dde..4369ca53e 100644 --- a/runtime/onert/core/include/util/CalculateActivationRange.h +++ b/runtime/onert/core/include/util/CalculateActivationRange.h @@ -17,6 +17,8 @@ #ifndef __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__ #define __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__ +#include <limits> + #include "ir/InternalType.h" namespace onert diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst index 89a9a6ac2..4bbc02ac3 100644 --- a/runtime/onert/core/include/util/Config.lst +++ b/runtime/onert/core/include/util/Config.lst @@ -20,7 +20,7 @@ // Name | Type | Default CONFIG(GRAPH_DOT_DUMP , int , "0") -CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;bcq") // FIXME Remove bcq +CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;trix;bcq") // FIXME Remove bcq CONFIG(OP_BACKEND_ALLOPS , std::string , "") CONFIG(OP_BACKEND_MAP , std::string , "") CONFIG(DISABLE_COMPILE , bool , "0")
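Taken together, Model, NNPkg, and ModelEdges form the multimodel packaging introduced in this release. A hypothetical sketch of assembling a two-model package with the accessors declared above (the model handles and index values are illustrative):

auto pkg = std::make_shared<ir::NNPkg>();
pkg->push(ir::ModelIndex{0}, encoder); // std::shared_ptr<ir::Model>
pkg->push(ir::ModelIndex{1}, decoder);
// package I/O: model 0 consumes the package input, model 1 produces the package output
pkg->addInput(ir::IODesc{ir::ModelIndex{0}, ir::SubgraphIndex{0}, ir::IOIndex{0}});
pkg->addOutput(ir::IODesc{ir::ModelIndex{1}, ir::SubgraphIndex{0}, ir::IOIndex{0}});
// connect model 0's first output to model 1's first input
pkg->addEdge(ir::IODesc{ir::ModelIndex{0}, ir::SubgraphIndex{0}, ir::IOIndex{0}},
             ir::IODesc{ir::ModelIndex{1}, ir::SubgraphIndex{0}, ir::IOIndex{0}});

diff --git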
a/runtime/onert/core/include/util/ConfigSource.h b/runtime/onert/core/include/util/ConfigSource.h index da8bc8620..d53b8106d 100644 --- a/runtime/onert/core/include/util/ConfigSource.h +++ b/runtime/onert/core/include/util/ConfigSource.h @@ -17,17 +17,17 @@ #ifndef __ONERT_UTIL_CONFIG_SOURCE_H__ #define __ONERT_UTIL_CONFIG_SOURCE_H__ -#include <memory> - -#include "IConfigSource.h" +#include <string> +#include <unordered_map> namespace onert { namespace util { -void config_source(std::unique_ptr<IConfigSource> &&source); -void config_source_ext(std::unique_ptr<IConfigSource> &&source); +using CfgKeyValues = std::unordered_map<std::string, std::string>; + +void setConfigKeyValues(const CfgKeyValues &keyValues); bool toBool(const std::string &val); int toInt(const std::string &val); diff --git a/runtime/onert/core/include/util/EnvConfigSource.h b/runtime/onert/core/include/util/EnvConfigSource.h deleted file mode 100644 index 8c5d0e8e9..000000000 --- a/runtime/onert/core/include/util/EnvConfigSource.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_UTIL_ENV_CONFIG_SOURCE_H__ -#define __ONERT_UTIL_ENV_CONFIG_SOURCE_H__ - -#include <unordered_map> - -#include "util/GeneralConfigSource.h" - -namespace onert -{ -namespace util -{ - -class EnvConfigSource final : public GeneralConfigSource -{ -public: - std::string get(const std::string &key) const override; - -private: - std::unordered_map<std::string, std::string> _default_attributes; -}; - -} // namespace util -} // namespace onert - -#endif // __ONERT_UTIL_ENV_CONFIG_SOURCE_H__ diff --git a/runtime/onert/core/include/util/GeneralConfigSource.h b/runtime/onert/core/include/util/GeneralConfigSource.h deleted file mode 100644 index dedc820ec..000000000 --- a/runtime/onert/core/include/util/GeneralConfigSource.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__ -#define __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__ - -#include <unordered_map> - -#include "util/IConfigSource.h" - -namespace onert -{ -namespace util -{ - -class GeneralConfigSource : public IConfigSource -{ -public: - GeneralConfigSource() = default; - - std::string get(const std::string &key) const override; - void set(const std::string &key, const std::string &val); - -private: - std::unordered_map<std::string, std::string> _map; -}; - -} // namespace util -} // namespace onert - -#endif // __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__ diff --git a/runtime/onert/core/include/util/IConfigSource.h b/runtime/onert/core/include/util/IConfigSource.h deleted file mode 100644 index 07b09848a..000000000 --- a/runtime/onert/core/include/util/IConfigSource.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_UTIL_I_CONFIG_SOURCE_H__ -#define __ONERT_UTIL_I_CONFIG_SOURCE_H__ - -#include <string> - -namespace onert -{ -namespace util -{ - -struct IConfigSource -{ - /** - * @brief Destroy the IConfigSource object - */ - virtual ~IConfigSource() = default; - - /** - * @brief get the value for the matching key - * - * @param key string key to search - * @return string value associated with the key - */ - virtual std::string get(const std::string &key) const = 0; -}; - -} // namespace util -} // namespace onert - -#endif // __ONERT_UTIL_I_CONFIG_SOURCE_H__ diff --git a/runtime/onert/core/include/util/ObjectManager.h b/runtime/onert/core/include/util/ObjectManager.h index a493789fa..36b6c85c8 100644 --- a/runtime/onert/core/include/util/ObjectManager.h +++ b/runtime/onert/core/include/util/ObjectManager.h @@ -17,14 +17,13 @@ #ifndef __ONERT_UTIL_OBJECT_MANAGER_H__ #define __ONERT_UTIL_OBJECT_MANAGER_H__ -#include <unordered_map> -#include <memory> -#include <list> -#include <functional> +#include "util/logging.h" +#include <cassert> +#include <functional> +#include <list> #include <memory> - -#include "util/logging.h" +#include <unordered_map> namespace onert { @@ -208,7 +207,7 @@ public: l.push_back(e.first); } - for (auto index : l) + for (auto &index : l) { fn(index, *_objects[index]); } diff --git a/runtime/onert/core/include/util/TracingCtx.h b/runtime/onert/core/include/util/TracingCtx.h index 334257d87..da284d2fb 100644 --- a/runtime/onert/core/include/util/TracingCtx.h +++ b/runtime/onert/core/include/util/TracingCtx.h @@ -19,7 +19,7 @@ #include "ir/Graph.h" #include "ir/Index.h" -#include "ir/Subgraphs.h" +#include "ir/Model.h" #include <unordered_map> #include <mutex> @@ -37,29 +37,9 @@ class TracingCtx public: /** * @brief Create and store unique session id managed by this class - * Note that this constructor can be called by multiple sessions running in parallely. - * Use this constructor only when there is only one subgraph in a model. 
+ * @note This constructor can be called by multiple sessions running in parallel. */ - TracingCtx(const ir::Graph *primary_subgraph) - { - decideSessionID(); - _subgraph_indices.emplace(primary_subgraph, 0); - } - - /** - * @brief Create and store unique session id managed by this class - * Note that this constructor can be called by multiple sessions running in parallely. - */ - TracingCtx(const onert::ir::Subgraphs *subgraphs) - { - assert(subgraphs); - - decideSessionID(); - - auto count = subgraphs->count(); - for (size_t i = 0; i < count; i++) - _subgraph_indices.emplace(subgraphs->at(onert::ir::SubgraphIndex(i)).get(), i); - } + TracingCtx(void) { decideSessionID(); } uint32_t getSessionId() const { return _session_id; } diff --git a/runtime/onert/core/src/backend/builtin/ExternalContext.h b/runtime/onert/core/src/backend/builtin/ExternalContext.h index e67be988d..390dbb579 100644 --- a/runtime/onert/core/src/backend/builtin/ExternalContext.h +++ b/runtime/onert/core/src/backend/builtin/ExternalContext.h @@ -24,6 +24,8 @@ #include <ruy/ctx.h> #include <ruy/tune.h> +#include <memory> + namespace onert { namespace backend diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc index 3d6358d9d..fa2fc0b94 100644 --- a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc +++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc @@ -16,12 +16,10 @@ #include "KernelGenerator.h" -#include <backend/BackendContext.h> -#include <util/Utils.h> #include "kernel/IfLayer.h" -#include "kernel/WhileLayer.h" #include "kernel/PermuteLayer.h" -#include "exec/ExecutorBase.h" +#include "kernel/WhileLayer.h" + #include "exec/FunctionSequence.h" namespace onert @@ -35,12 +33,12 @@ KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *d const std::shared_ptr<TensorRegistry> &tensor_reg, const std::shared_ptr<ExternalContext> &external_context) : basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager}, - _tensor_reg{tensor_reg}, _tensor_registries{}, _executor_map{nullptr}, _external_context{ external_context} + _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _external_context{ external_context} { UNUSED_RELEASE(_graph); UNUSED_RELEASE(_tensor_registries); - UNUSED_RELEASE(_executor_map); + UNUSED_RELEASE(_executors); } std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind) @@ -48,20 +46,16 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI assert(_dyn_tensor_manager); assert(_tensor_reg); - auto dyn_shape_inferer = - std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg); - auto ret = std::make_unique<exec::FunctionSequence>(); // Prepare to handle dynamic tensors later auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); { - dyn_ctx->op_ind = ind; - dyn_ctx->operations = &_graph.operations(); - dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); - - ret->dynamic_tensor_ctx(dyn_ctx); + dyn_ctx->op = &_graph.operations().at(ind); + dyn_ctx->dynamic_shape_inferer = + std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg); } + ret->dynamic_tensor_ctx(dyn_ctx); auto &op = _graph.operations().at(ind); op.accept(*this); @@ -90,12 +84,12 @@ void KernelGenerator::visit(const ir::operation::If &node) output_tensors.emplace_back(output_tensor); } - // IfLayer just set ExecutorMap instead of then and else executor to avoid
complexity of + // IfLayer just sets Executors instead of then and else executor to avoid complexity of // creating executor recursively const auto cond_tensor = input_tensors.front(); input_tensors.erase(input_tensors.begin()); auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>( - cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map, + cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executors, _external_context); _return_fn = std::move(fn); @@ -136,10 +130,10 @@ void KernelGenerator::visit(const ir::operation::While &node) output_tensors.emplace_back(output_tensor); } - // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of + // WhileLayer just sets Executors instead of cond and body executor to avoid complexity of // creating executor recursively auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>( - input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map, + input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors, _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context); _return_fn = std::move(fn); diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/KernelGenerator.h index 00ad962b9..d5931ca26 100644 --- a/runtime/onert/core/src/backend/builtin/KernelGenerator.h +++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.h @@ -17,13 +17,14 @@ #ifndef __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__ -#include "exec/IExecutor.h" +#include "DynamicTensorManager.h" #include "ExternalContext.h" -#include "ir/Graph.h" -#include "TensorBuilder.h" -#include "compiler/TensorRegistries.h" -#include "backend/basic/KernelGeneratorBase.h" #include "TensorRegistry.h" +#include "../../compiler/TensorRegistries.h" + +#include "backend/basic/KernelGeneratorBase.h" +#include "exec/Executors.h" +#include "ir/Graph.h" namespace onert { @@ -43,10 +44,10 @@ public: { _tensor_registries = tensor_registries; } - void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map) + void setExecutors(const std::shared_ptr<exec::Executors> &executors) { // FIXME Using shared_ptr's raw pointer!
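// (Likely rationale, inferred rather than stated in the patch: Executors ultimately owns the
// generated If/While kernels, and those kernels point back at Executors to resolve their
// target subgraphs at run time, so holding a shared_ptr here would form an ownership cycle.)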
- _executor_map = executor_map.get(); + _executors = executors.get(); } std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override; @@ -64,7 +65,7 @@ private: DynamicTensorManager *_dyn_tensor_manager; std::shared_ptr<TensorRegistry> _tensor_reg; compiler::TensorRegistries _tensor_registries; - exec::ExecutorMap *_executor_map; + exec::Executors *_executors; const std::shared_ptr<ExternalContext> _external_context; }; diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc index fdd9d9d14..cdb41960a 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc +++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc @@ -16,10 +16,6 @@ #include "IfLayer.h" -#include <backend/ITensor.h> -#include "exec/ExecutorBase.h" -#include "PermuteLayer.h" - namespace onert { namespace backend @@ -33,13 +29,13 @@ IfLayer::IfLayer(backend::IPortableTensor *cond_tensor, const std::vector<backend::IPortableTensor *> input_tensors, const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, - exec::ExecutorMap *executor_map, + exec::Executors *executors, const std::shared_ptr<ExternalContext> &external_context) : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors}, - _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, - _executor_map{executor_map}, _external_context{external_context} + _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, _executors{executors}, + _external_context{external_context} { - // At this point, executor_map may not have executors of then subg and else subg + // At this point, executors may not have executors of then subg and else subg } void IfLayer::run() @@ -65,12 +61,12 @@ void IfLayer::run() if (cond_result) { VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl; - subg_exec = _executor_map->at(_then_subg_index).get(); + subg_exec = _executors->at(_then_subg_index).get(); } else { VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl; - subg_exec = _executor_map->at(_else_subg_index).get(); + subg_exec = _executors->at(_else_subg_index).get(); } subg_exec->execute(_input_tensors, _output_tensors); diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h index f12ef3605..fa5537a67 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h +++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h @@ -18,7 +18,7 @@ #define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__ #include <backend/IPortableTensor.h> -#include <exec/IExecutor.h> +#include <exec/Executors.h> #include "../ExternalContext.h" namespace onert @@ -37,8 +37,7 @@ public: const std::vector<backend::IPortableTensor *> input_tensors, const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, - exec::ExecutorMap *executor_map, - const std::shared_ptr<ExternalContext> &external_context); + exec::Executors *executors, const std::shared_ptr<ExternalContext> &external_context); public: void run() override; @@ -49,7 +48,7 @@ private: const std::vector<backend::IPortableTensor *> _output_tensors; const ir::SubgraphIndex _then_subg_index; const ir::SubgraphIndex _else_subg_index; - exec::ExecutorMap *_executor_map; + exec::Executors *_executors; const 
std::shared_ptr<ExternalContext> _external_context; }; diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc index 20cd87ad1..ddaecdf57 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc +++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc @@ -16,9 +16,9 @@ #include "PermuteLayer.h" -#include "exec/ShapeConverter.h" +#include "../../../exec/ShapeConverter.h" -#include "ruy/context.h" // from @ruy +#include <ruy/context.h> // from @ruy namespace onert { diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h index ac5470e85..227e32434 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h +++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h @@ -17,10 +17,10 @@ #ifndef __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__ #define __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__ -#include "exec/IPermuteFunction.h" -#include "exec/IExecutor.h" #include "../ExternalContext.h" -#include "ruy/thread_pool.h" // from @ruy +#include "../../../exec/IPermuteFunction.h" + +#include <ruy/thread_pool.h> // from @ruy namespace onert { diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc index 81b4a6378..8e006c5ea 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc +++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc @@ -16,11 +16,12 @@ #include "WhileLayer.h" -#include <algorithm> -#include <backend/ITensor.h> -#include "exec/ExecutorBase.h" -#include <misc/polymorphic_downcast.h> #include "PermuteLayer.h" +#include "../../../exec/ExecutorBase.h" + +#include <misc/polymorphic_downcast.h> + +#include <algorithm> namespace onert { @@ -34,14 +35,14 @@ namespace kernel WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors, const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &cond_subg_index, - const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map, + const ir::SubgraphIndex &body_subg_index, exec::Executors *executors, basic::DynamicMemoryManager *dyn_memory_manager, const std::shared_ptr<ExternalContext> &external_context) : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index}, - _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map}, + _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executors{executors}, _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context} { - // At this point, executor_map may not have executors of cond subg and body subg + // At this point, executors may not have executors of cond subg and body subg } void WhileLayer::run() @@ -56,8 +57,8 @@ void WhileLayer::run() // // Run cond subg // If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" -> // "_dst_tensors" - auto cond_exec = _executor_map->at(_cond_subg_index).get(); - auto body_exec = _executor_map->at(_body_subg_index).get(); + auto cond_exec = _executors->at(_cond_subg_index).get(); + auto body_exec = _executors->at(_body_subg_index).get(); // Need a temp tensor to hold the cond subgraph output assert(cond_exec->getOutputTensors().size() == 1); diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h index 
912102781..8551b3d09 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h +++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h @@ -18,7 +18,7 @@ #define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__ #include <backend/IPortableTensor.h> -#include <exec/IExecutor.h> +#include <exec/Executors.h> #include <exec/IFunction.h> #include <ir/OperandIndexSequence.h> #include <ir/Graph.h> @@ -41,7 +41,7 @@ public: WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors, const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index, - exec::ExecutorMap *executor_map, basic::DynamicMemoryManager *dyn_memory_manager, + exec::Executors *executors, basic::DynamicMemoryManager *dyn_memory_manager, const std::shared_ptr<ExternalContext> &external_context); public: @@ -52,7 +52,7 @@ private: const ir::SubgraphIndex _body_subg_index; const std::vector<backend::IPortableTensor *> _input_tensors; const std::vector<backend::IPortableTensor *> _output_tensors; - exec::ExecutorMap *_executor_map; + exec::Executors *_executors; basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors const std::shared_ptr<ExternalContext> _external_context; }; diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc index 0d6051b21..44442c065 100644 --- a/runtime/onert/core/src/compiler/BackendManager.cc +++ b/runtime/onert/core/src/compiler/BackendManager.cc @@ -16,16 +16,11 @@ #include "compiler/BackendManager.h" -#include <memory> -#include <dlfcn.h> +#include "../backend/builtin/Backend.h" +#include "../backend/builtin/Config.h" -#include "backend/Backend.h" -#include "backend/builtin/Backend.h" -#include "backend/builtin/Config.h" -#include "backend/IConfig.h" -#include "util/logging.h" -#include "util/ConfigSource.h" -#include "misc/string_helpers.h" +#include <dlfcn.h> +#include <memory> static const char *SHARED_LIB_EXT = #if defined(__APPLE__) && defined(__MACH__) @@ -152,7 +147,7 @@ const backend::Backend *BackendManager::get(const std::string &key) const return nullptr; } -const backend::builtin::Backend *BackendManager::getBuiltin() const { return _builtin; } +const backend::Backend *BackendManager::getBuiltin() const { return _builtin; } } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc index 6a1d8fcec..7be9c1e3b 100644 --- a/runtime/onert/core/src/compiler/Compiler.cc +++ b/runtime/onert/core/src/compiler/Compiler.cc @@ -18,29 +18,27 @@ #include "ExecutorFactory.h" #include "ShapeValidator.h" +#include "pass/ConstantOutputPass.h" +#include "pass/OddOutputPass.h" +#include "pass/PassRunner.h" +#include "pass/UnusedOperandEliminationPass.h" +#include "../backend/builtin/Config.h" +#include "../dumper/dot/DotDumper.h" +#include "../interp/InterpExecutor.h" +#include "../ir/OperationCloner.h" +#include "../ir/OperationDumper.h" +#include "../ir/verifier/Verifier.h" -#include <backend/builtin/Config.h> -#include "compiler/BackendManager.h" -#include "compiler/IScheduler.h" -#include "compiler/ManualScheduler.h" -#include "compiler/HEScheduler.h" #include "compiler/StaticShapeInferer.h" -#include "compiler/OperationLowerInfo.h" -#include "compiler/pass/ConstantOutputPass.h" -#include "compiler/pass/OddOutputPass.h" -#include "compiler/pass/PassRunner.h" -#include "compiler/pass/UnusedOperandEliminationPass.h" -#include 
"exec/ExecTime.h" -#include "ir/verifier/Verifier.h" -#include "dumper/dot/DotDumper.h" -#include "compiler/Linear.h" -#include "interp/InterpExecutor.h" #include "util/ConfigSource.h" #include "util/logging.h" -#include "ir/OperationDumper.h" -#include "ir/OperationCloner.h" -#include "misc/string_helpers.h" -#include "json/json.h" + +#include <misc/polymorphic_downcast.h> +#include <misc/string_helpers.h> +#include <json/json.h> + +// TODO Remove using fstream header +#include <fstream> namespace { @@ -86,8 +84,104 @@ void verboseOptions(compiler::CompilerOptions &options) << std::noboolalpha; } -void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgraphs &subgs, - const std::string &str) +std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>> +createStaticShapeInferers( + const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> + &lowered_subgs) +{ + // Allocate StaticShapeInferer per each subgraph + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>> inferers; + for (auto &pair : lowered_subgs) + { + const auto &subg_index = pair.first; + auto &lowered_subg = pair.second; + inferers[subg_index] = std::make_unique<compiler::StaticShapeInferer>(lowered_subg.get()); + } + + // Append observers in all StaticShapeInferers + for (auto &pair : lowered_subgs) + { + const auto &subg_index = pair.first; + auto &lowered_subg = pair.second; + + // TODO: Change this iteration for all to controlflow iteration + lowered_subg->graph().operations().iterate([&](const ir::OperationIndex &, + const ir::Operation &op) { + // A Function to append child inferers. These make it possible for a StaticShapeInferer to + // call StaticShapeInferes of child subgraphs recursively + auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) { + auto *child_inferer = inferers.at(child_subg_idx).get(); + inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer); + }; + + // A Function to appaend subg input observers. This makes it possible for a StaticShapeInferer + // to update inputs of child subgraphs + auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) { + std::vector<ir::Operand *> child_subg_inputs; + auto &child_subg = lowered_subgs.at(child_subg_idx)->graph(); + for (const auto &input_idx : child_subg.getInputs()) + { + auto operand_ptr = child_subg.operands().getRawPtr(input_idx); + child_subg_inputs.emplace_back(operand_ptr); + } + inferers.at(subg_index) + ->appendSubgInputObserver(child_subg_idx, + std::make_unique<compiler::OperandObserver>(child_subg_inputs)); + }; + + // A Function to set controlflow output observers. 
This makes it possible for a + // StaticShapeInferer to update outputs of parent controlflow operations + auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) { + std::vector<ir::Operand *> cf_outputs; + auto &subg = lowered_subg->graph(); + for (const auto &output_idx : op.getOutputs()) + { + auto operand_ptr = subg.operands().getRawPtr(output_idx); + cf_outputs.emplace_back(operand_ptr); + } + inferers.at(child_subg_idx) + ->setControlflowOutputObserver(std::make_unique<compiler::OperandObserver>(cf_outputs)); + }; + + // Append Observers in a StaticShapeInferer + if (op.opcode() == ir::OpCode::If) + { + const auto &if_op = nnfw::misc::polymorphic_downcast<const ir::operation::If &>(op); + + appendChildInferer(if_op.param().then_subg_index); + appendChildInferer(if_op.param().else_subg_index); + + appendSubgraphInputObserver(if_op.param().then_subg_index); + appendSubgraphInputObserver(if_op.param().else_subg_index); + + setControlFlowOutputObserver(if_op.param().then_subg_index); + } + else if (op.opcode() == ir::OpCode::While) + { + const auto &while_op = nnfw::misc::polymorphic_downcast<const ir::operation::While &>(op); + + appendChildInferer(while_op.param().cond_subg_index); + appendChildInferer(while_op.param().body_subg_index); + + appendSubgraphInputObserver(while_op.param().cond_subg_index); + appendSubgraphInputObserver(while_op.param().body_subg_index); + + setControlFlowOutputObserver(while_op.param().body_subg_index); + } + }); + } + + return inferers; +} + +} // namespace + +namespace onert +{ + +namespace compiler +{ +void ManualSchedulerOptions::setBackendMap(const std::string &str) { // TODO Support multiple subgraphs for manual scheduling auto key_val_list = nnfw::misc::split(str, ';'); @@ -102,37 +196,24 @@ const auto &key_str = key_val.at(0); const auto &val = key_val.at(1); auto key = static_cast<uint32_t>(std::stoi(key_str)); - - subgs.at(ir::SubgraphIndex{0}) - ->operations() - .at(ir::OperationIndex{key}); // Check if exist, or this wil throw - ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val); + this->index_to_backend.emplace(ir::OperationIndex{key}, val); } } -} // namespace - -namespace onert -{ - -namespace compiler +std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig() { - -CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) -{ - CompilerOptions options; - options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';'); - options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH); - options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP); - options.executor = util::getConfigString(util::config::EXECUTOR); - options.he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER); - options.he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE); - options.disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE); - options.fp16_enable = util::getConfigBool(util::config::FP16_ENABLE); - + auto o = std::make_unique<CompilerOptions>(); + o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';'); + o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH); + o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP); + o->executor = util::getConfigString(util::config::EXECUTOR); + o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER); +
o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE); + o->disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE); + o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE); { // Backend for all - auto &ms_options = options.manual_scheduler_options; + auto &ms_options = o->manual_scheduler_options; // Default value for op_backend_all is first element in the backend list ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS); @@ -151,54 +232,67 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) // Index to Backend auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP); - setBackendMap(ms_options, subgs, map_str); + ms_options.setBackendMap(map_str); } - return options; + return o; } -Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx) - : _subgraphs{subgs}, _state{State::CREATED} +Compiler::Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt) + : _nnpkg{std::make_shared<ir::NNPkg>(model)}, _state{State::CREATED}, _voptions{&copt} { - // Set default values for CompilerOptions - // All these default values should not be fetched from Env, when we stop supporting Android NN - // API. - _options = fetchCompilerOptionsFromGlobalConfig(*subgs); - - _options.tracing_ctx = tracing_ctx; + // DO NOTHING } -void Compiler::enableToFp16() { _options.fp16_enable = true; } +Compiler::Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg, + std::vector<std::unique_ptr<CompilerOptions>> &copts) + : _nnpkg{nnpkg}, _state{State::CREATED}, _voptions{} +{ + for (uint32_t i = 0; i < copts.size(); i++) + { + _voptions.push_back(copts[i].get()); + } +} -void Compiler::set_backend_from_str(const char *backend_settings) +void Compiler::enableToFp16() { - assert(_subgraphs != nullptr); - // Backend for all - auto &ms_options = _options.manual_scheduler_options; - setBackendMap(ms_options, *_subgraphs, std::string{backend_settings}); + for (auto options : _voptions) + options->fp16_enable = true; } void Compiler::checkProfilerConditions() { - if (!_options.he_scheduler) + if (_nnpkg->model_count() != 1) + throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet"); + + auto &options = *_voptions[0]; + + if (!options.he_scheduler) throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling."); - if (_options.executor != "Dataflow") + if (options.executor != "Dataflow") throw std::runtime_error("Profiling mode works only with 'Dataflow' executor"); } bool Compiler::buildPartialGraph(uint32_t num_graphs) { - if (_subgraphs->count() > 1) + // Use 1st model and options only on partial graph (pipeline) compile + assert(_nnpkg->model_count() == 1); + assert(_voptions.size() == 1); + + auto model = _nnpkg->primary_model(); + auto &options = *_voptions[0]; + + if (model->subgraphs_count() > 1) return false; + auto partialgraphs = std::make_shared<ir::Model>(); for (uint32_t idx = 0; idx < num_graphs; idx++) { auto partialgraph = std::make_unique<ir::Graph>(); partialgraphs->push(ir::SubgraphIndex{idx}, std::move(partialgraph)); } - _subgraphs->primary()->setPartialgraphs(partialgraphs); + model->primary_subgraph()->setPartialModel(partialgraphs); auto partial_graph = primary_subgraph()->partialgraphs(); @@ -208,8 +302,8 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs) for (auto use_operation : use_operations) { - auto graph_index =
_options.partial_graph_options.index_to_graph.find(use_operation); - if (graph_index == _options.partial_graph_options.index_to_graph.end()) + auto graph_index = options.partial_graph_options.index_to_graph.find(use_operation); + if (graph_index == options.partial_graph_options.index_to_graph.end()) { throw std::runtime_error("Invalid Partition Map"); } @@ -230,8 +324,8 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs) primary_subgraph()->operations().iterate( [&](const ir::OperationIndex &operation_index, const ir::Operation &operation) { - auto graph_index = _options.partial_graph_options.index_to_graph.find(operation_index); - if (graph_index == _options.partial_graph_options.index_to_graph.end()) + auto graph_index = options.partial_graph_options.index_to_graph.find(operation_index); + if (graph_index == options.partial_graph_options.index_to_graph.end()) { throw std::runtime_error("Invalid Partition Map"); } @@ -259,7 +353,7 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs) assert(new_operation_index == operation_index); }); - for (uint32_t idx = 0; idx < partial_graph->count(); idx++) + for (uint32_t idx = 0; idx < partial_graph->subgraphs_count(); idx++) { auto partition = partial_graph->at(ir::SubgraphIndex{idx}); @@ -282,10 +376,10 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs) auto use_operations = primary_subgraph()->operands().at(operand_index).getUses(); auto iter = use_operations.begin(); ir::SubgraphIndex graph_index = - _options.partial_graph_options.index_to_graph.find(*iter++)->second; + options.partial_graph_options.index_to_graph.find(*iter++)->second; while (iter != use_operations.end()) { - if (graph_index != _options.partial_graph_options.index_to_graph.find(*iter)->second && + if (graph_index != options.partial_graph_options.index_to_graph.find(*iter)->second && !partition->getOutputs().contains(operand_index)) { partition->addOutput(operand_index, @@ -344,96 +438,157 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs) return true; } -std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) +std::shared_ptr<CompilerArtifact> Compiler::compile(void) { - // Set control flow backend for control flow operators + for (auto options : _voptions) { + // Set control flow backend for control flow operators auto &builtin_id = backend::builtin::Config::ID; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id; - } + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id; + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id; + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id; - // FIXME This is a workaround for bcq operations, should remove it - { - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; + // FIXME This is a workaround for bcq operations, should remove it + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; + + // FIXME This is a workaround for bulk operations, should remove it + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix"; + + verboseOptions(*options); + }
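// Usage sketch of the new entry points (illustrative only; declarations are in Compiler.h
// above, variable names are hypothetical):
//   auto options = compiler::CompilerOptions::fromGlobalConfig();
//   compiler::Compiler compiler{model, *options};  // single-model path
//   std::shared_ptr<compiler::CompilerArtifact> artifact = compiler.compile();
//   // artifact->_executors runs the package; artifact->_tracing_ctx owns profiling data
-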
verboseOptions(_options); + // NYI: allow one model compilation + auto const model_count = _nnpkg->model_count(); + if (model_count != _voptions.size()) + throw std::runtime_error{"Model count and option vector size mismatch"}; - _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { - // Mandatory passes - pass::PassRunner{} - .append(std::make_unique<pass::ConstantOutputPass>(subg)) - .append(std::make_unique<pass::OddOutputPass>(subg)) - .run(); + for (uint32_t i = 0; i < model_count; i++) + { + _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { + // Mandatory passes + pass::PassRunner{} + .append(std::make_unique<pass::ConstantOutputPass>(subg)) + .append(std::make_unique<pass::OddOutputPass>(subg)) + .run(); - // Optimizations - pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run(); - }); + // Optimizations + pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run(); + }); + } /*************************************************** * Prepare compilation phase ***************************************************/ - auto executors = std::make_shared<exec::ExecutorMap>(); - // Compilable check // TODO: Support hybrid execution - // execution between interpreter and compiled executor (including control flow) - if (_options.disable_compile) + if (_voptions[0]->disable_compile) { - _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { + if (model_count > 1) + throw std::runtime_error{"NYI: Disable compilation for multi model is not supported yet"}; + + auto executors = std::make_shared<exec::Executors>(); + + _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg)); }); _state = State::COMPILED; - return executors; + return std::make_shared<CompilerArtifact>(executors, nullptr); } // Mode check - if (_options.he_profiling_mode) + // TODO handle option for each model + if (_voptions[0]->he_profiling_mode) checkProfilerConditions(); /*************************************************** * Backend independent analysis & optimization phase ***************************************************/ - auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level); + // TODO Handle dump level for each model + auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level); + onert::dumper::dot::DotDumper dot_dumper(dump_level); + + // Tracing context + auto tracing_ctx = std::make_unique<util::TracingCtx>(); + + // Model edge context + std::unique_ptr<ir::ModelEdges> model_edges = nullptr; // Lower: Assign backend std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs; - _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { - onert::dumper::dot::DotDumper dot_dumper(subg, dump_level); - dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value())); - // Lower: Assign backend - lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options); + if (model_count == 1) + { + _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { + dot_dumper.dump(subg, nnfw::misc::str("before_lower_subg-", index.value())); + // Lower: Assign backend + lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, *_voptions[0]); + // Set tracing_ctx for copied graph + 
tracing_ctx->setSubgraphIndex(&(lowered_subgs[index]->graph()), index.value()); + }); + } + else + { + // TODO Support tracing_ctx for multiple model + tracing_ctx = nullptr; + + // Copy model edge context + model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges()); - subg.setSubgraphs(nullptr); - }); + for (uint32_t i = 0; i < model_count; i++) + { + auto model = _nnpkg->model(ir::ModelIndex{i}); + if (model->subgraphs_count() != 1) + throw std::runtime_error{"NYI: Lowering subgraphs for multiple model is not supported yet"}; + auto subg = model->primary_subgraph(); + dot_dumper.dump(*subg, nnfw::misc::str("before_lower_model-", i)); + + // For multimodel, model index is used for lowered graph index in lowered graph map + // and index type is SubgraphIndex + // TODO Find better way to represent lowered graph index for multimodel's subgraph + lowered_subgs[ir::SubgraphIndex{i}] = + std::make_unique<compiler::LoweredGraph>(*model->primary_subgraph(), *_voptions[i]); + } + } - _subgraphs.reset(); + _nnpkg.reset(); for (auto &pair : lowered_subgs) { const auto &subg_index = pair.first; auto &lowered_subg = pair.second; - onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level); - dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value())); + dot_dumper.dump(*lowered_subg, "after_lower_subg-" + std::to_string(subg_index.value())); } // Shape inference. { - const auto primary_subg_idx = ir::SubgraphIndex{0}; - StaticShapeInferer inferer(primary_subg_idx, lowered_subgs); - auto &lowered_subg = lowered_subgs.at(primary_subg_idx); - auto ordered_ops = lowered_subg->graph().topolSortOperations(); - for (auto op_ind : ordered_ops) + // Run the StaticShapeInfer of primary subg. All child StaticShapeInferers are called + // recursively + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers = + createStaticShapeInferers(lowered_subgs); + + if (model_count == 1) { - const auto &op = lowered_subg->graph().operations().at(op_ind); - bool has_dynamic_tensor = inferer.infer(op); - lowered_subg->setHasDynamicTensor(op_ind, has_dynamic_tensor); + const auto primary_subg_idx = ir::SubgraphIndex{0}; + inferers.at(primary_subg_idx)->infer(); + + for (const auto &pair : inferers) + { + const auto inferer = pair.second.get(); + inferer->dump(); + } + } + else + { + // Assume multi model has only one subgraph on each model + for (const auto &pair : inferers) + { + const auto inferer = pair.second.get(); + inferer->infer(); + inferer->dump(); + } } - inferer.dump(); } // Shape validation @@ -452,8 +607,7 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) /************************************************************* * Backend independent analysis & optimization phase finished *************************************************************/ - - executors = std::make_shared<exec::ExecutorMap>(); + auto executors = std::make_shared<exec::Executors>(std::move(model_edges)); for (auto &pair : lowered_subgs) { const auto &subg_index = pair.first; @@ -464,24 +618,31 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) std::to_string(subg_index.value())); lowered_subg->graph().operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); }); - auto executor = std::unique_ptr<exec::IExecutor>{ - ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)}; + + auto &options = (model_count > 1) ? 
*_voptions[subg_index.value()] : *_voptions[0]; + auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create( + std::move(lowered_subg), tracing_ctx.get(), options, executors)}; executor->setIndexedRanks(indexed_ranks); - executors->insert(std::make_pair(subg_index, std::move(executor))); + executors->emplace(subg_index, std::move(executor)); } /******************************** * Code generation phase finished ********************************/ _state = State::COMPILED; - return executors; + return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx)); } -std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *package_file_path, - const char *map_file_path) +std::vector<std::shared_ptr<CompilerArtifact>> Compiler::compile(const char *package_file_path, + const char *map_file_path) { - std::vector<std::shared_ptr<exec::ExecutorMap>> executors; - auto executor_map = std::make_shared<exec::ExecutorMap>(); + // Allow one model compilation for pipeline + if (_nnpkg->model_count() != 1) + throw std::runtime_error{"Multiple models compilation for pipeline is not supported yet."}; + assert(_voptions.size() == 1); + + auto model = _nnpkg->primary_model(); + auto &options = *_voptions[0]; std::string package_path(package_file_path); std::string partition_map_file; @@ -508,7 +669,7 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa num_graphs = np.asUInt(); for (uint32_t i = 0; i < (uint32_t)map.size(); ++i) { - _options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] = + options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] = ir::SubgraphIndex{map[i].asUInt()}; } } @@ -525,25 +686,25 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa // Set control flow backend for control flow operators { auto &builtin_id = backend::builtin::Config::ID; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id; } // FIXME This is a workaround for bcq operations, should remove it { - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; } - // It doesn't support tracing in case of partial graph + // FIXME This is a workaround for bulk operations, should remove it { - _options.tracing_ctx = nullptr; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix"; } - verboseOptions(_options); + verboseOptions(options); - _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { + model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { // Mandatory passes auto part = subg.partialgraphs(); part->iterate([&](const ir::SubgraphIndex &, ir::Graph &partialgraph) { @@ -566,38 +727,41 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa // Compilable check // TODO: 
Support hybrid execution - // execution between interpreter and compiled executor (including control flow) - if (_options.disable_compile) + if (options.disable_compile) { - _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { - executor_map->emplace(index, std::make_unique<interp::InterpExecutor>(subg)); - executors.push_back(executor_map); + std::vector<std::shared_ptr<CompilerArtifact>> results; + auto executors = std::make_shared<exec::Executors>(); + + model->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { + executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg)); }); + results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr)); _state = State::COMPILED; - return executors; + return results; } // Mode check - if (_options.he_profiling_mode) + if (options.he_profiling_mode) checkProfilerConditions(); /*************************************************** * Backend independent analysis & optimization phase ***************************************************/ - auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level); + auto dump_level = static_cast<dumper::dot::DotDumper::Level>(options.graph_dump_level); + onert::dumper::dot::DotDumper dot_dumper_part(dump_level); // Lower: Assign backend std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_partialgraphs; - _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { + model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { auto part = subg.partialgraphs(); part->iterate([&](const ir::SubgraphIndex &pindex, ir::Graph &partialgraph) { - onert::dumper::dot::DotDumper dot_dumper_part(partialgraph, dump_level); - dot_dumper_part.dump(nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value())); + dot_dumper_part.dump(partialgraph, + nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value())); // // Lower: Assign backend lowered_partialgraphs[pindex] = - std::make_unique<compiler::LoweredGraph>(subg, partialgraph, _options); - partialgraph.setSubgraphs(nullptr); + std::make_unique<compiler::LoweredGraph>(subg, partialgraph, options); }); }); @@ -606,25 +770,20 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa const auto &partialgraph_index = pair.first; auto &lowered_partialgraph = pair.second; - onert::dumper::dot::DotDumper dot_dumper_lowered_part(lowered_partialgraph.get(), dump_level); - dot_dumper_lowered_part.dump("after_lower_subg_partialgraph-" + - std::to_string(partialgraph_index.value())); + dot_dumper_part.dump(*lowered_partialgraph, "after_lower_subg_partialgraph-" + + std::to_string(partialgraph_index.value())); } // Partial Graph shape inference + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers = + createStaticShapeInferers(lowered_partialgraphs); + // NOTE If partialgraph has subgraphs StaticShapeInferer may be called multiple times for (auto &pair : lowered_partialgraphs) { const auto &partialgraph_index = pair.first; - auto &lowered_partialgraph = pair.second; - StaticShapeInferer partial_inferer(partialgraph_index, lowered_partialgraphs); - auto ordered_ops = lowered_partialgraph->graph().topolSortOperations(); - for (auto op_ind : ordered_ops) - { - const auto &op = lowered_partialgraph->graph().operations().at(op_ind); - bool has_dynamic_tensor = partial_inferer.infer(op); - lowered_partialgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor); - } - partial_inferer.dump(); + const auto 
partial_inferer = inferers.at(partialgraph_index).get(); + partial_inferer->infer(); + partial_inferer->dump(); } // Shape validation @@ -652,9 +811,11 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa ordered.insert(make_pair(pair.first.value(), std::move(lowered_partialgraph))); } + std::vector<std::shared_ptr<CompilerArtifact>> results; for (auto &pair : ordered) { - executor_map = std::make_shared<exec::ExecutorMap>(); + auto executors = std::make_shared<exec::Executors>(); + const auto &partialgraph_index = ir::SubgraphIndex(pair.first); auto &lowered_partialgraph = pair.second; auto indexed_ranks = lowered_partialgraph->indexed_ranks(); @@ -663,19 +824,21 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa lowered_partialgraph->graph().operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); }); auto executor = std::unique_ptr<exec::IExecutor>{ - ExecutorFactory::get().create(std::move(lowered_partialgraph), _options, executor_map)}; + ExecutorFactory::get().create(std::move(lowered_partialgraph), nullptr, options, executors)}; executor->setIndexedRanks(indexed_ranks); - executor_map->insert(std::make_pair(ir::SubgraphIndex{0}, std::move(executor))); - executors.push_back(executor_map); + executors->emplace(ir::SubgraphIndex{0}, std::move(executor)); + + // It doesn't support tracing in case of partial graph + results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr)); } - _subgraphs.reset(); + _nnpkg.reset(); /******************************** * Code generation phase finished ********************************/ _state = State::COMPILED; - return executors; + return results; } } // namespace compiler diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc index f9db1ca89..024556e7e 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.cc +++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc @@ -16,23 +16,22 @@ #include "ExecutorFactory.h" -#include "backend/builtin/Config.h" -#include "backend/builtin/KernelGenerator.h" -#include "backend/builtin/TensorBuilder.h" -#include "backend/builtin/UserTensor.h" -#include "backend/IPortableTensor.h" -#include "compiler/BackendManager.h" -#include "compiler/BackendManager.h" -#include "compiler/ExecutionBuilder.h" -#include "compiler/Linear.h" -#include "dumper/text/GraphDumper.h" -#include "exec/DataflowExecutor.h" -#include "exec/ExecTime.h" -#include "exec/ExecutionObservers.h" -#include "exec/LinearExecutor.h" -#include "exec/ParallelExecutor.h" -#include "ir/OperationCloner.h" -#include "util/TracingCtx.h" +#include "Linear.h" +#include "../backend/builtin/BackendContext.h" +#include "../backend/builtin/Config.h" +#include "../backend/builtin/UserTensor.h" +#include "../dumper/text/GraphDumper.h" +#include "../exec/DataflowExecutor.h" +#include "../exec/ExecTime.h" +#include "../exec/ExecutionObservers.h" +#include "../exec/LinearExecutor.h" +#include "../exec/ParallelExecutor.h" +#include "../ir/OperationCloner.h" + +#include <backend/IPortableTensor.h> +#include <compiler/BackendManager.h> +#include <compiler/ExecutionBuilder.h> +#include <util/TracingCtx.h> #include <functional> #include <memory> @@ -242,16 +241,17 @@ ExecutorFactory::ExecutorFactory() { _map["Linear"] = createLinearExecutor; _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2, - std::placeholders::_3, false); + std::placeholders::_3, 
std::placeholders::_4, false); _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2, - std::placeholders::_3, true); + std::placeholders::_3, std::placeholders::_4, true); } exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph, + const util::TracingCtx *tracing_ctx, const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map) + const std::shared_ptr<exec::Executors> &executors) { - return _map.at(options.executor)(std::move(lowered_graph), options, executor_map); + return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors); } void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph, @@ -282,7 +282,7 @@ void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_grap } void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs, - const std::shared_ptr<exec::ExecutorMap> &executor_map, + const std::shared_ptr<exec::Executors> &executors, const backend::BackendContexts &backend_contexts) { for (auto &pair : backend_contexts) @@ -292,7 +292,7 @@ void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs, { auto builtin_kernel_gen = builtin_context->kernel_gen; builtin_kernel_gen->setTensorRegistries(tensor_regs); - builtin_kernel_gen->setExecutorMap(executor_map); + builtin_kernel_gen->setExecutors(executors); } } } @@ -317,12 +317,11 @@ ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_con return ordered_contexts; } -exec::IExecutor * -ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map) +exec::IExecutor *ExecutorFactory::createLinearExecutor( + std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx, + const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors) { - auto graph = lowered_graph->graph(); + auto &graph = lowered_graph->graph(); backend::BackendContexts backend_contexts = createBackendContexts(*lowered_graph, options.executor == "Linear"); @@ -346,7 +345,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo prepareMigrantTensors(*lowered_graph, backend_contexts); // Give some runtime objects to builtin KernelGenerator - prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts); + prepareBuiltinBackend(tensor_regs, executors, backend_contexts); ExecutionBuilder builder; @@ -426,14 +425,17 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo auto code_map = builder.releaseCodeMap(); - auto exec = new exec::LinearExecutor{ - std::move(lowered_graph), std::move(backend_contexts), tensor_regs, std::move(code_map), order, - options.tracing_ctx}; + auto exec = new exec::LinearExecutor{std::move(lowered_graph), + std::move(backend_contexts), + tensor_regs, + std::move(code_map), + order, + tracing_ctx}; if (!options.trace_filepath.empty()) { - std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>( - options.trace_filepath, exec->graph(), options.tracing_ctx); + std::unique_ptr<exec::IExecutionObserver> ctp = + std::make_unique<exec::TracingObserver>(options.trace_filepath, exec->graph(), tracing_ctx); exec->addObserver(std::move(ctp)); } @@ -441,8 +443,9 @@ 
ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo } exec::IExecutor *ExecutorFactory::createDataflowExecutor( - std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel) + std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx, + const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors, + bool parallel) { backend::BackendContexts backend_contexts = createBackendContexts(*lowered_graph, options.executor == "Linear"); @@ -462,7 +465,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( prepareMigrantTensors(*lowered_graph, backend_contexts); // Give some runtime objects to builtin KernelGenerator - prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts); + prepareBuiltinBackend(tensor_regs, executors, backend_contexts); ExecutionBuilder builder; @@ -491,13 +494,13 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( if (parallel) { exec = new exec::ParallelExecutor{std::move(lowered_graph), std::move(backend_contexts), - tensor_regs, std::move(code_map), options.tracing_ctx}; + tensor_regs, std::move(code_map), tracing_ctx}; } else { auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, - std::move(code_map), options.tracing_ctx}; + std::move(code_map), tracing_ctx}; if (options.he_profiling_mode) { std::vector<const backend::Backend *> backends; @@ -515,8 +518,8 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( if (!options.trace_filepath.empty()) { - std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>( - options.trace_filepath, exec->graph(), options.tracing_ctx); + std::unique_ptr<exec::IExecutionObserver> ctp = + std::make_unique<exec::TracingObserver>(options.trace_filepath, exec->graph(), tracing_ctx); exec->addObserver(std::move(ctp)); } diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h index 2ee05fae3..70c089f8c 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.h +++ b/runtime/onert/core/src/compiler/ExecutorFactory.h @@ -21,7 +21,7 @@ #include "backend/ITensor.h" #include "compiler/LoweredGraph.h" -#include "exec/IExecutor.h" +#include "exec/Executors.h" #include <deque> #include <unordered_map> @@ -38,8 +38,9 @@ public: public: exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph, + const util::TracingCtx *tracing_ctx, const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map); + const std::shared_ptr<exec::Executors> &executors); private: ExecutorFactory(); @@ -48,25 +49,26 @@ private: static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph, const backend::BackendContexts &backend_contexts); static void prepareBuiltinBackend(const TensorRegistries &tensor_regs, - const std::shared_ptr<exec::ExecutorMap> &executor_map, + const std::shared_ptr<exec::Executors> &executors, const backend::BackendContexts &backend_contexts); static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> orderBackendContext(const backend::BackendContexts &backend_contexts); - static exec::IExecutor * - createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map); + 
static exec::IExecutor *createLinearExecutor( + std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx, + const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors); static exec::IExecutor * createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, + const util::TracingCtx *tracing_ctx, const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel); + const std::shared_ptr<exec::Executors> &executors, bool parallel); private: - std::unordered_map<std::string, std::function<exec::IExecutor *( - std::unique_ptr<compiler::LoweredGraph>, - const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map)>> + std::unordered_map< + std::string, + std::function<exec::IExecutor *( + std::unique_ptr<compiler::LoweredGraph>, const util::TracingCtx *tracing_ctx, + const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)>> _map; }; diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc index 5c1cef1ab..98dc906e4 100644 --- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc +++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc @@ -180,7 +180,7 @@ void Fp32ToFp16Converter::appendOpSequences() { _lowered_graph.op_seqs().iterate( [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); // For now, the only acl_cl supports fully fp16 type @@ -375,7 +375,7 @@ void Fp32ToFp16Converter::convertOperands() { _lowered_graph.op_seqs().iterate( [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); // For now, the only acl_cl supports fully fp16 if (lower_info->backend()->config()->id() != kAclClBackendConfigId) @@ -515,7 +515,7 @@ ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &o void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind, const ir::OperandIndex &new_op_ind) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); auto new_lower_info = std::make_unique<compiler::OperandLowerInfo>(); auto permute_factor = compiler::PermuteFactor(lower_info->backend(), lower_info->layout()); @@ -527,7 +527,7 @@ void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_s void Fp32ToFp16Converter::setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind, const ir::OpSequenceIndex &new_op_seq_ind) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); auto new_lower_info = @@ -635,7 +635,7 @@ ir::OpSequenceIndex Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex const ir::OperationIndex &node_index) { auto &node = _lowered_graph.graph().operations().at(node_index); - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); auto layout = lower_info->layout(); 
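Note on the pattern running through the Compiler and ExecutorFactory changes above: this import replaces the per-session exec::ExecutorMap with an exec::Executors container and stops routing the tracing context through CompilerOptions; compile() now returns a CompilerArtifact that bundles the executors with the TracingCtx, and ExecutorFactory takes the context as an explicit parameter. The standalone C++ sketch below mirrors that ownership shape as inferred from the constructor calls in this diff; every name in it is an illustrative stand-in, not the onert API.

#include <cstdint>
#include <map>
#include <memory>
#include <utility>

struct TracingCtx {};                                  // stand-in for util::TracingCtx
struct IExecutor { virtual ~IExecutor() = default; };  // stand-in for exec::IExecutor
struct LinearExecutorStub : IExecutor {};              // stand-in for a concrete executor

using SubgraphIndex = uint32_t;

// Stand-in for exec::Executors: one executor per subgraph index.
struct Executors
{
  void emplace(SubgraphIndex index, std::unique_ptr<IExecutor> exec)
  {
    _entries.emplace(index, std::move(exec));
  }
  std::map<SubgraphIndex, std::unique_ptr<IExecutor>> _entries;
};

// Mirrors the new return type: executors plus the tracing context that
// previously lived inside CompilerOptions.
struct CompilerArtifact
{
  CompilerArtifact(std::shared_ptr<Executors> executors,
                   std::unique_ptr<const TracingCtx> tracing_ctx)
    : _executors{std::move(executors)}, _tracing_ctx{std::move(tracing_ctx)}
  {
  }
  std::shared_ptr<Executors> _executors;
  std::unique_ptr<const TracingCtx> _tracing_ctx;
};

std::shared_ptr<CompilerArtifact> compileSketch()
{
  auto tracing_ctx = std::make_unique<TracingCtx>(); // ends up owned by the artifact
  auto executors = std::make_shared<Executors>();
  executors->emplace(0, std::make_unique<LinearExecutorStub>());
  // ...lowering and code generation would populate more executors here...
  return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
}

Callers that previously iterated a bare ExecutorMap now pull the container out of the artifact, which is why the pipeline compile path above returns a vector of artifacts, one per partition, each constructed with a null tracing context.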
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc index 2f996c8e8..c4bfddb8f 100644 --- a/runtime/onert/core/src/compiler/HEScheduler.cc +++ b/runtime/onert/core/src/compiler/HEScheduler.cc @@ -14,17 +14,14 @@ * limitations under the License. */ -#include "ir/Operand.h" -#include "compiler/HEScheduler.h" -#include "ir/Graph.h" -#include "util/ConfigSource.h" +#include "HEScheduler.h" + #include "compiler/BackendResolver.h" +#include "ir/Graph.h" #include "util/logging.h" -#include "util/Utils.h" -#include "exec/FunctionSequence.h" + #include <cassert> #include <cmath> -#include <chrono> namespace { diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h index 1a95b9881..18ea388fd 100644 --- a/runtime/onert/core/src/compiler/HEScheduler.h +++ b/runtime/onert/core/src/compiler/HEScheduler.h @@ -23,14 +23,16 @@ #ifndef __ONERT_COMPILER_H_E_SCHEDULER_H_ #define __ONERT_COMPILER_H_E_SCHEDULER_H_ -#include "compiler/IScheduler.h" -#include "compiler/BackendManager.h" -#include "compiler/Compiler.h" -#include "ir/Graph.h" -#include "exec/ExecTime.h" -#include "backend/Backend.h" -#include <memory> -#include "ir/OperationIndexMap.h" +#include "IScheduler.h" +#include "../backend/builtin/Config.h" +#include "../exec/ExecTime.h" + +#include <backend/Backend.h> +#include <compiler/BackendManager.h> +#include <compiler/Compiler.h> +#include <ir/Graph.h> +#include <ir/OperationIndexMap.h> + #include <map> #include <memory> diff --git a/runtime/onert/test/core/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.test.cc index 514c01485..c4a2df025 100644 --- a/runtime/onert/test/core/compiler/HEScheduler.cc +++ b/runtime/onert/core/src/compiler/HEScheduler.test.cc @@ -14,14 +14,13 @@ * limitations under the License. */ -#include <compiler/HEScheduler.h> -#include <exec/ExecTime.h> +#include "HEScheduler.h" +#include "../exec/ExecTime.h" -#include <ir/Shape.h> +#include <ir/DataType.h> #include <ir/InternalType.h> +#include <ir/Shape.h> #include <ir/TypeInfo.h> -#include <ir/DataType.h> - #include <ir/operation/BinaryArithmetic.h> #include <ir/operation/FullyConnected.h> @@ -362,8 +361,8 @@ class HESchedulerTestWithExecutorParam : public HESchedulerTest, // SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times - // one time for each executor -INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam, - testing::Values(LINEAR, DATAFLOW, PARALLEL)); +INSTANTIATE_TEST_SUITE_P(AllExecutors, HESchedulerTestWithExecutorParam, + testing::Values(LINEAR, DATAFLOW, PARALLEL)); // Test scheduler behavior for straight graph with known execution time of all nodes and permutes. 
TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time) @@ -371,9 +370,9 @@ TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time) setExecutor(GetParam()); // Prepare graph - ir::Subgraphs subgs; + ir::Model model; auto graph(createStraightGraph()); - subgs.push(ir::SubgraphIndex{0}, graph); + model.push(ir::SubgraphIndex{0}, graph); OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2); // Set default execution and transfer time @@ -392,8 +391,8 @@ TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time) et.storeOperationsExecTime(); // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); const auto br = scheduler.schedule(*graph); ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu"); ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu"); @@ -407,8 +406,8 @@ TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time) setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5); // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); const auto br = scheduler.schedule(*graph); ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu"); ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu"); @@ -423,9 +422,9 @@ TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time) setExecutor(GetParam()); // Prepare graph - ir::Subgraphs subgs; + ir::Model model; auto graph(createBranchedGraph()); - subgs.push(ir::SubgraphIndex{0}, graph); + model.push(ir::SubgraphIndex{0}, graph); OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), sub_op_idx(5); @@ -449,8 +448,8 @@ TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time) et.storeOperationsExecTime(); // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); const auto br = scheduler.schedule(*graph); std::string branch1_expected_backend("npu"), branch2_expected_backend("npu"); @@ -483,8 +482,8 @@ TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time) et.storeOperationsExecTime(); // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); const auto br = scheduler.schedule(*graph); ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu"); ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu"); @@ -505,9 +504,9 @@ TEST_F(HESchedulerTest, branched_graph_profiling_mode) setExecutor(DATAFLOW); // Prepare graph - ir::Subgraphs subgs; + ir::Model model; auto graph(createBranchedGraph()); - subgs.push(ir::SubgraphIndex{0}, graph); + model.push(ir::SubgraphIndex{0}, graph); OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), sub_op_idx(5); @@ -528,8 +527,8 @@ 
TEST_F(HESchedulerTest, branched_graph_profiling_mode) et.storeOperationsExecTime(); // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); const auto br = scheduler.schedule(*graph); ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu"); ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu"); @@ -550,8 +549,8 @@ TEST_F(HESchedulerTest, branched_graph_profiling_mode) et.storeOperationsExecTime(); // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); const auto br = scheduler.schedule(*graph); ASSERT_NE(br->getBackend(add_op_idx)->config()->id(), br->getBackend(mul1_op_idx)->config()->id()); diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc index 73ba96238..f85b8d1bd 100644 --- a/runtime/onert/core/src/compiler/Linear.cc +++ b/runtime/onert/core/src/compiler/Linear.cc @@ -14,15 +14,13 @@ * limitations under the License. */ -#include <algorithm> -#include <sstream> - #include "Linear.h" -#include "backend/IConfig.h" -#include "backend/Backend.h" +#include "../dumper/text/GraphDumper.h" + #include "util/logging.h" -#include "dumper/text/GraphDumper.h" + +#include <sstream> namespace onert { diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc index 999bffa7c..9e84753a7 100644 --- a/runtime/onert/core/src/compiler/LoweredGraph.cc +++ b/runtime/onert/core/src/compiler/LoweredGraph.cc @@ -16,24 +16,23 @@ #include "compiler/LoweredGraph.h" -#include <assert.h> -#include <algorithm> -#include <sstream> -#include "util/logging.h" -#include "compiler/pass/ConstantInsertionPass.h" -#include "compiler/pass/ConstantLoweringPass.h" -#include "compiler/pass/PassRunner.h" -#include "compiler/pass/PermutationOperationPass.h" -#include "compiler/pass/PermutationInsertionPass.h" -#include "compiler/pass/PermutationEliminationPass.h" -#include "dumper/text/GraphDumper.h" -#include "ir/verifier/Verifier.h" +#include "HEScheduler.h" +#include "ManualScheduler.h" +#include "pass/ConstantInsertionPass.h" +#include "pass/ConstantLoweringPass.h" +#include "pass/PassRunner.h" +#include "pass/PermutationEliminationPass.h" +#include "pass/PermutationInsertionPass.h" +#include "pass/PermutationOperationPass.h" +#include "../dumper/text/GraphDumper.h" +#include "../ir/verifier/Verifier.h" + #include "backend/Backend.h" -#include "backend/IConfig.h" #include "compiler/BackendResolver.h" -#include "compiler/ManualScheduler.h" -#include "compiler/HEScheduler.h" -#include "util/TracingCtx.h" +#include "util/logging.h" + +#include <cassert> +#include <sstream> namespace onert { @@ -42,7 +41,7 @@ namespace compiler LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph} { - lowerGraph(graph, options); + lowerGraph(options); } // TODO Design better class and constructor to represent parent_graph @@ -50,18 +49,11 @@ LoweredGraph::LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph const CompilerOptions &options) : _graph{graph}, _parent_graph{parent_graph} { - lowerGraph(graph, options); + lowerGraph(options); } -void 
LoweredGraph::lowerGraph(const ir::Graph &graph, const CompilerOptions &options) +void LoweredGraph::lowerGraph(const CompilerOptions &options) { - // set tracing_ctx for copied graph - if (options.tracing_ctx) - { - auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph); - options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value()); - } - // Build backend contexts auto &backend_manager = BackendManager::get(); // Create contexts for other backends diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc index 1c7000986..8c6421744 100644 --- a/runtime/onert/core/src/compiler/ShapeValidator.cc +++ b/runtime/onert/core/src/compiler/ShapeValidator.cc @@ -34,77 +34,72 @@ namespace onert namespace compiler { -ShapeValidator::ShapeValidator(const ir::Graph &graph) - : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN} -{ -} +ShapeValidator::ShapeValidator(const ir::Graph &graph) : _graph{graph} {} void ShapeValidator::checkUnaryOp(const ir::Operation &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; // Check if I/O shapes match - OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); + OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape()); } void ShapeValidator::operator()() { - // There is no reason for each subgraph to have subgraphs since compiler has subgraphs when - // creating Compiler - assert(_graph.subgraphs() == nullptr); - - _current_layout = _graph.layout(); - _graph.operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); }); } void ShapeValidator::visit(const ir::operation::BatchMatMul &node) { + const auto &operands = _graph.operands(); const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS)); const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS)); const auto out_index{node.getOutputs().at(0)}; - if (_ctx.at(out_index).info().isDynamic()) + if (operands.at(out_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(lhs_index).shape().rank() <= 4); - OP_REQUIRES(_ctx.at(rhs_index).shape().rank() <= 4); - OP_REQUIRES(_ctx.at(lhs_index).shape().rank() >= 2); - OP_REQUIRES(_ctx.at(rhs_index).shape().rank() >= 2); + OP_REQUIRES(operands.at(lhs_index).shape().rank() <= 4); + OP_REQUIRES(operands.at(rhs_index).shape().rank() <= 4); + OP_REQUIRES(operands.at(lhs_index).shape().rank() >= 2); + OP_REQUIRES(operands.at(rhs_index).shape().rank() >= 2); } void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)}; const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - const auto frontend_layout = _current_layout; - const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); - const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); + const auto frontend_layout = _graph.layout(); + const auto input_shape = 
operands.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); // All requirement as per NNAPI specification. - OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2); + OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2); if (node.getInputs().size() != 2) { const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; - OP_REQUIRES(_ctx.at(crops_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(crops_index).shape().dim(0) == (_ctx.at(ifm_index).shape().rank() - 2)); - OP_REQUIRES(_ctx.at(crops_index).shape().dim(1) == 2); + OP_REQUIRES(operands.at(crops_index).shape().rank() == 2); + OP_REQUIRES(operands.at(crops_index).shape().dim(0) == + (operands.at(ifm_index).shape().rank() - 2)); + OP_REQUIRES(operands.at(crops_index).shape().dim(1) == 2); } OP_REQUIRES(input_shape.C == output_shape.C); @@ -112,8 +107,9 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node) void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)}; @@ -125,16 +121,16 @@ void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node) node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)}; // const auto bias_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::BIAS)}; - OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(weight_scales_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(weight_binary_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(weight_cluster_index).shape().rank() == 2); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 2); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 2); + OP_REQUIRES(operands.at(weight_scales_index).shape().rank() == 1); + OP_REQUIRES(operands.at(weight_binary_index).shape().rank() == 2); + OP_REQUIRES(operands.at(weight_cluster_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(ifm_index).shape().dim(1) == _ctx.at(ofm_index).shape().dim(1)); + OP_REQUIRES(operands.at(ifm_index).shape().dim(1) == operands.at(ofm_index).shape().dim(1)); - OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(0) > 0); - OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(1) == 2); + OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(0) > 0); + OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(1) == 2); // more shape validation will be done inside kernel. 
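Every ShapeValidator hunk in this file applies the same mechanical change: the validator no longer caches an _ctx operand set and a _current_layout member, and each visit() fetches _graph.operands() and _graph.layout() locally, so the only state left is the graph reference itself. A minimal stand-in sketch of the resulting visitor shape (the types below are placeholders, not the onert ir/ headers):

struct Operands {};                  // stand-in for ir::Operands
enum class Layout { UNKNOWN, NHWC }; // stand-in for ir::Layout

struct Graph                         // stand-in for ir::Graph
{
  const Operands &operands() const { return _operands; }
  Layout layout() const { return _layout; }
  Operands _operands;
  Layout _layout = Layout::NHWC;
};

class ShapeValidatorSketch
{
public:
  explicit ShapeValidatorSketch(const Graph &graph) : _graph{graph} {}

  void visitSomeOp() const
  {
    // Fetched per visit instead of being cached in members:
    const auto &operands = _graph.operands();
    const auto frontend_layout = _graph.layout();
    (void)operands;
    (void)frontend_layout;
    // ...OP_REQUIRES-style checks against operands would follow here...
  }

private:
  const Graph &_graph; // sole remaining member after the refactor
};

This keeps each visitor self-contained; as the operator()() hunk above shows, nothing needs to be re-initialized before the operation iteration starts.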
@@ -143,8 +139,9 @@ void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node) void ShapeValidator::visit(const ir::operation::BCQGather &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto indices_index{node.getInputs().at(ir::operation::BCQGather::Input::INDICES)}; @@ -153,13 +150,14 @@ void ShapeValidator::visit(const ir::operation::BCQGather &node) const auto input_clusters_index{ node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)}; - OP_REQUIRES(_ctx.at(indices_index).shape().rank() <= 2); // TODO : support rank up to 4 or more - OP_REQUIRES(_ctx.at(input_binary_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(input_scales_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(input_clusters_index).shape().rank() == 2); + OP_REQUIRES(operands.at(indices_index).shape().rank() <= + 2); // TODO : support rank up to 4 or more + OP_REQUIRES(operands.at(input_binary_index).shape().rank() == 2); + OP_REQUIRES(operands.at(input_scales_index).shape().rank() == 1); + OP_REQUIRES(operands.at(input_clusters_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(0) > 0); - OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(1) == 2); + OP_REQUIRES(operands.at(input_clusters_index).shape().dim(0) > 0); + OP_REQUIRES(operands.at(input_clusters_index).shape().dim(1) == 2); // more shape validation will be done inside kernel. } @@ -171,62 +169,67 @@ void ShapeValidator::visit(const ir::operation::Comparison &) void ShapeValidator::visit(const ir::operation::Softmax &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(0)}; - OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank()); + OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank()); } void ShapeValidator::visit(const ir::operation::InstanceNorm &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)}; const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape()); - OP_REQUIRES(_ctx.at(gamma_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ifm_index).shape() == operands.at(ofm_index).shape()); + OP_REQUIRES(operands.at(gamma_index).shape().rank() == 1); + OP_REQUIRES(operands.at(beta_index).shape().rank() == 1); } void ShapeValidator::visit(const ir::operation::Pool2D &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)}; - 
OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); } void ShapeValidator::visit(const ir::operation::Permute &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(0)}; - OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank()); + OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank()); } void ShapeValidator::visit(const ir::operation::Reduce &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)}; - const auto input_shape = _ctx.at(input_index).shape(); - const auto output_shape = _ctx.at(output_index).shape(); + const auto input_shape = operands.at(input_index).shape(); + const auto output_shape = operands.at(output_index).shape(); OP_REQUIRES(input_shape.rank() <= 4); OP_REQUIRES(output_shape.rank() <= input_shape.rank()); @@ -266,18 +269,20 @@ void ShapeValidator::visit(const ir::operation::Reduce &node) void ShapeValidator::visit(const ir::operation::Transpose &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)}; - const auto &output_shape = _ctx.at(output_index).shape(); - const auto &input_shape = _ctx.at(input_index).shape(); + const auto &output_shape = operands.at(output_index).shape(); + const auto &input_shape = operands.at(input_index).shape(); - OP_REQUIRES(_ctx.at(perm_index).shape().num_elements() == 0 || - input_shape.rank() == static_cast<int>(_ctx.at(perm_index).shape().num_elements())); + OP_REQUIRES(operands.at(perm_index).shape().num_elements() == 0 || + input_shape.rank() == + static_cast<int>(operands.at(perm_index).shape().num_elements())); OP_REQUIRES(input_shape.rank() == output_shape.rank()); } @@ -285,8 +290,9 @@ void ShapeValidator::visit(const ir::operation::RNN &node) { // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn // TODO Support dynamic rnn + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto hidden_state_out_index{ @@ -299,35 +305,36 @@ void ShapeValidator::visit(const ir::operation::RNN &node) const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)}; const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)}; - const auto batch_size = _ctx.at(output_index).shape().dim(0); - const auto num_units = _ctx.at(output_index).shape().dim(1); - - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 2 && - _ctx.at(hidden_state_out_index).shape().rank() == 2 && - _ctx.at(input_index).shape().rank() == 2 && - _ctx.at(weights_index).shape().rank() == 2 && - 
_ctx.at(recurrent_weights_index).shape().rank() == 2 && - _ctx.at(hidden_state_in_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(bias_index).shape().rank() == 1); - - OP_REQUIRES(batch_size == _ctx.at(input_index).shape().dim(0) && - batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) && - batch_size == _ctx.at(hidden_state_out_index).shape().dim(0)); - OP_REQUIRES(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1)); - - OP_REQUIRES(num_units == _ctx.at(weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_weights_index).shape().dim(0) && - num_units == _ctx.at(bias_index).shape().dim(0)); - OP_REQUIRES(num_units == _ctx.at(output_index).shape().dim(1) && - num_units == _ctx.at(recurrent_weights_index).shape().dim(1) && - num_units == _ctx.at(hidden_state_in_index).shape().dim(1) && - num_units == _ctx.at(hidden_state_out_index).shape().dim(1)); + const auto batch_size = operands.at(output_index).shape().dim(0); + const auto num_units = operands.at(output_index).shape().dim(1); + + OP_REQUIRES(operands.at(output_index).shape().rank() == 2 && + operands.at(hidden_state_out_index).shape().rank() == 2 && + operands.at(input_index).shape().rank() == 2 && + operands.at(weights_index).shape().rank() == 2 && + operands.at(recurrent_weights_index).shape().rank() == 2 && + operands.at(hidden_state_in_index).shape().rank() == 2); + OP_REQUIRES(operands.at(bias_index).shape().rank() == 1); + + OP_REQUIRES(batch_size == operands.at(input_index).shape().dim(0) && + batch_size == operands.at(hidden_state_in_index).shape().dim(0) && + batch_size == operands.at(hidden_state_out_index).shape().dim(0)); + OP_REQUIRES(operands.at(input_index).shape().dim(1) == operands.at(weights_index).shape().dim(1)); + + OP_REQUIRES(num_units == operands.at(weights_index).shape().dim(0) && + num_units == operands.at(recurrent_weights_index).shape().dim(0) && + num_units == operands.at(bias_index).shape().dim(0)); + OP_REQUIRES(num_units == operands.at(output_index).shape().dim(1) && + num_units == operands.at(recurrent_weights_index).shape().dim(1) && + num_units == operands.at(hidden_state_in_index).shape().dim(1) && + num_units == operands.at(hidden_state_out_index).shape().dim(1)); } void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; @@ -335,39 +342,40 @@ void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node) node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - const auto frontend_layout = _current_layout; - const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); - const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); + const auto frontend_layout = _graph.layout(); + const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); // All requirement as per NNAPI specification. 
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(paddings_index).shape().rank() == 2); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1); + OP_REQUIRES(operands.at(paddings_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2); - OP_REQUIRES(_ctx.at(paddings_index).shape().dim(0) == 2); - OP_REQUIRES(_ctx.at(paddings_index).shape().dim(1) == 2); + OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2); + OP_REQUIRES(operands.at(paddings_index).shape().dim(0) == 2); + OP_REQUIRES(operands.at(paddings_index).shape().dim(1) == 2); OP_REQUIRES(input_shape.C == output_shape.C); } void ShapeValidator::visit(const ir::operation::SpaceToDepth &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; - const auto frontend_layout = _current_layout; - const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); - const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); + const auto frontend_layout = _graph.layout(); + const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); const auto block_size = node.param().block_size; // All assertions as per NNAPI specification. 
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); OP_REQUIRES((input_shape.H % block_size == 0) && (input_shape.W % block_size == 0)); OP_REQUIRES(input_shape.N == output_shape.N); OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C); @@ -382,29 +390,31 @@ void ShapeValidator::visit(const ir::operation::ElementwiseBinary &) void ShapeValidator::visit(const ir::operation::ElementwiseUnary &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); + OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape()); } void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; - const auto &output_obj = _ctx.at(output_index); - const auto &lookups_obj = _ctx.at(lookups_index); - const auto &values_obj = _ctx.at(values_index); + const auto &output_obj = operands.at(output_index); + const auto &lookups_obj = operands.at(lookups_index); + const auto &values_obj = operands.at(values_index); // Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying // TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729) { - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto &output_shape = output_obj.shape(); @@ -427,26 +437,28 @@ void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node) void ShapeValidator::visit(const ir::operation::ExpandDims &node) { + const auto &operands = _graph.operands(); const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; - if (_ctx.at(axis_index).info().isDynamic()) + if (operands.at(axis_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1); + OP_REQUIRES(operands.at(axis_index).shape().rank() <= 1); } void ShapeValidator::visit(const ir::operation::HashtableLookup &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)}; const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)}; const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; - const auto &output_obj = _ctx.at(output_index); - const auto &lookups_obj = _ctx.at(lookups_index); - const auto &keys_obj = _ctx.at(keys_index); - const auto &values_obj = _ctx.at(values_index); + const auto &output_obj = operands.at(output_index); + const auto &lookups_obj = operands.at(lookups_index); + const auto &keys_obj = operands.at(keys_index); + const auto &values_obj = operands.at(values_index); - if (_ctx.at(output_index).info().isDynamic()) + if 
(operands.at(output_index).info().isDynamic()) return; const auto &output_shape = output_obj.shape(); @@ -464,28 +476,30 @@ void ShapeValidator::visit(const ir::operation::HashtableLookup &node) void ShapeValidator::visit(const ir::operation::TransposeConv &node) { // shape check + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; // Only 4D tensors are supported - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank()); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank()); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ifm_index).shape().rank()); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ker_index).shape().rank()); - const auto frontend_layout = _current_layout; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); + const auto frontend_layout = _graph.layout(); + const auto ofm_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); + const auto ifm_shape = operands.at(ifm_index).shape().asFeature(frontend_layout); // The kernel has only IHWO layout on frontend // So ker_shape is treated here below // I -> N // H -> H // W -> W // O -> C - const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC); + const auto ker_shape = operands.at(ker_index).shape().asFeature(ir::Layout::NHWC); OP_REQUIRES(ifm_shape.N == ofm_shape.N); OP_REQUIRES(ifm_shape.C == ker_shape.C); @@ -494,16 +508,17 @@ void ShapeValidator::visit(const ir::operation::TransposeConv &node) void ShapeValidator::visit(const ir::operation::Gather &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; - const auto ifm_shape = _ctx.at(ifm_index).shape(); - const auto indices_shape = _ctx.at(indices_index).shape(); - const auto ofm_shape = _ctx.at(ofm_index).shape(); + const auto ifm_shape = operands.at(ifm_index).shape(); + const auto indices_shape = operands.at(indices_index).shape(); + const auto ofm_shape = operands.at(ofm_index).shape(); OP_REQUIRES(ifm_shape.rank() <= 4); OP_REQUIRES(indices_shape.rank() <= 3); @@ -512,21 +527,22 @@ void ShapeValidator::visit(const ir::operation::Gather &node) void ShapeValidator::visit(const ir::operation::DepthToSpace &node) { + const auto &operands = _graph.operands(); int32_t block_size = node.param().block_size; // shape check const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; - const auto frontend_layout = _current_layout; - const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout); - const auto 
input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout); + const auto frontend_layout = _graph.layout(); + const auto output_shape = operands.at(output_index).shape().asFeature(frontend_layout); + const auto input_shape = operands.at(input_index).shape().asFeature(frontend_layout); - OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4); + OP_REQUIRES(operands.at(input_index).shape().rank() == 4); + OP_REQUIRES(operands.at(output_index).shape().rank() == 4); { OP_REQUIRES(output_shape.N == input_shape.N); @@ -539,22 +555,23 @@ void ShapeValidator::visit(const ir::operation::DepthToSpace &node) void ShapeValidator::visit(const ir::operation::Pack &node) { + const auto &operands = _graph.operands(); const auto axis{node.param().axis}; const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; // shape check - const auto &output_shape = _ctx.at(output_index).shape(); + const auto &output_shape = operands.at(output_index).shape(); const auto output_rank = static_cast<int32_t>(output_shape.rank()); const auto input1_index{node.getInputs().at(0)}; - const auto input_shape = _ctx.at(input1_index).shape(); + const auto input_shape = operands.at(input1_index).shape(); OP_REQUIRES(axis >= -output_rank && axis < output_rank); for (const auto &index : node.getInputs()) { - OP_REQUIRES(input_shape == _ctx.at(index).shape()); + OP_REQUIRES(input_shape == operands.at(index).shape()); } } @@ -562,8 +579,9 @@ void ShapeValidator::visit(const ir::operation::LSTM &node) { // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn // TODO Support dynamic rnn + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto scratch_buffer_index{ @@ -611,91 +629,96 @@ void ShapeValidator::visit(const ir::operation::LSTM &node) node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; - OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank()); - for (int i = 0; i < _ctx.at(input_index).shape().rank() - 1; ++i) + OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank()); + for (int i = 0; i < operands.at(input_index).shape().rank() - 1; ++i) { - OP_REQUIRES(_ctx.at(input_index).shape().dim(i) == _ctx.at(output_index).shape().dim(i)); + OP_REQUIRES(operands.at(input_index).shape().dim(i) == + operands.at(output_index).shape().dim(i)); } - OP_REQUIRES( - (_ctx.at(output_index).shape().rank() == 2 || _ctx.at(output_index).shape().rank() == 3) && - (_ctx.at(input_index).shape().rank() == 2 || _ctx.at(input_index).shape().rank() == 3) && - (!_ctx.exist(input_to_input_weights_index) || - _ctx.at(input_to_input_weights_index).shape().rank() == 2) && - _ctx.at(input_to_forget_weights_index).shape().rank() == 2 && - _ctx.at(input_to_cell_weights_index).shape().rank() == 2 && - _ctx.at(input_to_output_weights_index).shape().rank() == 2 && - (!_ctx.exist(recurrent_to_input_weights_index) || - _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2) && - _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 && - 
_ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 && - _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 && - (!_ctx.exist(projection_weights_index) || - _ctx.at(projection_weights_index).shape().rank() == 2) && - _ctx.at(output_state_in_index).shape().rank() == 2 && - _ctx.at(cell_state_in_index).shape().rank() == 2); - - OP_REQUIRES( - (!_ctx.exist(cell_to_input_weights_index) || - _ctx.at(cell_to_input_weights_index).shape().rank() == 1) && - (!_ctx.exist(cell_to_forget_weights_index) || - _ctx.at(cell_to_forget_weights_index).shape().rank() == 1) && - (!_ctx.exist(cell_to_output_weights_index) || - _ctx.at(cell_to_output_weights_index).shape().rank() == 1) && - (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().rank() == 1) && - _ctx.at(forget_gate_bias_index).shape().rank() == 1 && - _ctx.at(cell_bias_index).shape().rank() == 1 && - _ctx.at(output_gate_bias_index).shape().rank() == 1 && - (!_ctx.exist(projection_bias_index) || _ctx.at(projection_bias_index).shape().rank() == 1)); + OP_REQUIRES((operands.at(output_index).shape().rank() == 2 || + operands.at(output_index).shape().rank() == 3) && + (operands.at(input_index).shape().rank() == 2 || + operands.at(input_index).shape().rank() == 3) && + (!operands.exist(input_to_input_weights_index) || + operands.at(input_to_input_weights_index).shape().rank() == 2) && + operands.at(input_to_forget_weights_index).shape().rank() == 2 && + operands.at(input_to_cell_weights_index).shape().rank() == 2 && + operands.at(input_to_output_weights_index).shape().rank() == 2 && + (!operands.exist(recurrent_to_input_weights_index) || + operands.at(recurrent_to_input_weights_index).shape().rank() == 2) && + operands.at(recurrent_to_forget_weights_index).shape().rank() == 2 && + operands.at(recurrent_to_cell_weights_index).shape().rank() == 2 && + operands.at(recurrent_to_output_weights_index).shape().rank() == 2 && + (!operands.exist(projection_weights_index) || + operands.at(projection_weights_index).shape().rank() == 2) && + operands.at(output_state_in_index).shape().rank() == 2 && + operands.at(cell_state_in_index).shape().rank() == 2); + + OP_REQUIRES((!operands.exist(cell_to_input_weights_index) || + operands.at(cell_to_input_weights_index).shape().rank() == 1) && + (!operands.exist(cell_to_forget_weights_index) || + operands.at(cell_to_forget_weights_index).shape().rank() == 1) && + (!operands.exist(cell_to_output_weights_index) || + operands.at(cell_to_output_weights_index).shape().rank() == 1) && + (!operands.exist(input_gate_bias_index) || + operands.at(input_gate_bias_index).shape().rank() == 1) && + operands.at(forget_gate_bias_index).shape().rank() == 1 && + operands.at(cell_bias_index).shape().rank() == 1 && + operands.at(output_gate_bias_index).shape().rank() == 1 && + (!operands.exist(projection_bias_index) || + operands.at(projection_bias_index).shape().rank() == 1)); // CIFG assertion - OP_REQUIRES( - ((!_ctx.exist(input_to_input_weights_index) || - (_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) == 0)) && - (!_ctx.exist(recurrent_to_input_weights_index) || - (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) && - (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().dim(0) == 0) && - (!_ctx.exist(cell_to_input_weights_index) || - _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0)) || - 
((_ctx.exist(input_to_input_weights_index) && - (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) != 0)) && - (_ctx.exist(recurrent_to_input_weights_index) && - (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) && - (_ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0))); + OP_REQUIRES(((!operands.exist(input_to_input_weights_index) || + (operands.at(input_to_input_weights_index).shape().dim(0) == 0 && + operands.at(input_to_input_weights_index).shape().dim(1) == 0)) && + (!operands.exist(recurrent_to_input_weights_index) || + (operands.at(recurrent_to_input_weights_index).shape().dim(0) == 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) && + (!operands.exist(input_gate_bias_index) || + operands.at(input_gate_bias_index).shape().dim(0) == 0) && + (!operands.exist(cell_to_input_weights_index) || + operands.at(cell_to_input_weights_index).shape().dim(0) == 0)) || + ((operands.exist(input_to_input_weights_index) && + (operands.at(input_to_input_weights_index).shape().dim(0) != 0 && + operands.at(input_to_input_weights_index).shape().dim(1) != 0)) && + (operands.exist(recurrent_to_input_weights_index) && + (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) && + (operands.exist(input_gate_bias_index) && + operands.at(input_gate_bias_index).shape().dim(0) != 0))); // Peephole assertion - OP_REQUIRES(((!_ctx.exist(cell_to_forget_weights_index) || - _ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0) && - (!_ctx.exist(cell_to_output_weights_index) || - _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0)) || - ((_ctx.exist(cell_to_forget_weights_index) && - _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0) && - (_ctx.exist(cell_to_output_weights_index) && - _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0))); - - bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) && - (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) != 0); + OP_REQUIRES(((!operands.exist(cell_to_forget_weights_index) || + operands.at(cell_to_forget_weights_index).shape().dim(0) == 0) && + (!operands.exist(cell_to_output_weights_index) || + operands.at(cell_to_output_weights_index).shape().dim(0) == 0)) || + ((operands.exist(cell_to_forget_weights_index) && + operands.at(cell_to_forget_weights_index).shape().dim(0) != 0) && + (operands.exist(cell_to_output_weights_index) && + operands.at(cell_to_output_weights_index).shape().dim(0) != 0))); + + bool has_input_to_input_weights = + operands.exist(input_to_input_weights_index) && + (operands.at(input_to_input_weights_index).shape().dim(0) != 0 && + operands.at(input_to_input_weights_index).shape().dim(1) != 0); bool has_recurrent_to_input_weights = - _ctx.exist(recurrent_to_input_weights_index) && - (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0); + operands.exist(recurrent_to_input_weights_index) && + (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0); bool has_input_gate_bias = - _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0; - bool has_cell_to_input_weights 
= _ctx.exist(cell_to_input_weights_index) && - _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0; - bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) && - _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0; - bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) && - _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0; - bool has_projection_weights = _ctx.exist(projection_weights_index) && - (_ctx.at(projection_weights_index).shape().dim(0) != 0 && - _ctx.at(projection_weights_index).shape().dim(1) != 0); + operands.exist(input_gate_bias_index) && operands.at(input_gate_bias_index).shape().dim(0) != 0; + bool has_cell_to_input_weights = operands.exist(cell_to_input_weights_index) && + operands.at(cell_to_input_weights_index).shape().dim(0) != 0; + bool has_cell_to_forget_weights = operands.exist(cell_to_forget_weights_index) && + operands.at(cell_to_forget_weights_index).shape().dim(0) != 0; + bool has_cell_to_output_weights = operands.exist(cell_to_output_weights_index) && + operands.at(cell_to_output_weights_index).shape().dim(0) != 0; + bool has_projection_weights = operands.exist(projection_weights_index) && + (operands.at(projection_weights_index).shape().dim(0) != 0 && + operands.at(projection_weights_index).shape().dim(1) != 0); bool has_projection_bias = - _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0; + operands.exist(projection_bias_index) && operands.at(projection_bias_index).shape().dim(0) != 0; // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG). // true: no CIFG @@ -710,46 +733,48 @@ void ShapeValidator::visit(const ir::operation::LSTM &node) // NOTE The projection weights may have data but the projection bias may not. bool has_projection_param = has_projection_weights; - const auto batch_size = (_ctx.at(input_index).shape().rank() == 3 && node.param().time_major) - ? _ctx.at(input_index).shape().dim(1) - : _ctx.at(input_index).shape().dim(0); - OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) && - batch_size == _ctx.at(cell_state_in_index).shape().dim(0)); - - const auto input_size = _ctx.at(input_index).shape().dim(_ctx.at(input_index).shape().rank() - 1); - OP_REQUIRES(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) && - input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) && - input_size == _ctx.at(input_to_output_weights_index).shape().dim(1)); - - const auto num_units = _ctx.at(input_to_output_weights_index).shape().dim(0); - OP_REQUIRES(num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) && - num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) && - num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) && - num_units == _ctx.at(cell_bias_index).shape().dim(0) && - num_units == _ctx.at(output_gate_bias_index).shape().dim(0) && - num_units == _ctx.at(cell_state_in_index).shape().dim(1)); + const auto batch_size = (operands.at(input_index).shape().rank() == 3 && node.param().time_major) + ? 
operands.at(input_index).shape().dim(1) + : operands.at(input_index).shape().dim(0); + OP_REQUIRES(batch_size == operands.at(output_state_in_index).shape().dim(0) && + batch_size == operands.at(cell_state_in_index).shape().dim(0)); + + const auto input_size = + operands.at(input_index).shape().dim(operands.at(input_index).shape().rank() - 1); + OP_REQUIRES(input_size == operands.at(input_to_forget_weights_index).shape().dim(1) && + input_size == operands.at(input_to_cell_weights_index).shape().dim(1) && + input_size == operands.at(input_to_output_weights_index).shape().dim(1)); + + const auto num_units = operands.at(input_to_output_weights_index).shape().dim(0); + OP_REQUIRES(num_units == operands.at(input_to_cell_weights_index).shape().dim(0) && + num_units == operands.at(input_to_output_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_forget_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_cell_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_output_weights_index).shape().dim(0) && + num_units == operands.at(forget_gate_bias_index).shape().dim(0) && + num_units == operands.at(cell_bias_index).shape().dim(0) && + num_units == operands.at(output_gate_bias_index).shape().dim(0) && + num_units == operands.at(cell_state_in_index).shape().dim(1)); const auto output_size = - _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1); - OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) && - output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) && - output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) && - output_size == _ctx.at(output_state_in_index).shape().dim(1)); + operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1); + OP_REQUIRES(output_size == operands.at(recurrent_to_forget_weights_index).shape().dim(1) && + output_size == operands.at(recurrent_to_cell_weights_index).shape().dim(1) && + output_size == operands.at(recurrent_to_output_weights_index).shape().dim(1) && + output_size == operands.at(output_state_in_index).shape().dim(1)); if (has_cifg_param) { - OP_REQUIRES(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1)); - OP_REQUIRES(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) && - ((_ctx.exist(cell_to_input_weights_index) && - num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0)) || - (!_ctx.exist(cell_to_input_weights_index) || - _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) && - num_units == _ctx.at(input_gate_bias_index).shape().dim(0)); - OP_REQUIRES(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1)); + OP_REQUIRES(input_size == operands.at(input_to_input_weights_index).shape().dim(1)); + OP_REQUIRES( + num_units == operands.at(input_to_input_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_input_weights_index).shape().dim(0) && + ((operands.exist(cell_to_input_weights_index) && + num_units == operands.at(cell_to_input_weights_index).shape().dim(0)) || + (!operands.exist(cell_to_input_weights_index) || + operands.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) && + num_units == operands.at(input_gate_bias_index).shape().dim(0)); + OP_REQUIRES(output_size == operands.at(recurrent_to_input_weights_index).shape().dim(1)); OP_REQUIRES(has_input_to_input_weights && 
has_recurrent_to_input_weights && has_input_gate_bias); if (has_cell_to_input_weights) @@ -757,64 +782,65 @@ void ShapeValidator::visit(const ir::operation::LSTM &node) // NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole. OP_REQUIRES(has_peephole_param); } - if (_ctx.exist(scratch_buffer_index)) - OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4); + if (operands.exist(scratch_buffer_index)) + OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 4); } else { - if (_ctx.exist(scratch_buffer_index)) - OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3); + if (operands.exist(scratch_buffer_index)) + OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 3); } if (has_peephole_param) { - OP_REQUIRES(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) && - num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) && - (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) || - _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */)); + OP_REQUIRES(num_units == operands.at(cell_to_forget_weights_index).shape().dim(0) && + num_units == operands.at(cell_to_output_weights_index).shape().dim(0) && + (num_units == operands.at(cell_to_input_weights_index).shape().dim(0) || + operands.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */)); } if (has_projection_param) { - OP_REQUIRES(num_units == _ctx.at(projection_weights_index).shape().dim(1)); - OP_REQUIRES(output_size == _ctx.at(projection_weights_index).shape().dim(0)); + OP_REQUIRES(num_units == operands.at(projection_weights_index).shape().dim(1)); + OP_REQUIRES(output_size == operands.at(projection_weights_index).shape().dim(0)); if (has_projection_bias) { - OP_REQUIRES(output_size == _ctx.at(projection_bias_index).shape().dim(0)); + OP_REQUIRES(output_size == operands.at(projection_bias_index).shape().dim(0)); } } - if (_ctx.exist(scratch_buffer_index)) + if (operands.exist(scratch_buffer_index)) { - OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().rank() == 2); - OP_REQUIRES(batch_size == _ctx.at(scratch_buffer_index).shape().dim(0)); + OP_REQUIRES(operands.at(scratch_buffer_index).shape().rank() == 2); + OP_REQUIRES(batch_size == operands.at(scratch_buffer_index).shape().dim(0)); } - if (_ctx.exist(output_state_out_index)) + if (operands.exist(output_state_out_index)) { - OP_REQUIRES(_ctx.at(output_state_out_index).shape().rank() == 2); - OP_REQUIRES(batch_size == _ctx.at(output_state_out_index).shape().dim(0)); - OP_REQUIRES(output_size == _ctx.at(output_state_out_index).shape().dim(1)); + OP_REQUIRES(operands.at(output_state_out_index).shape().rank() == 2); + OP_REQUIRES(batch_size == operands.at(output_state_out_index).shape().dim(0)); + OP_REQUIRES(output_size == operands.at(output_state_out_index).shape().dim(1)); } - if (_ctx.exist(cell_state_out_index)) + if (operands.exist(cell_state_out_index)) { - OP_REQUIRES(_ctx.at(cell_state_out_index).shape().rank() == 2); - OP_REQUIRES(batch_size == _ctx.at(cell_state_out_index).shape().dim(0)); - OP_REQUIRES(num_units == _ctx.at(cell_state_out_index).shape().dim(1)); + OP_REQUIRES(operands.at(cell_state_out_index).shape().rank() == 2); + OP_REQUIRES(batch_size == operands.at(cell_state_out_index).shape().dim(0)); + OP_REQUIRES(num_units == operands.at(cell_state_out_index).shape().dim(1)); } } void ShapeValidator::visit(const ir::operation::L2Normalization &node) { + const auto &operands = _graph.operands(); const auto 
ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)}; - auto ifm_shape = _ctx.at(ifm_index).shape(); - auto ofm_shape = _ctx.at(ofm_index).shape(); + auto ifm_shape = operands.at(ifm_index).shape(); + auto ofm_shape = operands.at(ofm_index).shape(); OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank()); @@ -826,14 +852,15 @@ void ShapeValidator::visit(const ir::operation::L2Normalization &node) void ShapeValidator::visit(const ir::operation::Unpack &node) { + const auto &operands = _graph.operands(); const auto axis{node.param().axis}; const auto output_index{node.getInputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)}; - const auto &input_shape = _ctx.at(input_index).shape(); + const auto &input_shape = operands.at(input_index).shape(); const auto input_rank = static_cast<int32_t>(input_shape.rank()); OP_REQUIRES(axis >= -input_rank && axis < input_rank); @@ -841,22 +868,23 @@ void ShapeValidator::visit(const ir::operation::Unpack &node) void ShapeValidator::visit(const ir::operation::Pad &node) { + const auto &operands = _graph.operands(); const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)}; - OP_REQUIRES(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32); + OP_REQUIRES(operands.at(pad_index).typeInfo().type() == ir::DataType::INT32); const auto output_index{node.getInputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)}; - const auto &pad_shape = _ctx.at(pad_index).shape(); - const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank()); + const auto &pad_shape = operands.at(pad_index).shape(); + const auto input_rank = static_cast<int32_t>(operands.at(input_index).shape().rank()); OP_REQUIRES(pad_shape.rank() == 2); OP_REQUIRES(pad_shape.dim(0) == input_rank); OP_REQUIRES(pad_shape.dim(1) == 2); - OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank()); + OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank()); } void ShapeValidator::visit(const ir::operation::Select &) @@ -866,65 +894,70 @@ void ShapeValidator::visit(const ir::operation::Select &) void ShapeValidator::visit(const ir::operation::StridedSlice &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4); + OP_REQUIRES(operands.at(input_index).shape().rank() <= 4); } void ShapeValidator::visit(const ir::operation::Split &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)}; const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)}; const auto num_splits = node.param().num_splits; - const auto 
input_rank = _ctx.at(input_index).shape().rank(); - auto axis = *reinterpret_cast<const int32_t *>(_ctx.at(axis_index).data()->base()); + const auto input_rank = operands.at(input_index).shape().rank(); + auto axis = *reinterpret_cast<const int32_t *>(operands.at(axis_index).data()->base()); axis = axis < 0 ? axis + input_rank : axis; OP_REQUIRES(axis >= 0 && axis < input_rank); - OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0); + OP_REQUIRES(operands.at(input_index).shape().dim(axis) % num_splits == 0); } void ShapeValidator::visit(const ir::operation::Shape &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(0)}; UNUSED_RELEASE(input_index); - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1); + OP_REQUIRES(operands.at(output_index).shape().rank() == 1); } void ShapeValidator::visit(const ir::operation::ResizeBilinear &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) { return; } - OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4); + OP_REQUIRES(operands.at(input_index).shape().rank() == 4); + OP_REQUIRES(operands.at(output_index).shape().rank() == 4); } void ShapeValidator::visit(const ir::operation::Reverse &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); + OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape()); } void ShapeValidator::visit(const ir::operation::If &) @@ -940,17 +973,18 @@ void ShapeValidator::visit(const ir::operation::While &) void ShapeValidator::visit(const ir::operation::SquaredDifference &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; // Check for dimension constraints - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - auto output_shape = _ctx.at(output_index).shape(); - auto lhs_shape = _ctx.at(lhs_index).shape(); - auto rhs_shape = _ctx.at(rhs_index).shape(); + auto output_shape = operands.at(output_index).shape(); + auto lhs_shape = operands.at(lhs_index).shape(); + auto rhs_shape = operands.at(rhs_index).shape(); // Check for output rank OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank())); auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank()); @@ -982,36 +1016,40 @@ void ShapeValidator::visit(const ir::operation::SquaredDifference &node) } void ShapeValidator::visit(const ir::operation::Tile &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if 
(operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(0)}; const auto multiple_index{node.getInputs().at(1)}; - OP_REQUIRES(_ctx.at(multiple_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(multiple_index).shape().dim(0) == _ctx.at(input_index).shape().rank()); - OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank()); + OP_REQUIRES(operands.at(multiple_index).shape().rank() == 1); + OP_REQUIRES(operands.at(multiple_index).shape().dim(0) == + operands.at(input_index).shape().rank()); + OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank()); } void ShapeValidator::visit(const ir::operation::Range &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)}; const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)}; const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)}; // Check for dimension constraints - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(start_index).shape().rank() == 0); - OP_REQUIRES(_ctx.at(limit_index).shape().rank() == 0); - OP_REQUIRES(_ctx.at(delta_index).shape().rank() == 0); + OP_REQUIRES(operands.at(start_index).shape().rank() == 0); + OP_REQUIRES(operands.at(limit_index).shape().rank() == 0); + OP_REQUIRES(operands.at(delta_index).shape().rank() == 0); } void ShapeValidator::visit(const ir::operation::MatrixBandPart &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)}; const auto num_lower_index{ @@ -1020,23 +1058,24 @@ void ShapeValidator::visit(const ir::operation::MatrixBandPart &node) node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)}; // Check for dimension constraints - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix - OP_REQUIRES(_ctx.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar - OP_REQUIRES(_ctx.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar + OP_REQUIRES(operands.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix + OP_REQUIRES(operands.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar + OP_REQUIRES(operands.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar } void ShapeValidator::visit(const ir::operation::LogSoftmax &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(0)}; - OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank()); + OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank()); } } // namespace compiler diff --git a/runtime/onert/core/src/compiler/ShapeValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h index 763cf7ce3..a51e8adc0 100644 --- a/runtime/onert/core/src/compiler/ShapeValidator.h +++ b/runtime/onert/core/src/compiler/ShapeValidator.h @@ -39,8 +39,13 @@ class 
ShapeValidator : public ir::OperationVisitor public: ShapeValidator(void) = delete; ShapeValidator(const ir::Graph &graph); + ShapeValidator(const ShapeValidator &) = delete; + ShapeValidator(ShapeValidator &&) = delete; + ~ShapeValidator() = default; public: + ShapeValidator &operator=(const ShapeValidator &) = delete; + ShapeValidator &operator=(ShapeValidator &&) = delete; void operator()(); public: @@ -90,10 +95,7 @@ private: void checkUnaryOp(const ir::Operation &node); private: - // TODO Remove _ctx field const ir::Graph &_graph; - const ir::Operands &_ctx; - ir::Layout _current_layout; }; } // namespace compiler diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc index f2fee2c3c..485450560 100644 --- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc +++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc @@ -19,62 +19,90 @@ #include "util/logging.h" #include <sstream> +#include <stdexcept> namespace onert { namespace compiler { - -void StaticShapeInferer::inferSubgraph(ir::SubgraphIndex subg_ind) +void OperandObserver::updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info, + bool unpredictable) { - StaticShapeInferer inferer(subg_ind, _lowered_subgs); - auto &lgraph = _lowered_subgs.at(subg_ind); - for (auto op_ind : lgraph->graph().topolSortOperations()) + assert(changed_operands_info.size() == _operands.size()); + for (size_t i = 0; i < changed_operands_info.size(); ++i) { - auto &op = lgraph->graph().operations().at(op_ind); - bool has_dynamic_tensor = inferer.infer(op); - lgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor); + const auto &changed_operand_info = changed_operands_info.at(i); + auto &operand = _operands.at(i); + // assert(changed_operand_info.typeInfo() == operand->typeInfo()); + // This error check may be replaced by an assertion if this function is called after the + // validation of models is completed.
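+ // A minimal usage sketch (illustrative only; the construction over pointers to a
+ // subgraph's input operands is assumed, and the index names are hypothetical):
+ //   OperandObserver observer{{&operands.at(input0), &operands.at(input1)}};
+ //   observer.updateShapes(caller_infos);       // propagate the caller's static shapes
+ //   observer.updateShapes(caller_infos, true); // conservatively mark non-constant operands dynamic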
+ if (changed_operand_info.typeInfo() != operand->typeInfo()) + { + throw std::runtime_error("OperandObserver: The types of operands are mismatched"); + } + if (!operand->info().isConstant() && (changed_operand_info.isDynamic() || unpredictable)) + { + operand->info().setDynamic(); + } + else + { + const auto &new_shape = changed_operands_info.at(i).shape(); + operand->info().shape(new_shape); + } } } -bool StaticShapeInferer::infer(const ir::Operation &op) +void StaticShapeInferer::infer() { - bool has_dynamic_tensor = false; - - auto opcode = op.opcode(); - - _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit() - - // IF: need shape inference for then, else - // While: need shape inference for condition, body - if (opcode == ir::OpCode::If || opcode == ir::OpCode::While) - { - op.accept(*this); - } - else + for (const auto &op_idx : _lowered_subg->graph().topolSortOperations()) { - _return_has_dynamic_tensor = checkDynamicInput(op); - - if (_return_has_dynamic_tensor) + const auto &op = _lowered_subg->graph().operations().at(op_idx); + bool has_dynamic_tensor = false; + const auto opcode = op.opcode(); + // IF: requires shape inference for then, else + // While: requires shape inference for condition, body + if (opcode == ir::OpCode::If || opcode == ir::OpCode::While) { - setDynamicOutput(op); + op.accept(*this); } else { - op.accept(*this); + has_dynamic_tensor = checkDynamicInput(op); + if (has_dynamic_tensor) + { + setDynamicOutput(op); + } + else + { + op.accept(*this); + } } + has_dynamic_tensor = has_dynamic_tensor || checkDynamicOutput(op); + _lowered_subg->setHasDynamicTensor(op_idx, has_dynamic_tensor); } - has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor; - - return has_dynamic_tensor; + if (_controlflow_output_observer != nullptr) + { + // re-sizing output shapes of the controlflow operation branching to this subgraph + std::vector<ir::OperandInfo> outputs_info; + const auto &graph = _lowered_subg->graph(); + const auto &outputs = graph.getOutputs(); + for (size_t i = 0; i < outputs.size(); ++i) + { + const auto &operand_info = graph.operands().at(outputs.at(i)).info(); + outputs_info.emplace_back(operand_info); + } + _controlflow_output_observer->updateShapes(outputs_info); + } } bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op) { + const auto &operands = _lowered_subg->graph().operands(); for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED) { - if (_operands.at(input_idx).info().isDynamic()) + if (operands.at(input_idx).info().isDynamic()) { return true; } @@ -83,11 +111,25 @@ bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op) return false; } +bool StaticShapeInferer::checkDynamicOutput(const ir::Operation &op) +{ + auto &operands = _lowered_subg->graph().operands(); + for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED) + { + if (operands.at(output_idx).info().isDynamic()) + { + return true; + } + } + return false; +} + void StaticShapeInferer::setDynamicOutput(const ir::Operation &op) { + auto &operands = _lowered_subg->graph().operands(); for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED) { - _operands.at(output_idx).info().setDynamic(); + operands.at(output_idx).info().setDynamic(); } } @@ -95,11 +137,12 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op, const ir::OperandIndex lhs_idx, const ir::OperandIndex rhs_idx) { - const auto &lhs = _operands.at(lhs_idx); - const auto &rhs =
_operands.at(rhs_idx); + auto &operands = _lowered_subg->graph().operands(); + const auto &lhs = operands.at(lhs_idx); + const auto &rhs = operands.at(rhs_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // re-sizing output shape ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape()); @@ -109,11 +152,12 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op, void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx) { - const auto &input = _operands.at(input_idx); + auto &operands = _lowered_subg->graph().operands(); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // re-sizing output shape ir::Shape new_shape = input.info().shape(); @@ -136,36 +180,31 @@ void StaticShapeInferer::dump() return sstream.str(); }; - for (const auto &pair : _lowered_subgs) - { - const auto index = pair.first; - const auto &lowered_subg = pair.second; - VERBOSE(StaticShapeInferer) << index << std::endl; - lowered_subg->graph().operands().iterate( - [&](const ir::OperandIndex &ind, const ir::Operand &operand) { - VERBOSE(StaticShapeInferer) - << " " << ind << ", " << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", " - << get_shape_str(operand.info().shape()) << std::endl; - }); - } + _lowered_subg->graph().operands().iterate( + [&](const ir::OperandIndex &ind, const ir::Operand &operand) { + VERBOSE(StaticShapeInferer) << " " << ind << ", " + << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", " + << get_shape_str(operand.info().shape()) << std::endl; + }); } void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; - const auto &axis = _operands.at(axis_idx); + const auto &axis = operands.at(axis_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); if (!axis.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -181,27 +220,31 @@ void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op) void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS); const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS); const auto output_index = op.getOutputs().at(0); - const auto &lhs = _operands.at(lhs_index); - const auto &rhs = _operands.at(rhs_index); - auto &output = _operands.at(output_index); + const auto &lhs = operands.at(lhs_index); + const auto &rhs = operands.at(rhs_index); + auto &output = operands.at(output_index); auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param()); output.info().shape(new_shape); } void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op) { + auto &operands = 
_lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto cluster_idx{ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)}; - const auto &cluster = _operands.at(cluster_idx); + const auto &cluster = operands.at(cluster_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base()); assert(cluster_buf); @@ -214,17 +257,19 @@ void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op) void StaticShapeInferer::visit(const ir::operation::BCQGather &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)}; - const auto &indices = _operands.at(indices_idx); + const auto &indices = operands.at(indices_idx); const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)}; - const auto &input_binary = _operands.at(input_binary_idx); + const auto &input_binary = operands.at(input_binary_idx); const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)}; - const auto &cluster = _operands.at(cluster_idx); + const auto &cluster = operands.at(cluster_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base()); assert(cluster_buf); @@ -247,16 +292,16 @@ void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op) void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op) { // get mutable output operand + auto &operands = _lowered_subg->graph().operands(); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)}; - const auto &shape = _operands.at(shape_idx); + const auto &shape = operands.at(shape_idx); if (!shape.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -276,16 +321,18 @@ void StaticShapeInferer::visit(const ir::operation::Comparison &op) void StaticShapeInferer::visit(const ir::operation::Concat &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_count = op.getInputs().size(); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); shape_inference::Shapes input_shapes; for (uint32_t i = 0; i < input_count; i++) { const auto input_idx{op.getInputs().at(i)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); input_shapes.emplace_back(input.shape()); } @@ -297,12 +344,14 @@ void StaticShapeInferer::visit(const ir::operation::Concat &op) void StaticShapeInferer::visit(const ir::operation::Conv2D &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Conv2D::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto ker_idx{op.getInputs().at(ir::operation::Conv2D::Input::KERNEL)}; - const auto &ker = 
_operands.at(ker_idx); + const auto &ker = operands.at(ker_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // re-sizing output shape ir::Shape new_shape = @@ -328,17 +377,18 @@ void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op) void StaticShapeInferer::visit(const ir::operation::ExpandDims &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto axis_idx{op.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; - const auto &axis = _operands.at(axis_idx); + const auto &axis = operands.at(axis_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); if (!axis.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -360,15 +410,16 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op) void StaticShapeInferer::visit(const ir::operation::Fill &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)}; - const auto &shape = _operands.at(shape_idx); + const auto &shape = operands.at(shape_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); if (!shape.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -390,15 +441,17 @@ void StaticShapeInferer::visit(const ir::operation::Fill &op) void StaticShapeInferer::visit(const ir::operation::FullyConnected &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)}; - const auto &ker = _operands.at(ker_idx); + const auto &ker = operands.at(ker_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // re-sizing output shape ir::Shape new_shape = shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape()); @@ -412,15 +465,17 @@ void StaticShapeInferer::visit(const ir::operation::FusedBatchNorm &op) void StaticShapeInferer::visit(const ir::operation::Gather &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)}; - const auto &indices = _operands.at(indices_idx); + const auto &indices = operands.at(indices_idx); const auto rank = input.info().shape().rank(); const auto axis = ((op.param().axis < 0) ? 
rank + op.param().axis : op.param().axis); @@ -434,70 +489,21 @@ void StaticShapeInferer::visit(const ir::operation::Gather &op) void StaticShapeInferer::visit(const ir::operation::If &op) { - auto &then_graph = _lowered_subgs.at(op.param().then_subg_index)->graph(); - auto &else_graph = _lowered_subgs.at(op.param().else_subg_index)->graph(); + // re-sizing input shapes of then/else subgraph const std::vector<ir::OperandIndex> inputs{op.getInputs().begin() + 1, op.getInputs().end()}; - const auto &outputs = op.getOutputs(); - // re-sizing input shapes of then subgraph - const auto &then_inputs = then_graph.getInputs(); - assert(inputs.size() == then_inputs.size()); + std::vector<ir::OperandInfo> inputs_info; + const auto &graph = _lowered_subg->graph(); for (size_t i = 0; i < inputs.size(); ++i) { - auto &then_input = then_graph.operands().at(then_inputs.at(i)); - if (_operands.at(inputs.at(i)).info().isDynamic()) - { - then_input.info().setDynamic(); - } - else - { - auto new_shape = _operands.at(inputs.at(i)).info().shape(); - then_input.info().shape(new_shape); - } + const auto &operand_info = graph.operands().at(inputs.at(i)).info(); + inputs_info.emplace_back(operand_info); } + _subg_input_observers.at(op.param().then_subg_index)->updateShapes(inputs_info); + _child_inferers.at(op.param().then_subg_index)->infer(); - // re-sizing input shapes of else subgraph - const auto &else_inputs = else_graph.getInputs(); - assert(inputs.size() == else_inputs.size()); - for (size_t i = 0; i < inputs.size(); ++i) - { - auto &else_input = else_graph.operands().at(else_inputs.at(i)); - if (_operands.at(inputs.at(i)).info().isDynamic()) - { - else_input.info().setDynamic(); - } - else - { - const auto &new_shape = _operands.at(inputs.at(i)).info().shape(); - else_input.info().shape(new_shape); - } - } - - inferSubgraph(op.param().then_subg_index); - inferSubgraph(op.param().else_subg_index); - - // re-sizing output shapes - // TODO use then_graph / else_graph instead - const auto &then_outputs = _lowered_subgs.at(op.param().then_subg_index)->graph().getOutputs(); - const auto &else_outputs = _lowered_subgs.at(op.param().else_subg_index)->graph().getOutputs(); - assert(outputs.size() == then_outputs.size()); - assert(outputs.size() == else_outputs.size()); - for (size_t i = 0; i < outputs.size(); ++i) - { - const auto &then_output = then_graph.operands().at(then_outputs.at(i)); - const auto &else_output = else_graph.operands().at(else_outputs.at(i)); - auto &output = _operands.at(outputs.at(i)); - if (!then_output.info().isDynamic() && !else_output.info().isDynamic() && - then_output.shape() == else_output.shape()) - { - output.info().shape(then_output.shape()); - } - else - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - } - } + _subg_input_observers.at(op.param().else_subg_index)->updateShapes(inputs_info); + _child_inferers.at(op.param().else_subg_index)->infer(); } void StaticShapeInferer::visit(const ir::operation::L2Normalization &op) @@ -507,8 +513,10 @@ void StaticShapeInferer::visit(const ir::operation::L2Normalization &op) void StaticShapeInferer::visit(const ir::operation::LSTM &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; - auto &output = _operands.at(output_index); + auto &output = operands.at(output_index); const auto output_state_out_index{ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; @@ -518,24 +526,24 @@ void StaticShapeInferer::visit(const 
ir::operation::LSTM &op) const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; if (output.info().isDynamic() || - (_operands.exist(output_state_out_index) && - _operands.at(output_state_out_index).info().isDynamic()) || - (_operands.exist(cell_state_out_index) && - _operands.at(cell_state_out_index).info().isDynamic()) || - (_operands.exist(scratch_buffer_index) && - _operands.at(scratch_buffer_index).info().isDynamic())) + (operands.exist(output_state_out_index) && + operands.at(output_state_out_index).info().isDynamic()) || + (operands.exist(cell_state_out_index) && + operands.at(cell_state_out_index).info().isDynamic()) || + (operands.exist(scratch_buffer_index) && + operands.at(scratch_buffer_index).info().isDynamic())) return; const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)}; - const auto &input = _operands.at(input_index); + const auto &input = operands.at(input_index); const auto input_to_output_weights_index{ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; - const auto &input_to_output_weights = _operands.at(input_to_output_weights_index); + const auto &input_to_output_weights = operands.at(input_to_output_weights_index); const auto recurrent_to_output_weights_index{ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; - const auto &recurrent_to_output_weights = _operands.at(recurrent_to_output_weights_index); + const auto &recurrent_to_output_weights = operands.at(recurrent_to_output_weights_index); // re-sizing outputs const int n_batch = (input.shape().rank() == 3 && op.param().time_major) ? input.shape().dim(1) @@ -555,21 +563,21 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op) output.info().shape(ir::Shape{n_batch, n_output}); } - if (_operands.exist(output_state_out_index)) + if (operands.exist(output_state_out_index)) { - auto &output_state_out = _operands.at(output_state_out_index); + auto &output_state_out = operands.at(output_state_out_index); output_state_out.info().shape(ir::Shape{n_batch, n_output}); } - if (_operands.exist(cell_state_out_index)) + if (operands.exist(cell_state_out_index)) { - auto &cell_state_out = _operands.at(cell_state_out_index); + auto &cell_state_out = operands.at(cell_state_out_index); cell_state_out.info().shape(ir::Shape{n_batch, n_cell}); } - if (_operands.exist(scratch_buffer_index)) + if (operands.exist(scratch_buffer_index)) { - auto &scratch_buffer = _operands.at(scratch_buffer_index); + auto &scratch_buffer = operands.at(scratch_buffer_index); const auto input_to_input_weights_index{ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; @@ -577,11 +585,11 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op) op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; bool has_input_to_input_weights = - _operands.at(input_to_input_weights_index).shape().dim(0) != 0 && - _operands.at(input_to_input_weights_index).shape().dim(1) != 0; + operands.at(input_to_input_weights_index).shape().dim(0) != 0 && + operands.at(input_to_input_weights_index).shape().dim(1) != 0; bool has_recurrent_to_input_weights = - _operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0; + operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0; // NOTE The cell_to_input_weights do not exist in non-peephole although regular 
LSTM(non-CIFG). // true: no CIFG @@ -605,20 +613,21 @@ void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op) void StaticShapeInferer::visit(const ir::operation::OneHot &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)}; - const auto &indice = _operands.at(indice_idx); + const auto &indice = operands.at(indice_idx); const auto depth_idx{op.getInputs().at(ir::operation::OneHot::Input::DEPTH)}; - const auto &depth = _operands.at(depth_idx); + const auto &depth = operands.at(depth_idx); const auto axis = op.param().axis; auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); if (!depth.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -631,12 +640,14 @@ void StaticShapeInferer::visit(const ir::operation::OneHot &op) void StaticShapeInferer::visit(const ir::operation::Pack &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); const auto rank = input.shape().rank() + 1; const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis); @@ -651,21 +662,22 @@ void StaticShapeInferer::visit(const ir::operation::Pack &op) void StaticShapeInferer::visit(const ir::operation::Pad &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Pad::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto pad_idx{op.getInputs().at(ir::operation::Pad::Input::PAD)}; - const auto &pad = _operands.at(pad_idx); + const auto &pad = operands.at(pad_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // if pad is not constant, output also becomes dynamic if (!pad.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -678,10 +690,12 @@ void StaticShapeInferer::visit(const ir::operation::Pad &op) void StaticShapeInferer::visit(const ir::operation::Permute &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // re-sizing output shape // Permute is a special operation that layouts of input/output may be different on backend @@ -700,16 +714,18 @@ void StaticShapeInferer::visit(const ir::operation::Pow &op) void StaticShapeInferer::visit(const ir::operation::Range &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto start_idx{op.getInputs().at(ir::operation::Range::Input::START)}; const auto limit_idx{op.getInputs().at(ir::operation::Range::Input::LIMIT)}; const auto delta_idx{op.getInputs().at(ir::operation::Range::Input::DELTA)}; - const auto &start_op = _operands.at(start_idx); - const auto &limit_op = _operands.at(limit_idx); - const auto &delta_op = 
_operands.at(delta_idx); + const auto &start_op = operands.at(start_idx); + const auto &limit_op = operands.at(limit_idx); + const auto &delta_op = operands.at(delta_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); ir::Shape new_shape; if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant()) @@ -731,21 +747,22 @@ void StaticShapeInferer::visit(const ir::operation::Range &op) else { output.info().setDynamic(); - _return_has_dynamic_tensor = true; } } void StaticShapeInferer::visit(const ir::operation::Reduce &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)}; - const auto &axes = _operands.at(axes_idx); + const auto &axes = operands.at(axes_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); std::vector<int32_t> axes_vec; for (size_t i = 0; i < axes.shape().num_elements(); ++i) @@ -777,19 +794,21 @@ void StaticShapeInferer::visit(const ir::operation::Reduce &op) void StaticShapeInferer::visit(const ir::operation::Reshape &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // New shape is given by second input tensor if (op.getInputs().size() == 2) { // Let's check the second input const auto shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)}; - const auto &shape = _operands.at(shape_idx); + const auto &shape = operands.at(shape_idx); if (shape.isConstant()) { @@ -810,7 +829,6 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op) { // if shape is NOT Const, set output shape to be dynamic output.info().setDynamic(); - _return_has_dynamic_tensor = true; } } // New shape is given by option @@ -835,21 +853,22 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op) void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); int32_t height_out, width_out; if (op.getInputs().size() == 2) { - auto &size = _operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE)); + auto &size = operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE)); if (!size.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } const auto size_v = size.asVector<std::int32_t>(); @@ -881,17 +900,19 @@ void StaticShapeInferer::visit(const ir::operation::Reverse &op) void StaticShapeInferer::visit(const
ir::operation::Select &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)}; - const auto &input_cond = _operands.at(input_cond_idx); + const auto &input_cond = operands.at(input_cond_idx); const auto input_true_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)}; - const auto &input_true = _operands.at(input_true_idx); + const auto &input_true = operands.at(input_true_idx); const auto input_false_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)}; - const auto &input_false = _operands.at(input_false_idx); + const auto &input_false = operands.at(input_false_idx); auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // Select output shape ir::Shape new_shape = shape_inference::inferSelectShape( @@ -901,12 +922,14 @@ void StaticShapeInferer::visit(const ir::operation::Select &op) void StaticShapeInferer::visit(const ir::operation::Shape &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // re-sizing output shape ir::Shape output_shape; @@ -917,20 +940,21 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op) void StaticShapeInferer::visit(const ir::operation::Slice &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)}; - const auto &input = _operands.at(input_index); + const auto &input = operands.at(input_index); const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)}; - const auto &begins = _operands.at(begins_index); + const auto &begins = operands.at(begins_index); const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)}; - const auto &sizes = _operands.at(sizes_index); + const auto &sizes = operands.at(sizes_index); const auto output_index = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_index); + ir::Operand &output = operands.at(output_index); // Whether input is constant or not does not affect whether output is dynamic or not if (!(begins.isConstant() && sizes.isConstant())) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -959,21 +983,22 @@ void StaticShapeInferer::visit(const ir::operation::Softmax &op) void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto output_index = op.getOutputs().at(0); const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - ir::Operand &output = _operands.at(output_index); - const auto &input = _operands.at(input_idx); - const auto &block_shape = _operands.at(block_shape_idx); - const auto &padding = _operands.at(padding_idx); // Whether
input is constant or not does not affect whether output is dynamic or not if (!(block_shape.isConstant() && padding.isConstant())) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -992,21 +1017,22 @@ void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op) void StaticShapeInferer::visit(const ir::operation::Split &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)}; - const auto &axis = _operands.at(axis_idx); + const auto &axis = operands.at(axis_idx); auto outputs = op.getOutputs(); if (!axis.isConstant()) { for (auto output_idx : outputs) { - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); output.info().setDynamic(); } - _return_has_dynamic_tensor = true; return; } @@ -1022,7 +1048,7 @@ void StaticShapeInferer::visit(const ir::operation::Split &op) shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits); for (auto output_idx : outputs) { - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); output.info().shape(new_shape); } } @@ -1035,11 +1061,13 @@ void StaticShapeInferer::visit(const ir::operation::SquaredDifference &op) void StaticShapeInferer::visit(const ir::operation::Squeeze &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // Squeeze output shape ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param()); @@ -1048,21 +1076,22 @@ void StaticShapeInferer::visit(const ir::operation::Squeeze &op) void StaticShapeInferer::visit(const ir::operation::StridedSlice &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)}; - const auto &input = _operands.at(input_index); + const auto &input = operands.at(input_index); const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)}; - const auto &starts = _operands.at(starts_index); + const auto &starts = operands.at(starts_index); const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; - const auto &ends = _operands.at(ends_index); + const auto &ends = operands.at(ends_index); const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - const auto &strides = _operands.at(strides_index); + const auto &strides = operands.at(strides_index); const auto output_index = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_index); + ir::Operand &output = operands.at(output_index); if (!(starts.isConstant() && ends.isConstant() && strides.isConstant())) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -1085,19 +1114,20 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op) void StaticShapeInferer::visit(const ir::operation::Tile &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto
input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto multiplier_idx{op.getInputs().at(ir::operation::Tile::Input::MULTIPLES)}; - const auto &multiplier = _operands.at(multiplier_idx); + const auto &multiplier = operands.at(multiplier_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); if (!multiplier.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -1112,11 +1142,13 @@ void StaticShapeInferer::visit(const ir::operation::Tile &op) void StaticShapeInferer::visit(const ir::operation::Transpose &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto perm_idx{op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)}; - const auto &perm = _operands.at(perm_idx); + const auto &perm = operands.at(perm_idx); // perm.shape() != ir::Shape{0} means that perm is (n-1...0) // TODO This condition changes to perm.num_elements() == 0 @@ -1124,11 +1156,10 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op) // get mutable output operand const auto output_idx = op.getOutputs().at(0); - auto &output = _operands.at(output_idx); + auto &output = operands.at(output_idx); if (!perm.isConstant() && !is_regular_transpose) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -1157,8 +1188,10 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op) void StaticShapeInferer::visit(const ir::operation::Unpack &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto num = op.param().num; const auto rank = input.shape().rank(); const auto axis = ((op.param().axis < 0) ? 
rank + op.param().axis : op.param().axis); @@ -1169,10 +1202,9 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op) for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++) { const auto output_idx = op.getOutputs().at(out_tensor_idx); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); output.info().setDynamic(); } - _return_has_dynamic_tensor = true; return; } @@ -1182,69 +1214,43 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op) for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++) { const auto output_idx = op.getOutputs().at(out_tensor_idx); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); output.info().shape(new_shape); } } void StaticShapeInferer::visit(const ir::operation::While &op) { - auto &cond_graph = _lowered_subgs.at(op.param().cond_subg_index)->graph(); - auto &body_graph = _lowered_subgs.at(op.param().body_subg_index)->graph(); + auto body_input_observer = _subg_input_observers.at(op.param().body_subg_index).get(); + auto cond_input_observer = _subg_input_observers.at(op.param().cond_subg_index).get(); + // re-sizing input shapes of body subgraph const auto inputs = op.getInputs(); - const auto &outputs = op.getOutputs(); - - // re-sizing input shapes of then subgraph - const auto &cond_inputs = cond_graph.getInputs(); - assert(inputs.size() == cond_inputs.size()); + std::vector<ir::OperandInfo> inputs_info; + const auto &graph = _lowered_subg->graph(); for (size_t i = 0; i < inputs.size(); ++i) { - const auto &input = _operands.at(inputs.at(i)); - auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - if (input.info().isDynamic()) - { - cond_input.info().setDynamic(); - } - else - { - auto new_shape = input.info().shape(); - cond_input.info().shape(new_shape); - } + const auto &operand_info = graph.operands().at(inputs.at(i)).info(); + inputs_info.emplace_back(operand_info); } - // re-sizing input shapes of body subgraph - const auto &body_inputs = body_graph.getInputs(); - assert(cond_inputs.size() == body_inputs.size()); - for (size_t i = 0; i < cond_inputs.size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - auto &body_input = body_graph.operands().at(body_inputs.at(i)); - if (cond_input.info().isDynamic()) - { - body_input.info().setDynamic(); - } - else - { - const auto &new_shape = cond_input.info().shape(); - body_input.info().shape(new_shape); - } - } - - // re-sizing operands of body subgraph - inferSubgraph(op.param().body_subg_index); + body_input_observer->updateShapes(inputs_info); + _child_inferers.at(op.param().body_subg_index)->infer(); // Check whether while operation's shapes are predictable - // If any of shape of body outputs and cond inputs are different, non-constant operands would be - // set to dynamic + // This while op's outputs are also updated by the call above to + // "_child_inferers.at(op.param().body_subg_index)->infer()". That means that the body's outputs + // and this op's outputs must have the same shape. So we can predict whether the body subgraph's + // shapes will change on every iteration by comparing the shapes of this op's inputs and outputs. + // If any body output shape differs from the corresponding input shape, non-constant operands + // will be set to dynamic.
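+ // For example, a body that concatenates one of its inputs along axis 0 feeds back a tensor + // whose leading dimension grows every iteration; the comparison below then fails and the + // loop's non-constant operands are marked dynamic.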
bool check_unpredictable_dynamic = false; - const auto &body_outputs = body_graph.getOutputs(); - assert(body_outputs.size() == cond_inputs.size()); - for (size_t i = 0; i < body_outputs.size(); ++i) + const auto &updated_outputs = op.getOutputs(); + assert(inputs_info.size() == updated_outputs.size()); + for (size_t i = 0; i < updated_outputs.size(); ++i) { - const auto &body_output = body_graph.operands().at(body_outputs.at(i)); - auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - if ((cond_input.info().isDynamic() != body_output.info().isDynamic()) || - (cond_input.shape() != body_output.shape())) + const auto &input_info = inputs_info.at(i); + const auto &output_info = graph.operands().at(updated_outputs.at(i)).info(); + if (input_info.isDynamic() != output_info.isDynamic() || + input_info.shape() != output_info.shape()) { check_unpredictable_dynamic = true; break; @@ -1253,53 +1259,11 @@ void StaticShapeInferer::visit(const ir::operation::While &op) if (check_unpredictable_dynamic) { - // Set inputs of body subgraph - for (const auto &input_index : body_inputs) - { - auto &input = body_graph.operands().at(input_index); - if (!input.isConstant()) - { - input.info().setDynamic(); - } - } - - // Set inputs of cond subgraph - for (const auto &input_index : cond_inputs) - { - auto &input = cond_graph.operands().at(input_index); - if (!input.isConstant()) - { - input.info().setDynamic(); - } - } - - // Set non-constant operands of body subgraph to dynamic - inferSubgraph(op.param().body_subg_index); - } - - // re-sizing operands of cond subgraph - // If check_unpredictable_dynamic is true, non-constant operands of cond subgraph would be set to - // dynamic - inferSubgraph(op.param().cond_subg_index); - - // re-sizing outputs of while operation - // If check_unpredictable_dynamic is true, outputs of while operation would be set to dynamic - assert(cond_inputs.size() == outputs.size()); - for (size_t i = 0; i < cond_inputs.size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - auto &output = _operands.at(outputs.at(i)); - if (cond_input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - } - else - { - const auto new_shape = cond_input.info().shape(); - output.info().shape(new_shape); - } + body_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic); + _child_inferers.at(op.param().body_subg_index)->infer(); } + cond_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic); + _child_inferers.at(op.param().cond_subg_index)->infer(); } void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op) @@ -1307,24 +1271,52 @@ void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op) // TODO: NMS supports very limited input/output size. 
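+ // With N = max_detections * max_classes_per_detection, the four outputs below get the fixed + // shapes {1, N, 4}, {1, N}, {1, N} and {1} (by TFLite convention: boxes, classes, scores and + // the number of valid detections).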
ir::operation::DetectionPostProcess::Param param = op.param(); + auto &operands = _lowered_subg->graph().operands(); const int num_detected_boxes = param.max_detections * param.max_classes_per_detection; const auto output_idx1 = op.getOutputs().at(0); - auto &output1 = _operands.at(output_idx1); + auto &output1 = operands.at(output_idx1); output1.info().shape({1, num_detected_boxes, 4}); const auto output_idx2 = op.getOutputs().at(1); - auto &output2 = _operands.at(output_idx2); + auto &output2 = operands.at(output_idx2); output2.info().shape({1, num_detected_boxes}); const auto output_idx3 = op.getOutputs().at(2); - auto &output3 = _operands.at(output_idx3); + auto &output3 = operands.at(output_idx3); output3.info().shape({1, num_detected_boxes}); const auto output_idx4 = op.getOutputs().at(3); - auto &output4 = _operands.at(output_idx4); + auto &output4 = operands.at(output_idx4); output4.info().shape({1}); } +void StaticShapeInferer::visit(const ir::operation::Bulk &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + // TODO: support multiple inputs/outputs + const auto input_idx{op.getInputs().at(0)}; + const auto &input = operands.at(input_idx); + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + auto cur_input_shape = input.info().shape(); + auto origin_input_shape = op.param().origin_input_shapes[0]; + auto cur_output_shape = output.info().shape(); + auto origin_output_shape = op.param().origin_output_shapes[0]; + + // TODO: more check for valid batch request + assert(cur_input_shape.dim(0) >= origin_output_shape.dim(0)); + assert(cur_input_shape.dim(0) % origin_output_shape.dim(0) == 0); + size_t batch_multiplier = cur_input_shape.dim(0) / origin_output_shape.dim(0); + + ir::Shape new_shape; + new_shape.append(origin_output_shape.dim(0) * batch_multiplier); + for (int32_t d = 1; d < origin_output_shape.rank(); ++d) + new_shape.append(origin_output_shape.dim(d)); + + output.info().shape(new_shape); +} } // namespace compiler diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h index 2a99db781..b3cc0bbe3 100644 --- a/runtime/onert/core/src/compiler/TensorRegistries.h +++ b/runtime/onert/core/src/compiler/TensorRegistries.h @@ -17,13 +17,14 @@ #ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__ #define __ONERT_COMPILER_TENSOR_REGISTRIES_H__ -#include <unordered_set> -#include <memory> -#include "backend/BackendContext.h" +#include "../backend/builtin/Config.h" +#include "../backend/builtin/TensorRegistry.h" + #include "backend/Backend.h" -#include "backend/builtin/Config.h" -#include "backend/builtin/TensorBuilder.h" -#include "backend/builtin/TensorRegistry.h" +#include "backend/BackendContext.h" + +#include <memory> +#include <unordered_set> namespace onert { diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc index 181f388de..c27ce3d09 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc @@ -15,7 +15,6 @@ */ #include "PermutationEliminationPass.h" -#include "backend/builtin/Config.h" #include "util/logging.h" diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc index 6f9899114..71efa1bb5 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc +++ 
b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc @@ -17,18 +17,16 @@ #include "PermutationInsertionPass.h" -#include <cassert> -#include <utility> -#include <unordered_map> +#include "../../backend/builtin/Config.h" -#include "backend/builtin/Config.h" -#include "ir/Operand.h" #include "compiler/OperationLowerInfo.h" -#include "ir/Graph.h" -#include "backend/IConfig.h" +#include "ir/operation/Permute.h" #include "util/logging.h" + +#include <cassert> #include <memory> -#include "ir/operation/Permute.h" +#include <unordered_map> +#include <utility> namespace onert { @@ -125,6 +123,8 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde // backend auto &model_outputs = _graph.getOutputs(); const backend::Backend *builtin_backend = compiler::BackendManager::get().getBuiltin(); + assert(builtin_backend->config()->id() == onert::backend::builtin::Config::ID); + if (model_outputs.contains(operand_index) && factor.backend() == builtin_backend) { model_outputs.replace(operand_index, out_operand_index); @@ -141,6 +141,8 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde const auto permute_node_layout = ir::Layout::UNKNOWN; // NOTE If one backend supports several layout, the backend must support Permute operation const backend::Backend *permute_node_backend = compiler::BackendManager::get().getBuiltin(); + assert(permute_node_backend->config()->id() == onert::backend::builtin::Config::ID); + if (input_backend == output_backend) { permute_node_backend = input_backend; diff --git a/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc index b18dedd15..572b4df24 100644 --- a/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc +++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc @@ -14,9 +14,11 @@ * limitations under the License. 
*/ -#include <gtest/gtest.h> +#include "UnusedOperandEliminationPass.h" + #include "ir/Graph.h" -#include "compiler/pass/UnusedOperandEliminationPass.h" + +#include <gtest/gtest.h> using namespace onert::ir; using namespace onert::compiler::pass; diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.cc b/runtime/onert/core/src/dumper/dot/DotDumper.cc index 714fb6fda..0bb2fa11f 100644 --- a/runtime/onert/core/src/dumper/dot/DotDumper.cc +++ b/runtime/onert/core/src/dumper/dot/DotDumper.cc @@ -19,6 +19,7 @@ #include "DotDumper.h" #include "DotBuilder.h" +#include "ir/OperandIndexMap.h" #include "ir/OperationIndexMap.h" #include "backend/Backend.h" #include "backend/IConfig.h" @@ -31,97 +32,72 @@ namespace dumper namespace dot { -void DotDumper::dump(const std::string &tag) +namespace { - if (_level == Level::OFF) - { - return; - } - - onert::dumper::dot::DotBuilder dot_builder; - - auto &operations = _graph.operations(); - auto &operands = _graph.operands(); - - ir::OperationIndexMap<std::unique_ptr<Operation>> operation_nodes; - std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> operand_nodes; - - auto backend_to_fillcolor = [](const backend::Backend *backend) { - static const auto map = []() { - std::unordered_map<const backend::Backend *, std::string> ret; - uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :( - for (const auto backend : compiler::BackendManager::get().getAll()) - { - ret.emplace(backend, Node::BG_COLORS[index]); - index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0])); - } - return ret; - }(); - - auto itr = map.find(backend); - if (itr == map.end()) - { - return Node::DEFAULT_FILLCOLOR; - } - else +std::string backend_to_fillcolor(const backend::Backend *backend) +{ + static const auto map = []() { + std::unordered_map<const backend::Backend *, std::string> ret; + uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :( + for (const auto backend : compiler::BackendManager::get().getAll()) { - return itr->second; + ret.emplace(backend, Node::BG_COLORS[index]); + index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0])); } - }; + return ret; + }(); + auto itr = map.find(backend); + if (itr == map.end()) + { + return Node::DEFAULT_FILLCOLOR; + } + else + { + return itr->second; + } +} - util::Set<ir::OperandIndex> shown_operand_set; +std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> +generate_dot_operands(const ir::Graph &graph, const DotDumper::Level level) +{ + std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> dot_operands; + const auto &operands = graph.operands(); operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &object) { - bool showing_cond = false; - if (_level == Level::ALL) - { - showing_cond = true; - } - else - { - showing_cond = - !object.isConstant() || (_graph.getInputs() + _graph.getOutputs()).contains(index); - } + bool showing_cond = + level == DotDumper::Level::ALL + ? 
true + : !object.isConstant() || (graph.getInputs() + graph.getOutputs()).contains(index); if (showing_cond) { - shown_operand_set.add(index); - auto type = [&]() { using onert::dumper::dot::Operand; - if (_graph.getInputs().contains(index)) + if (graph.getInputs().contains(index)) return Operand::Type::MODEL_INPUT; - if (_graph.getOutputs().contains(index)) + if (graph.getOutputs().contains(index)) return Operand::Type::MODEL_OUTPUT; return Operand::Type::INTERNAL; }(); auto node = std::make_unique<Operand>(index, type); + std::string label = std::to_string(index.value()); + std::string fillcolor = ""; + node->setAttribute("label", label); + node->setAttribute("fillcolor", fillcolor); - { - // Display LowerInfo attributes - std::string label = std::to_string(index.value()); - std::string fillcolor = ""; - if (_lowered_graph) - { - auto lower_info = _lowered_graph->lower_info().operand.getRawPtr(index); - const auto &def_factors = lower_info->def_factors(); - if (def_factors.size() > 0) - { - label += "\\n["; - label += def_factors.getOnlyElement().backend()->config()->id(); - label += "]"; - - fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend()); - } - } - node->setAttribute("label", label); - node->setAttribute("fillcolor", fillcolor); - } - - operand_nodes.emplace(index, std::move(node)); + dot_operands.emplace(index, std::move(node)); } }); + return dot_operands; +} + +ir::OperationIndexMap<std::unique_ptr<Operation>> +generate_dot_operations(const ir::Graph &graph, + const ir::OperandIndexMap<std::unique_ptr<Operand>> &dot_operands) +{ + ir::OperationIndexMap<std::unique_ptr<Operation>> dot_operations; + const auto &operations = graph.operations(); operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) { auto node = std::make_unique<Operation>(index, op); @@ -130,42 +106,79 @@ void DotDumper::dump(const std::string &tag) using onert::dumper::dot::Operand; // Constant input and dump level is ALL_BUT_CONSTANTS - if (operand_nodes.find(input) == operand_nodes.end()) + if (dot_operands.find(input) == dot_operands.end()) continue; - auto &input_node = operand_nodes.at(input); + auto &input_node = dot_operands.at(input); input_node->addOutEdge(node.get()); } for (auto output : op.getOutputs() | ir::Remove::UNDEFINED) { using onert::dumper::dot::Operand; - auto &output_node = operand_nodes.at(output); + auto &output_node = dot_operands.at(output); node->addOutEdge(output_node.get()); } - operation_nodes.emplace(index, std::move(node)); + dot_operations.emplace(index, std::move(node)); }); - if (_lowered_graph) - { - _graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) { - const auto lower_info = _lowered_graph->lower_info().operation.getRawPtr(index); - if (lower_info) + return dot_operations; +} + +void update_lower_info(const compiler::LoweredGraph &lowered_graph, + ir::OperandIndexMap<std::unique_ptr<Operand>> *dot_operands) +{ + const auto &operands = lowered_graph.graph().operands(); + operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &) { + auto itr = dot_operands->find(index); + if (itr != dot_operands->end()) + { + auto &node = itr->second; + // Display LowerInfo attributes + std::string label = node->getAttribute("label"); + std::string fillcolor = node->getAttribute("fillcolor"); + auto lower_info = lowered_graph.lower_info().operand.getRawPtr(index); + const auto &def_factors = lower_info->def_factors(); + if (def_factors.size() > 0) { - auto fillcolor = 
backend_to_fillcolor(lower_info->backend()); - std::string backend_label = "[" + lower_info->backend()->config()->id() + "]"; - auto itr = operation_nodes.find(index); - if (itr != operation_nodes.end()) - { - auto &node = itr->second; - node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label); - node->setAttribute("fillcolor", fillcolor); - } + label += "\\n["; + label += def_factors.getOnlyElement().backend()->config()->id(); + label += "]"; + fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend()); } - }); - } + node->setAttribute("label", label); + node->setAttribute("fillcolor", fillcolor); + } + }); +} +void update_lower_info(const compiler::LoweredGraph &lowered_graph, + ir::OperationIndexMap<std::unique_ptr<Operation>> *dot_operations) +{ + const auto &operations = lowered_graph.graph().operations(); + operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &) { + const auto lower_info = lowered_graph.lower_info().operation.getRawPtr(index); + if (lower_info) + { + auto fillcolor = backend_to_fillcolor(lower_info->backend()); + std::string backend_label = "[" + lower_info->backend()->config()->id() + "]"; + auto itr = dot_operations->find(index); + if (itr != dot_operations->end()) + { + auto &node = itr->second; + node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label); + node->setAttribute("fillcolor", fillcolor); + } + } + }); +} + +void dump_to_file(const ir::OperandIndexMap<std::unique_ptr<Operand>> &operand_nodes, + const ir::OperationIndexMap<std::unique_ptr<Operation>> &operation_nodes, + const std::string &tag) +{ + onert::dumper::dot::DotBuilder dot_builder; for (const auto &e : operation_nodes) dot_builder.update(*e.second); for (const auto &e : operand_nodes) @@ -186,6 +199,33 @@ void DotDumper::dump(const std::string &tag) fb.close(); } } +} // namespace + +void DotDumper::dump(const ir::Graph &graph, const std::string &tag) +{ + if (_level == Level::OFF) + { + return; + } + + const auto dot_operands = generate_dot_operands(graph, _level); + const auto dot_operations = generate_dot_operations(graph, dot_operands); + dump_to_file(dot_operands, dot_operations, tag); +} + +void DotDumper::dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag) +{ + if (_level == Level::OFF) + { + return; + } + + auto dot_operands = generate_dot_operands(lowered_graph.graph(), _level); + auto dot_operations = generate_dot_operations(lowered_graph.graph(), dot_operands); + update_lower_info(lowered_graph, &dot_operands); + update_lower_info(lowered_graph, &dot_operations); + dump_to_file(dot_operands, dot_operations, tag); +} } // namespace dot } // namespace dumper diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h index f300c3432..6249010d3 100644 --- a/runtime/onert/core/src/dumper/dot/DotDumper.h +++ b/runtime/onert/core/src/dumper/dot/DotDumper.h @@ -38,27 +38,28 @@ public: }; public: - DotDumper(const ir::Graph &graph, Level level) - : _lowered_graph{nullptr}, _graph(graph), _level{level} - { - } - DotDumper(const compiler::LoweredGraph *lowered_graph, Level level) - : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level} - { - } + DotDumper(Level level) : _level{level} {} public: /** - * @brief Dump to dot file as tag name if "GRAPH_DOT_DUMP" is set + * @brief Dump graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set + * + * @param[in] graph The graph that would be used to 
get operations and operands + * @param[in] tag The name of dot file that would be created + * @return N/A + */ + void dump(const ir::Graph &graph, const std::string &tag); + + /** + * @brief Dump lowered graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set * + * @param[in] lowered_graph The lowered graph that would be used to get operations and operands * @param[in] tag The name of dot file that would be created * @return N/A */ - void dump(const std::string &tag); + void dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag); private: - const compiler::LoweredGraph *_lowered_graph; - const ir::Graph &_graph; Level _level; }; diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h index bcac19d2e..1649be733 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.h +++ b/runtime/onert/core/src/exec/DataflowExecutor.h @@ -17,19 +17,18 @@ #ifndef __ONERT_EXEC_DATAFLOW_EXECUTOR_H__ #define __ONERT_EXEC_DATAFLOW_EXECUTOR_H__ -#include <list> -#include <map> -#include <unordered_map> - -#include "exec/FunctionSequence.h" +#include "ExecutorBase.h" #include "Job.h" -#include "ir/OperandIndexSequence.h" -#include "ir/Index.h" -#include <memory> -#include "exec/ExecutorBase.h" + #include "compiler/CodeMap.h" +#include "ir/OperandIndexSequence.h" #include "util/TracingCtx.h" +#include <list> +#include <map> +#include <memory> +#include <unordered_map> + namespace onert { namespace exec diff --git a/runtime/onert/core/src/exec/ExecTime.cc b/runtime/onert/core/src/exec/ExecTime.cc index 6bf2744a9..4b82655b9 100644 --- a/runtime/onert/core/src/exec/ExecTime.cc +++ b/runtime/onert/core/src/exec/ExecTime.cc @@ -14,12 +14,10 @@ * limitations under the License. */ -#include "exec/ExecTime.h" +#include "ExecTime.h" -#include <fstream> -#include <cassert> -#include <limits> #include <algorithm> +#include <cassert> namespace onert { diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/core/src/exec/ExecTime.test.cc index 178b61ea5..1f7152e7b 100644 --- a/runtime/onert/test/core/exec/ExecTime.test.cc +++ b/runtime/onert/core/src/exec/ExecTime.test.cc @@ -14,10 +14,13 @@ * limitations under the License.
*/ -#include "exec/ExecTime.h" +#include "ExecTime.h" + #include "backend/IConfig.h" #include "backend/Backend.h" + #include <gtest/gtest.h> + #include <string> namespace diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc index 8eff73bac..9d1e06d6c 100644 --- a/runtime/onert/core/src/exec/Execution.cc +++ b/runtime/onert/core/src/exec/Execution.cc @@ -23,13 +23,12 @@ namespace onert namespace exec { -Execution::Execution(const std::shared_ptr<ExecutorMap> &executors) : _executors{executors} +Execution::Execution(const std::shared_ptr<Executors> &executors) : _executors{executors} { assert(executors != nullptr); assert(executors->at(ir::SubgraphIndex{0}) != nullptr); - const auto &primary_subg = primary_subgraph(); - _io_desc.inputs.resize(primary_subg.getInputs().size()); - _io_desc.outputs.resize(primary_subg.getOutputs().size()); + _io_desc.inputs.resize(_executors->inputSize()); + _io_desc.outputs.resize(_executors->outputSize()); sem_init(&_async_io_descs_sem, 0, 1); } @@ -48,8 +47,7 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length, ir::Layout layout) { - const auto input_index = primary_subgraph().getInputs().at(index); - const auto info = primary_subgraph().operands().at(input_index).info(); + const auto info = _executors->inputInfo(index); // TODO handle when (!buffer && length != 0) : setting the input as an optional tensor @@ -105,8 +103,7 @@ bool Execution::isEmptyQueue() void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer, size_t length, ir::Layout layout) { - const auto input_index = primary_subgraph().getInputs().at(index); - const auto info = primary_subgraph().operands().at(input_index).info(); + const auto info = _executors->inputInfo(index); IODescription *_async_io_desc = _async_io_descs.back().first; { @@ -135,8 +132,7 @@ void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer, void Execution::executeAsyncOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout) { - const auto output_index = primary_subgraph().getOutputs().at(index); - const auto info = primary_subgraph().operands().at(output_index).info(); + const auto info = _executors->outputInfo(index); IODescription *_async_io_desc = _async_io_descs.front().first; if (length < info.total_size()) @@ -165,8 +161,7 @@ void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, con // TODO Remove default parameter void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout) { - const auto output_index = primary_subgraph().getOutputs().at(index); - const auto info = primary_subgraph().operands().at(output_index).info(); + const auto info = _executors->outputInfo(index); if (length < info.total_size()) { @@ -208,7 +203,7 @@ void Execution::execute() { VERBOSE(Execution) << "Start execution" << std::endl; - primary_executor()->execute(_io_desc); + _executors->execute(_io_desc); finished = true; VERBOSE(Execution) << "Execution finished" << std::endl; @@ -248,8 +243,7 @@ ir::Shape Execution::getInputShape(ir::IOIndex ind) const auto itr = _io_desc.dynamic_input_shapes.find(ind); if (itr == _io_desc.dynamic_input_shapes.end()) { - auto operand_idx = primary_subgraph().getInputs().at(ind); - return primary_subgraph().operands().at(operand_idx).shape(); + return _executors->inputInfo(ind).shape(); } else { diff --git 
a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/core/src/exec/Execution.test.cc index 0183b6276..e3ea49470 100644 --- a/runtime/onert/test/core/exec/ExecInstance.cc +++ b/runtime/onert/core/src/exec/Execution.test.cc @@ -14,15 +14,16 @@ * limitations under the License. */ -#include <gtest/gtest.h> -#include <thread> +#include "exec/Execution.h" -#include "ir/Graph.h" #include "compiler/Compiler.h" -#include "exec/Execution.h" +#include "ir/Graph.h" #include "ir/operation/BinaryArithmetic.h" #include "util/TracingCtx.h" +#include <gtest/gtest.h> +#include <thread> + namespace { @@ -76,24 +77,24 @@ public: graph->verify(); // Compile - auto subgs = std::make_shared<onert::ir::Subgraphs>(); - subgs->push(onert::ir::SubgraphIndex{0}, graph); - tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get()); - onert::compiler::Compiler compiler{subgs, tracing_ctx.get()}; - executors = compiler.compile(); + auto model = std::make_shared<onert::ir::Model>(); + model->push(onert::ir::SubgraphIndex{0}, graph); + coptions = onert::compiler::CompilerOptions::fromGlobalConfig(); + onert::compiler::Compiler compiler{model, *coptions}; + artifact = compiler.compile(); } public: std::shared_ptr<Graph> graph; - std::shared_ptr<onert::exec::ExecutorMap> executors; - std::unique_ptr<onert::util::TracingCtx> tracing_ctx; + std::unique_ptr<onert::compiler::CompilerOptions> coptions; + std::shared_ptr<onert::compiler::CompilerArtifact> artifact; }; TEST(ExecInstance, simple) { auto mockup = CompiledMockUpModel(); auto graph = mockup.graph; - auto executors = mockup.executors; + auto executors = mockup.artifact->_executors; auto input1 = IOIndex{0}; auto input2 = IOIndex{1}; @@ -121,7 +122,7 @@ TEST(ExecInstance, twoCompile) { auto mockup = CompiledMockUpModel(); auto graph = mockup.graph; - auto executors1 = mockup.executors; + auto executors1 = mockup.artifact->_executors; onert::exec::Execution execution1{executors1}; auto input1 = IOIndex{0}; @@ -138,12 +139,12 @@ TEST(ExecInstance, twoCompile) execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16); // Make new executor: compile again - auto subgs = std::make_shared<onert::ir::Subgraphs>(); - subgs->push(onert::ir::SubgraphIndex{0}, graph); - auto tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get()); - onert::compiler::Compiler compiler{subgs, tracing_ctx.get()}; - std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler.compile(); - onert::exec::Execution execution2{executors2}; + auto model = std::make_shared<onert::ir::Model>(); + model->push(onert::ir::SubgraphIndex{0}, graph); + auto coptions = onert::compiler::CompilerOptions::fromGlobalConfig(); + onert::compiler::Compiler compiler{model, *coptions}; + std::shared_ptr<onert::compiler::CompilerArtifact> artifact = compiler.compile(); + onert::exec::Execution execution2{artifact->_executors}; const float exe2_input1_buffer[4] = {2, 1, -2, 0}; const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; @@ -168,7 +169,7 @@ TEST(ExecInstance, twoCompile) TEST(ExecInstance, twoExecution) { auto mockup = CompiledMockUpModel(); - auto executors = mockup.executors; + auto executors = mockup.artifact->_executors; auto input1 = IOIndex{0}; auto input2 = IOIndex{1}; auto output1 = IOIndex{0}; @@ -208,7 +209,7 @@ class Inference { public: Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4], - std::shared_ptr<onert::exec::ExecutorMap> &executors) + std::shared_ptr<onert::exec::Executors> &executors) : _input1{input1}, 
_input2{input2}, _output{output}, _executors{executors} { // DO NOTHING @@ -232,14 +233,14 @@ private: const float (&_input1)[4]; const float (&_input2)[4]; float (&_output)[4]; - std::shared_ptr<onert::exec::ExecutorMap> &_executors; + std::shared_ptr<onert::exec::Executors> &_executors; }; // Support multi-thread execution TEST(ExecInstance, twoThreads) { auto mockup = CompiledMockUpModel(); - auto executors = mockup.executors; + auto executors = mockup.artifact->_executors; const float exe1_input1_buffer[4] = {1, 0, -1, -2}; const float exe1_input2_buffer[4] = {1, -3, 2, -4}; @@ -273,7 +274,7 @@ TEST(ExecInstance, async) { auto mockup = CompiledMockUpModel(); auto graph = mockup.graph; - auto executors = mockup.executors; + auto executors = mockup.artifact->_executors; auto input1 = IOIndex{0}; auto input2 = IOIndex{1}; diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h index 423b5026b..3ee1754c9 100644 --- a/runtime/onert/core/src/exec/ExecutionObservee.h +++ b/runtime/onert/core/src/exec/ExecutionObservee.h @@ -17,11 +17,12 @@ #ifndef __ONERT_EXEC_EXECUTION_OBSERVEE_H__ #define __ONERT_EXEC_EXECUTION_OBSERVEE_H__ -#include <list> +#include "ExecutionObservers.h" -#include "exec/ExecutionObservers.h" #include "ir/Index.h" +#include <list> + namespace onert { namespace exec diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc index 386178ae6..9abde7ba4 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.cc +++ b/runtime/onert/core/src/exec/ExecutionObservers.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "exec/ExecutionObservers.h" +#include "ExecutionObservers.h" -#include <string> -#include <sstream> +#include "../util/EventWriter.h" #include "util/logging.h" -#include "exec/IExecutor.h" -#include "misc/polymorphic_downcast.h" -#include "ir/Operation.h" -#include "util/EventWriter.h" + +#include <misc/polymorphic_downcast.h> + +#include <string> +#include <sstream> namespace { diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h index 4c6c7b18e..1aadac2f5 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.h +++ b/runtime/onert/core/src/exec/ExecutionObservers.h @@ -17,17 +17,16 @@ #ifndef __ONERT_EXEC_OBSREVERS_H__ #define __ONERT_EXEC_OBSREVERS_H__ -#include "exec/IFunction.h" +#include "ExecTime.h" +#include "../util/EventCollector.h" +#include "../util/EventRecorder.h" +#include "../util/EventWriter.h" + +#include "exec/Executors.h" #include "ir/Index.h" #include "ir/Operation.h" -#include "ExecTime.h" #include "util/ITimer.h" -#include "exec/IExecutor.h" -#include "util/EventCollector.h" -#include "util/EventRecorder.h" -#include "util/EventWriter.h" #include "util/TracingCtx.h" -#include "util/EventWriter.h" namespace onert { diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc index efc22cfa5..d2d204a0b 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.cc +++ b/runtime/onert/core/src/exec/ExecutorBase.cc @@ -15,11 +15,10 @@ */ #include "ExecutorBase.h" + #include "ShapeConverter.h" -#include "backend/builtin/UserTensor.h" -#include "util/logging.h" -#include "misc/polymorphic_downcast.h" +#include <misc/polymorphic_downcast.h> namespace onert { diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h index c0f609d11..e4f914546 100644 --- 
a/runtime/onert/core/src/exec/ExecutorBase.h +++ b/runtime/onert/core/src/exec/ExecutorBase.h @@ -17,22 +17,17 @@ #ifndef __ONERT_EXEC_EXECUTOR_BASE_H__ #define __ONERT_EXEC_EXECUTOR_BASE_H__ -#include "IPermuteFunction.h" +#include "ExecutionObservee.h" +#include "../backend/builtin/IOTensor.h" +#include "../compiler/TensorRegistries.h" + +#include "compiler/LoweredGraph.h" #include "exec/IExecutor.h" -#include "exec/ExecTime.h" -#include "exec/ExecutionObservee.h" -#include "exec/IFunction.h" #include "exec/IODescription.h" #include "ir/Graph.h" -#include "ir/Index.h" -#include "compiler/GraphLowerInfo.h" #include "ir/OperationIndexMap.h" -#include "compiler/LoweredGraph.h" -#include "compiler/TensorRegistries.h" -#include "backend/builtin/IOTensor.h" #include "util/TracingCtx.h" -#include <cstdint> #include <memory> #include <mutex> #include <vector> diff --git a/runtime/onert/core/src/exec/Executors.cc b/runtime/onert/core/src/exec/Executors.cc new file mode 100644 index 000000000..e0ee24fea --- /dev/null +++ b/runtime/onert/core/src/exec/Executors.cc @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "exec/Executors.h" + +namespace onert +{ +namespace exec +{ + +uint32_t Executors::inputSize() const +{ + return _model_edges ? _model_edges->pkg_inputs.size() + : _executors.at(ir::SubgraphIndex{0})->graph().getInputs().size(); +} + +uint32_t Executors::outputSize() const +{ + return _model_edges ? 
_model_edges->pkg_outputs.size() + : _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().size(); +} + +const ir::OperandInfo Executors::inputInfo(const ir::IOIndex &index) +{ + if (_model_edges) + { + // Assume that each model may have only one subgraph + // TODO handle general case + const auto desc = _model_edges->pkg_inputs[index.value()]; + const auto model_idx = std::get<0>(desc); + const auto executor_idx = ir::SubgraphIndex{model_idx.value()}; + const auto input_index = _executors.at(executor_idx)->graph().getInputs().at(std::get<2>(desc)); + return _executors.at(executor_idx)->graph().operands().at(input_index).info(); + } + + const auto input_index = _executors.at(ir::SubgraphIndex{0})->graph().getInputs().at(index); + return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(input_index).info(); +} + +const ir::OperandInfo Executors::outputInfo(const ir::IOIndex &index) +{ + if (_model_edges) + { + // Assume that each model may have only one subgraph + // TODO handle general case + auto desc = _model_edges->pkg_outputs[index.value()]; + auto model_idx = std::get<0>(desc); + auto executor_idx = ir::SubgraphIndex{model_idx.value()}; + auto output_index = _executors.at(executor_idx)->graph().getOutputs().at(std::get<2>(desc)); + return _executors.at(executor_idx)->graph().operands().at(output_index).info(); + } + + auto output_index = _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(index); + return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(output_index).info(); +} + +void Executors::execute(const IODescription &desc) +{ + if (_model_edges) + return executeEntries(desc); + + _executors.at(ir::SubgraphIndex{0})->execute(desc); +} + +void Executors::executeEntries(const IODescription &desc) +{ + // Assume 2 executors only + // Assume that each model may have only one subgraph + // TODO Support general case + if (_executors.size() != 2) + throw std::runtime_error{"NYI: Multi model execution for this package is not supported yet"}; + + // Assume all edges are 0:0:x -> 1:0:x + for (auto edge : _model_edges->edges) + { + if ((std::get<ir::ModelIndex>(edge.from) != ir::ModelIndex{0}) || + (std::get<ir::ModelIndex>(edge.to) != ir::ModelIndex{1}) || + (std::get<ir::SubgraphIndex>(edge.from) != ir::SubgraphIndex{0}) || + (std::get<ir::SubgraphIndex>(edge.to) != ir::SubgraphIndex{0}) || + (std::get<ir::IOIndex>(edge.from) != std::get<ir::IOIndex>(edge.to))) + throw std::runtime_error{"NYI: Multi model execution for this edge is not supported yet"}; + } + + // Assume all package inputs are 0:0:x + for (uint32_t i = 0; i < _model_edges->pkg_inputs.size(); i++) + { + auto input = _model_edges->pkg_inputs[i]; + if ((std::get<ir::ModelIndex>(input) != ir::ModelIndex{0}) || + (std::get<ir::SubgraphIndex>(input) != ir::SubgraphIndex{0}) || + (std::get<ir::IOIndex>(input) != ir::IOIndex{i})) + { + throw std::runtime_error{"NYI: Support package input to 1st model with same order"}; + } + } + + // Assume all package outputs are 1:0:x + for (uint32_t i = 0; i < _model_edges->pkg_outputs.size(); i++) + { + auto output = _model_edges->pkg_outputs[i]; + if ((std::get<ir::ModelIndex>(output) != ir::ModelIndex{1}) || + (std::get<ir::SubgraphIndex>(output) != ir::SubgraphIndex{0}) || + (std::get<ir::IOIndex>(output) != ir::IOIndex{i})) + { + throw std::runtime_error{"NYI: Support package output from 2nd model with same order"}; + } + } + + const auto &executor1 = _executors.at(ir::SubgraphIndex{0}); + const auto &graph1 = executor1->graph(); + const auto &executor2 = 
_executors.at(ir::SubgraphIndex{1}); + const auto &graph2 = executor2->graph(); + + if ((graph1.getInputs().size() != _model_edges->pkg_inputs.size()) || + (graph2.getOutputs().size() != _model_edges->pkg_outputs.size()) || + (graph1.getOutputs().size() != graph2.getInputs().size()) || + (graph1.getOutputs().size() != _model_edges->edges.size())) + { + throw std::runtime_error{"NYI: Unsupported model edge pattern"}; + } + + // Prepare buffer + // Assume buffer layout is NHWC + std::vector<std::unique_ptr<uint8_t[]>> bufs(_model_edges->edges.size()); + std::vector<const ir::OperandInfo *> buf_infos(_model_edges->edges.size()); + const auto layout = ir::Layout::NHWC; + + for (uint32_t i = 0; i < graph1.getOutputs().size(); i++) + { + const auto buf_index = + _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(ir::IOIndex{i}); + buf_infos[i] = &_executors.at(ir::SubgraphIndex{0})->graph().operands().at(buf_index).info(); + const auto buf_size = buf_infos[i]->total_size(); + bufs[i] = std::make_unique<uint8_t[]>(buf_size); + } + + // 1st executor + { + IODescription desc1; + const auto input_size = graph1.getInputs().size(); + const auto output_size = graph1.getOutputs().size(); + desc1.inputs.resize(input_size); + desc1.outputs.resize(output_size); + for (uint32_t i = 0; i < input_size; i++) + desc1.inputs[i] = std::make_unique<InputDesc>(*desc.inputs[i].get()); + for (uint32_t i = 0; i < output_size; i++) + desc1.outputs[i] = std::make_unique<OutputDesc>(*buf_infos[i], bufs[i].get(), + buf_infos[i]->total_size(), layout); + + executor1->execute(desc1); + } + + // 2nd executor + { + IODescription desc2; + const auto input_size = graph2.getInputs().size(); + const auto output_size = graph2.getOutputs().size(); + desc2.inputs.resize(input_size); + desc2.outputs.resize(output_size); + for (uint32_t i = 0; i < input_size; i++) + desc2.inputs[i] = std::make_unique<InputDesc>(*buf_infos[i], bufs[i].get(), + buf_infos[i]->total_size(), layout); + for (uint32_t i = 0; i < output_size; i++) + desc2.outputs[i] = std::make_unique<OutputDesc>(*desc.outputs[i].get()); + + executor2->execute(desc2); + } +} + +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc index df68b1b64..f87c271f7 100644 --- a/runtime/onert/core/src/exec/FunctionSequence.cc +++ b/runtime/onert/core/src/exec/FunctionSequence.cc @@ -34,9 +34,7 @@ void FunctionSequence::run() // Thus, those two backends cannot reach here. // Do dynamic shape inference - auto op_ind = _dynamic_tensor_ctx->op_ind; - auto &op = _dynamic_tensor_ctx->operations->at(op_ind); - op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer); + _dynamic_tensor_ctx->op->accept(*_dynamic_tensor_ctx->dynamic_shape_inferer); for (const auto &function : _functions) { diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc index b29216a2f..d149345fd 100644 --- a/runtime/onert/core/src/exec/JSONExecTime.cc +++ b/runtime/onert/core/src/exec/JSONExecTime.cc @@ -14,8 +14,8 @@ * limitations under the License.
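
executeEntries() above chains exactly two single-subgraph executors: each output of the first model is staged in a freshly allocated buffer and becomes the matching input of the second. The hand-off pattern in isolation (a standalone sketch with a hypothetical Executor interface, not onert's IExecutor/IODescription):

#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

// Hypothetical stand-in for an executor; not onert::exec::IExecutor.
struct Executor
{
  virtual ~Executor() = default;
  virtual size_t outputSize(uint32_t i) const = 0;
  virtual void run(const std::vector<uint8_t *> &ins, const std::vector<uint8_t *> &outs) = 0;
};

void runPipelined(Executor &first, Executor &second, const std::vector<uint8_t *> &pkg_inputs,
                  const std::vector<uint8_t *> &pkg_outputs, uint32_t n_edges)
{
  // One intermediate buffer per edge, sized from the producer's output info
  std::vector<std::unique_ptr<uint8_t[]>> edge_bufs(n_edges);
  std::vector<uint8_t *> raw(n_edges);
  for (uint32_t i = 0; i < n_edges; ++i)
  {
    edge_bufs[i] = std::make_unique<uint8_t[]>(first.outputSize(i));
    raw[i] = edge_bufs[i].get();
  }
  first.run(pkg_inputs, raw);   // model 0 writes the edge buffers
  second.run(raw, pkg_outputs); // model 1 reads them as its inputs
}
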
*/ -#include "exec/JSONExecTime.h" -#include "backend/IConfig.h" +#include "JSONExecTime.h" + #include <fstream> namespace onert diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h index 39d653154..a833466da 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.h +++ b/runtime/onert/core/src/exec/LinearExecutor.h @@ -22,11 +22,10 @@ #ifndef __ONERT_EXEC_EXECUTOR_H_ #define __ONERT_EXEC_EXECUTOR_H_ -#include "ir/Index.h" #include "ExecutorBase.h" -#include "compiler/Linear.h" -#include "exec/FunctionSequence.h" + #include "compiler/CodeMap.h" +#include "ir/Index.h" #include "util/TracingCtx.h" namespace onert diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h index 7f107fa22..7d459b0b4 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.h +++ b/runtime/onert/core/src/exec/ParallelExecutor.h @@ -17,19 +17,13 @@ #ifndef __ONERT_EXEC_PARALLEL_EXECUTOR_H__ #define __ONERT_EXEC_PARALLEL_EXECUTOR_H__ -#include <list> -#include <queue> -#include <unordered_map> - -#include "exec/FunctionSequence.h" -#include "Job.h" -#include "ir/OperandIndexSequence.h" -#include "ir/Index.h" -#include <memory> -#include "exec/DataflowExecutor.h" +#include "DataflowExecutor.h" #include "ParallelScheduler.h" + #include "util/TracingCtx.h" +#include <memory> + namespace onert { namespace exec diff --git a/runtime/onert/core/src/exec/feature/MockTensor.h b/runtime/onert/core/src/exec/feature/MockTensor.h new file mode 100644 index 000000000..1d2d375e2 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/MockTensor.h @@ -0,0 +1,66 @@ + +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/ITensor.h" + +template <typename T> class MockTensor : public onert::backend::ITensor +{ +public: + MockTensor<T>(onert::ir::Shape &shape, T *buf, onert::ir::Layout layout) + : _buf(reinterpret_cast<uint8_t *>(buf)), _shape(shape), _layout(layout) + { + } + +public: + uint8_t *buffer() const override { return _buf; } + + size_t calcOffset(const onert::ir::Coordinates &coords) const override + { + size_t rank = _shape.rank(); + rank = rank == 0 ? 1 : rank; + size_t offset = 0; + for (size_t i = 0; i < rank; ++i) + { + auto dim = _shape.rank() == 0 ? 
1 : _shape.dim(i); + offset = offset * dim + coords[i]; + } + offset *= sizeof(T); + + return offset; + } + + onert::ir::Shape getShape() const override { return _shape; } + +public: // DUMMY methods + size_t total_size() const override { return 0; } + onert::ir::Layout layout() const override { return _layout; } + onert::ir::DataType data_type() const override { return onert::ir::DataType::UINT8; } + float data_scale() const override { return 0; } + int32_t data_zero_point() const override { return 0; } + const std::vector<float> &data_scales() const override { return _dummy_scales; } + const std::vector<int32_t> &data_zero_points() const override { return _dummy_zerops; } + bool has_padding() const override { return false; } + void access(const std::function<void(ITensor &tensor)> &fn) override {} + bool is_dynamic() const override { return false; } + +private: + uint8_t *_buf = nullptr; + onert::ir::Shape _shape; + onert::ir::Layout _layout = onert::ir::Layout::UNKNOWN; + std::vector<float> _dummy_scales; + std::vector<int32_t> _dummy_zerops; +}; diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc new file mode 100644 index 000000000..f439cafb5 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
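
calcOffset() above is plain row-major indexing: fold each coordinate in with offset = offset * dim + coord, then scale by the element size, treating rank-0 tensors as a single element. The same computation as a standalone function with a worked check (illustrative code, not part of the tree):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Row-major byte offset: ((c0*d1 + c1)*d2 + c2)... scaled by the element size
size_t rowMajorOffset(const std::vector<int32_t> &dims, const std::vector<int32_t> &coords,
                      size_t elem_size)
{
  size_t offset = 0;
  for (size_t i = 0; i < dims.size(); ++i)
    offset = offset * dims[i] + coords[i];
  return offset * elem_size;
}

int main()
{
  // Shape {1, 3, 2, 2} (the {N, H, W, C} the fixtures below pass), float elements:
  // coords {0, 1, 1, 0} -> ((0*3 + 1)*2 + 1)*2 + 0 = 6 elements from the base
  assert(rowMajorOffset({1, 3, 2, 2}, {0, 1, 1, 0}, sizeof(float)) == 6 * sizeof(float));
  return 0;
}
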
+ */ + +#include "Reader.h" + +#include "../MockTensor.h" + +#include <gtest/gtest.h> + +using namespace onert::exec::feature; + +template <typename T> class Reader_nchw : public testing::Test +{ +public: + void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createReader() + { + _reader = + std::make_shared<nchw::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW); + _reader = std::make_shared<nchw::Reader<T>>(_tensor.get()); + } + + std::shared_ptr<Reader<T>> _reader = nullptr; + +private: + std::shared_ptr<std::vector<T>> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr<MockTensor<T>> _tensor = nullptr; +}; + +using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>; +TYPED_TEST_SUITE(Reader_nchw, ReaderTypes); + +TYPED_TEST(Reader_nchw, basic_reader) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 6, 2, 1); + this->createReader(); + + // Data: NCHW + // Shape: NCHW + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_reader->at(1, 1, 0), 8); + + // Data: NCHW + // Shape: NCHW + this->createUsingMockTensor(); + + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_reader->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/exec/feature/nchw/View.test.cc b/runtime/onert/core/src/exec/feature/nchw/View.test.cc new file mode 100644 index 000000000..c6dcda710 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/nchw/View.test.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
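
setStride() in these fixtures takes per-dimension element counts and scales them by sizeof(T) into byte strides. The constants themselves follow from a packed layout; a standalone sketch deriving them (not onert code):

#include <cassert>
#include <cstdint>

struct Strides { int32_t N, C, H, W; };

// Packed NCHW: W innermost, so strideW = 1, strideH = W, strideC = H*W, strideN = C*H*W
Strides nchwStrides(int32_t C, int32_t H, int32_t W) { return {C * H * W, H * W, W, 1}; }

// Packed NHWC: C innermost, so strideC = 1, strideW = C, strideH = W*C, strideN = H*W*C
Strides nhwcStrides(int32_t C, int32_t H, int32_t W) { return {H * W * C, 1, W * C, C}; }

int main()
{
  // The nchw fixtures' setStride(12, 6, 2, 1) is packed NCHW for (C=2, H=3, W=2)
  Strides a = nchwStrides(2, 3, 2);
  assert(a.N == 12 && a.C == 6 && a.H == 2 && a.W == 1);

  // The nhwc fixtures' setStride(12, 1, 6, 2) equals packed NHWC for dims (H=2, W=3, C=2),
  // consistent with their comment that the underlying buffer is NCHW-ordered
  Strides b = nhwcStrides(2, 2, 3);
  assert(b.N == 12 && b.C == 1 && b.H == 6 && b.W == 2);
  return 0;
}
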
+ */ + +#include "View.h" + +#include "../MockTensor.h" + +#include <gtest/gtest.h> + +using namespace onert::exec::feature; + +template <typename T> class View_nchw : public testing::Test +{ +public: + void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createView() + { + _view = + std::make_shared<nchw::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW); + _view = std::make_shared<nchw::View<T>>(_tensor.get()); + } + + std::shared_ptr<nchw::View<T>> _view = nullptr; + +private: + std::shared_ptr<std::vector<T>> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr<MockTensor<T>> _tensor = nullptr; +}; + +using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>; +TYPED_TEST_SUITE(View_nchw, ViewTypes); + +TYPED_TEST(View_nchw, basic_view) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 6, 2, 1); + this->createView(); + + // Data: NCHW + // Shape: NCHW + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_view->at(1, 1, 0), 8); + + // Data: NCHW + // Shape: NCHW + this->createUsingMockTensor(); + + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_view->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc new file mode 100644 index 000000000..773199042 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Reader.h" + +#include "../MockTensor.h" + +#include <gtest/gtest.h> + +using namespace onert::exec::feature; + +template <typename T> class Reader_nhwc : public testing::Test +{ +public: + void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createReader() + { + _reader = + std::make_shared<nhwc::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC); + _reader = std::make_shared<nhwc::Reader<T>>(_tensor.get()); + } + + std::shared_ptr<nhwc::Reader<T>> _reader = nullptr; + +private: + std::shared_ptr<std::vector<T>> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr<MockTensor<T>> _tensor = nullptr; +}; + +using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>; +TYPED_TEST_SUITE(Reader_nhwc, ReaderTypes); +TYPED_TEST_SUITE(MockTensorReader_nhwc, ReaderTypes); + +TYPED_TEST(Reader_nhwc, basic_reader) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 1, 6, 2); + this->createReader(); + + // Data: NCHW + // Shape: NHWC + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_reader->at(1, 1, 0), 8); + + // Data: NHWC + // Shape: NHWC + this->createUsingMockTensor(); + + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_reader->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h index 40d1d237c..c98d050c3 100644 --- a/runtime/onert/core/src/exec/feature/nhwc/View.h +++ b/runtime/onert/core/src/exec/feature/nhwc/View.h @@ -17,7 +17,7 @@ #ifndef __ONERT_EXEC_FEATURE_NHWC_VIEW_H__ #define __ONERT_EXEC_FEATURE_NHWC_VIEW_H__ -#include "../Reader.h" +#include "Reader.h" #include <cassert> #include <cstddef> diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.test.cc b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc new file mode 100644 index 000000000..bdd73d5a7 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
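
Both expected values fall out of the stride arithmetic: at(batch, row, col, ch) multiplies each coordinate by the matching stride, so the same logical coordinate lands on flat index 8 through the explicit strides but on 6 through the packed NHWC MockTensor. Spelled out (a standalone check, not onert code):

#include <cassert>
#include <cstdint>

int main()
{
  const int32_t data[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};

  // Stride-constructed reader, element strides N=12, C=1, H=6, W=2:
  // at(batch=0, row=1, col=1, ch=0) -> 0*12 + 1*6 + 1*2 + 0*1 = 8
  assert(data[0 * 12 + 1 * 6 + 1 * 2 + 0 * 1] == 8);

  // MockTensor path, packed NHWC of shape (N=1, H=3, W=2, C=2):
  // flat = ((batch*H + row)*W + col)*C + ch = ((0*3 + 1)*2 + 1)*2 + 0 = 6
  assert(data[((0 * 3 + 1) * 2 + 1) * 2 + 0] == 6);
  return 0;
}
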
+ */ + +#include "View.h" + +#include "../MockTensor.h" + +#include <gtest/gtest.h> + +using namespace onert::exec::feature; + +template <typename T> class View_nhwc : public testing::Test +{ +public: + void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createView() + { + _view = + std::make_shared<nhwc::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC); + _view = std::make_shared<nhwc::View<T>>(_tensor.get()); + } + + std::shared_ptr<nhwc::View<T>> _view = nullptr; + +private: + std::shared_ptr<std::vector<T>> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr<MockTensor<T>> _tensor = nullptr; +}; + +using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>; +TYPED_TEST_SUITE(View_nhwc, ViewTypes); +TYPED_TEST_SUITE(MockTensorView_nhwc, ViewTypes); + +TYPED_TEST(View_nhwc, basic_view) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 1, 6, 2); + this->createView(); + + // Data: NCHW + // Shape: NHWC + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_view->at(1, 1, 0), 8); + + // Data: NHWC + // Shape: NHWC + this->createUsingMockTensor(); + + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_view->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/interp/InterpExecutor.cc b/runtime/onert/core/src/interp/InterpExecutor.cc index 44d1575d7..f04777174 100644 --- a/runtime/onert/core/src/interp/InterpExecutor.cc +++ b/runtime/onert/core/src/interp/InterpExecutor.cc @@ -14,9 +14,10 @@ * limitations under the License. */ -#include "interp/InterpExecutor.h" -#include "interp/ExecEnv.h" -#include "interp/Interpreter.h" +#include "InterpExecutor.h" + +#include "ExecEnv.h" +#include "Interpreter.h" #include "util/logging.h" diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h index df6153d09..d6d5dd0a3 100644 --- a/runtime/onert/core/src/interp/InterpExecutor.h +++ b/runtime/onert/core/src/interp/InterpExecutor.h @@ -74,7 +74,12 @@ public: } private: - const ir::Graph &_graph; + /** + * @brief Copy of target graph for lowering + * @note It uses copy of graph, not reference. + * Original graph may be deallocated by frontend. + */ + const ir::Graph _graph; ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map; }; diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/core/src/interp/InterpExecutor.test.cc index a9f7cd46a..9f95ffee0 100644 --- a/runtime/onert/test/core/interp/ExecManager.cc +++ b/runtime/onert/core/src/interp/InterpExecutor.test.cc @@ -14,22 +14,23 @@ * limitations under the License. 
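
Storing _graph by value (backed by the defaulted Graph copy constructor added in Graph.cc later in this diff) removes a use-after-free: as the new comment says, the frontend may deallocate the original graph while the interpreter still needs it. The hazard in miniature, with placeholder types rather than the onert classes:

#include <memory>
#include <string>

struct Graph { std::string name; };

// Holding a reference: dangles once the owner releases the graph.
struct RefExecutor
{
  explicit RefExecutor(const Graph &g) : _graph(g) {}
  const Graph &_graph;
};

// Holding a copy (what InterpExecutor now does): safe after the frontend
// deallocates the original graph.
struct CopyExecutor
{
  explicit CopyExecutor(const Graph &g) : _graph(g) {}
  const Graph _graph;
};

int main()
{
  auto owner = std::make_unique<Graph>(Graph{"subgraph0"});
  CopyExecutor safe{*owner};
  owner.reset(); // frontend frees the graph; 'safe' still owns a valid copy
  // A RefExecutor built the same way would now hold a dangling reference.
  return static_cast<int>(safe._graph.name.size()); // OK: reads the copy
}
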
*/ -#include <gtest/gtest.h> - -#include <memory> +#include "InterpExecutor.h" -#include "ir/Graph.h" -#include "interp/InterpExecutor.h" #include "exec/Execution.h" +#include "ir/Graph.h" #include "ir/operation/BinaryArithmetic.h" +#include <gtest/gtest.h> + +#include <memory> + namespace { using namespace onert::ir; using InterpExecutor = onert::interp::InterpExecutor; using Execution = onert::exec::Execution; -using ExecutorMap = onert::exec::ExecutorMap; +using Executors = onert::exec::Executors; class InterpExecutorTest : public ::testing::Test { @@ -73,13 +74,11 @@ protected: _graph->verify(); - auto subgs = std::make_shared<onert::ir::Subgraphs>(); - subgs->push(onert::ir::SubgraphIndex{0}, _graph); - _graph->setSubgraphs(subgs); + auto model = std::make_shared<onert::ir::Model>(); + model->push(onert::ir::SubgraphIndex{0}, _graph); - _executors = std::make_shared<ExecutorMap>(); - _executors->insert( - std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph))); + _executors = std::make_shared<Executors>(); + _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)); } void CreateTwoStepModel() @@ -138,13 +137,11 @@ protected: _graph->verify(); - auto subgs = std::make_shared<onert::ir::Subgraphs>(); - subgs->push(onert::ir::SubgraphIndex{0}, _graph); - _graph->setSubgraphs(subgs); + auto model = std::make_shared<onert::ir::Model>(); + model->push(onert::ir::SubgraphIndex{0}, _graph); - _executors = std::make_shared<ExecutorMap>(); - _executors->insert( - std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph))); + _executors = std::make_shared<Executors>(); + _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)); } void CreateUnspecifiedDimensionsModel() @@ -191,13 +188,11 @@ protected: _graph->verify(); - auto subgs = std::make_shared<onert::ir::Subgraphs>(); - subgs->push(onert::ir::SubgraphIndex{0}, _graph); - _graph->setSubgraphs(subgs); + auto model = std::make_shared<onert::ir::Model>(); + model->push(onert::ir::SubgraphIndex{0}, _graph); - _executors = std::make_shared<ExecutorMap>(); - _executors->insert( - std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph))); + _executors = std::make_shared<Executors>(); + _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)); } void createExecution() { _execution = std::make_unique<Execution>(_executors); } @@ -205,7 +200,7 @@ protected: virtual void TearDown() { _executors = nullptr; } std::shared_ptr<Graph> _graph{nullptr}; - std::shared_ptr<ExecutorMap> _executors{nullptr}; + std::shared_ptr<Executors> _executors{nullptr}; std::unique_ptr<Execution> _execution{nullptr}; const int32_t _activation_value{0}; }; diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc index 804e9fb51..fe4acd309 100644 --- a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc +++ b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc @@ -14,14 +14,14 @@ * limitations under the License. 
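
The fixture changes above reduce to one pattern: subgraphs are registered on an ir::Model and per-subgraph executors on exec::Executors. Extracted as a helper (a sketch that assumes this tree's headers; the calls are the ones the updated test itself uses):

#include "InterpExecutor.h"
#include "exec/Execution.h"
#include "ir/Graph.h"
#include "ir/Model.h"

#include <memory>

std::unique_ptr<onert::exec::Execution> makeExecution(std::shared_ptr<onert::ir::Graph> graph)
{
  // Subgraphs are grouped under ir::Model now (ir::Subgraphs is gone)
  auto model = std::make_shared<onert::ir::Model>();
  model->push(onert::ir::SubgraphIndex{0}, graph);

  // exec::Executors replaces the plain ExecutorMap; emplace() registers the executor
  auto executors = std::make_shared<onert::exec::Executors>();
  executors->emplace(onert::ir::SubgraphIndex{0},
                     std::make_unique<onert::interp::InterpExecutor>(*graph));

  return std::make_unique<onert::exec::Execution>(executors);
}
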
*/ -#include <cker/operation/BinaryArithmeticOps.h> - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/BinaryArithmetic.h" -#include "misc/polymorphic_downcast.h" -#include "cker/Types.h" + +#include <cker/operation/BinaryArithmeticOps.h> +#include <cker/Types.h> +#include <misc/polymorphic_downcast.h> namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Concat.cc b/runtime/onert/core/src/interp/operations/Concat.cc index a063ab14a..103604631 100644 --- a/runtime/onert/core/src/interp/operations/Concat.cc +++ b/runtime/onert/core/src/interp/operations/Concat.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include <cker/operation/Concatenation.h> - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Concat.h" -#include "misc/polymorphic_downcast.h" + +#include <cker/operation/Concatenation.h> +#include <misc/polymorphic_downcast.h> namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Conv2D.cc b/runtime/onert/core/src/interp/operations/Conv2D.cc index 0b43a4799..72c2057c2 100644 --- a/runtime/onert/core/src/interp/operations/Conv2D.cc +++ b/runtime/onert/core/src/interp/operations/Conv2D.cc @@ -14,15 +14,15 @@ * limitations under the License. */ -#include <cker/operation/Conv.h> - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Conv2D.h" -#include "util/Utils.h" #include "util/ShapeInference.h" -#include "misc/polymorphic_downcast.h" +#include "util/Utils.h" + +#include <cker/operation/Conv.h> +#include <misc/polymorphic_downcast.h> namespace onert { diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc index d1c62d73f..9f527440e 100644 --- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc +++ b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc @@ -14,15 +14,15 @@ * limitations under the License. */ -#include <cker/operation/DepthwiseConv.h> -#include <misc/polymorphic_downcast.h> - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/DepthwiseConv2D.h" -#include "util/Utils.h" #include "util/ShapeInference.h" +#include "util/Utils.h" + +#include <cker/operation/DepthwiseConv.h> +#include <misc/polymorphic_downcast.h> namespace onert { diff --git a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc index 197855ff4..e13080e76 100644 --- a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc +++ b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc @@ -14,17 +14,16 @@ * limitations under the License. 
*/ -#include <cmath> - #include "OperationUtil.h" - -#include "interp/Registration.h" +#include "../Registration.h" #include "ir/operation/ElementwiseActivation.h" -#include <misc/polymorphic_downcast.h> #include <cker/operation/Logistic.h> #include <cker/operation/Tanh.h> +#include <misc/polymorphic_downcast.h> + +#include <cmath> namespace onert { diff --git a/runtime/onert/core/src/interp/operations/FullyConnected.cc b/runtime/onert/core/src/interp/operations/FullyConnected.cc index ef827605b..2bc9f517f 100644 --- a/runtime/onert/core/src/interp/operations/FullyConnected.cc +++ b/runtime/onert/core/src/interp/operations/FullyConnected.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include <cker/operation/FullyConnected.h> - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/FullyConnected.h" -#include "misc/polymorphic_downcast.h" + +#include <cker/operation/FullyConnected.h> +#include <misc/polymorphic_downcast.h> namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Gather.cc b/runtime/onert/core/src/interp/operations/Gather.cc index 0ea60875c..d686cfcf6 100644 --- a/runtime/onert/core/src/interp/operations/Gather.cc +++ b/runtime/onert/core/src/interp/operations/Gather.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include <cker/operation/Gather.h> - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Gather.h" -#include "misc/polymorphic_downcast.h" + +#include <cker/operation/Gather.h> +#include <misc/polymorphic_downcast.h> namespace onert { diff --git a/runtime/onert/core/src/interp/operations/InstanceNorm.cc b/runtime/onert/core/src/interp/operations/InstanceNorm.cc index b5c38819d..318088457 100644 --- a/runtime/onert/core/src/interp/operations/InstanceNorm.cc +++ b/runtime/onert/core/src/interp/operations/InstanceNorm.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include <cker/operation/InstanceNorm.h> - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/InstanceNorm.h" -#include "misc/polymorphic_downcast.h" + +#include <cker/operation/InstanceNorm.h> +#include <misc/polymorphic_downcast.h> namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Pad.cc b/runtime/onert/core/src/interp/operations/Pad.cc index 0eec7fe9a..3db0828eb 100644 --- a/runtime/onert/core/src/interp/operations/Pad.cc +++ b/runtime/onert/core/src/interp/operations/Pad.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include <cker/operation/Pad.h> - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Pad.h" +#include <cker/operation/Pad.h> + namespace onert { namespace interp diff --git a/runtime/onert/core/src/interp/operations/Pool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc index 2f3b71655..3935d4756 100644 --- a/runtime/onert/core/src/interp/operations/Pool2D.cc +++ b/runtime/onert/core/src/interp/operations/Pool2D.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include <cker/operation/AveragePool.h> -#include <cker/operation/MaxPool.h> - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Pool2D.h" -#include "util/Utils.h" #include "util/ShapeInference.h" -#include "misc/polymorphic_downcast.h" +#include "util/Utils.h" + +#include <cker/operation/AveragePool.h> +#include <cker/operation/MaxPool.h> +#include <misc/polymorphic_downcast.h> namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Reshape.cc b/runtime/onert/core/src/interp/operations/Reshape.cc index 3a118456b..1de5a5762 100644 --- a/runtime/onert/core/src/interp/operations/Reshape.cc +++ b/runtime/onert/core/src/interp/operations/Reshape.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "interp/Registration.h" +#include "../Registration.h" namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc index 1fc303117..8be2f2210 100644 --- a/runtime/onert/core/src/interp/operations/Softmax.cc +++ b/runtime/onert/core/src/interp/operations/Softmax.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include <cker/operation/SoftMax.h> - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Softmax.h" -#include "misc/polymorphic_downcast.h" + +#include <cker/operation/SoftMax.h> +#include <misc/polymorphic_downcast.h> namespace onert { diff --git a/runtime/onert/core/src/interp/operations/TransposeConv.cc b/runtime/onert/core/src/interp/operations/TransposeConv.cc index 755103dc2..59c8e8cdf 100644 --- a/runtime/onert/core/src/interp/operations/TransposeConv.cc +++ b/runtime/onert/core/src/interp/operations/TransposeConv.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include <cker/operation/TransposeConv.h> -#include <misc/polymorphic_downcast.h> - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/TransposeConv.h" +#include <cker/operation/TransposeConv.h> +#include <misc/polymorphic_downcast.h> + namespace onert { namespace interp diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc index df30bbdbe..28cf4137d 100644 --- a/runtime/onert/core/src/ir/Graph.cc +++ b/runtime/onert/core/src/ir/Graph.cc @@ -17,19 +17,9 @@ #include "ir/Graph.h" #include "OperationValidator.h" +#include "verifier/Verifier.h" -#include <algorithm> - -#include <bitset> -#include <sstream> - -#include "util/logging.h" #include "util/Set.h" -#include "verifier/Verifier.h" -#include "ir/OperandIndexMap.h" -#include "ir/OperationIndexMap.h" -#include "dumper/text/GraphDumper.h" -#include "backend/IConfig.h" namespace onert { @@ -38,6 +28,8 @@ namespace ir Graph::Graph() = default; +Graph::Graph(const Graph &) = default; + Graph::~Graph(void) = default; OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type) diff --git a/runtime/onert/test/core/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.test.cc index d6de7c0cc..144500745 100644 --- a/runtime/onert/test/core/ir/Graph.cc +++ b/runtime/onert/core/src/ir/Graph.test.cc @@ -14,11 +14,10 @@ * limitations under the License. 
*/ -#include <gtest/gtest.h> - #include "ir/Graph.h" #include "ir/operation/BinaryArithmetic.h" -#include "ir/verifier/Verifier.h" + +#include <gtest/gtest.h> TEST(Graph, neg_inputs_and_outputs) { diff --git a/runtime/onert/test/core/ir/LayoutSet.cc b/runtime/onert/core/src/ir/LayoutSet.test.cc index 591710a4d..fc956abe8 100644 --- a/runtime/onert/test/core/ir/LayoutSet.cc +++ b/runtime/onert/core/src/ir/LayoutSet.test.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include <gtest/gtest.h> +#include "LayoutSet.h" -#include "ir/LayoutSet.h" +#include <gtest/gtest.h> using onert::ir::Layout; using onert::ir::LayoutSet; diff --git a/runtime/onert/test/core/ir/MockNode.h b/runtime/onert/core/src/ir/MockNode.h index 0e7ed977b..0e7ed977b 100644 --- a/runtime/onert/test/core/ir/MockNode.h +++ b/runtime/onert/core/src/ir/MockNode.h diff --git a/runtime/onert/test/core/ir/UseDef.cc b/runtime/onert/core/src/ir/Operand.test.cc index 47c98f939..0b858792a 100644 --- a/runtime/onert/test/core/ir/UseDef.cc +++ b/runtime/onert/core/src/ir/Operand.test.cc @@ -14,13 +14,14 @@ * limitations under the License. */ -#include <gtest/gtest.h> - #include "ir/Graph.h" -#include "ir/verifier/Verifier.h" -#include <memory> + #include "MockNode.h" +#include "verifier/Verifier.h" +#include <gtest/gtest.h> + +#include <memory> #include <typeindex> namespace diff --git a/runtime/onert/test/core/ir/OperandIndexSet.cc b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc index c363e5472..588c4e419 100644 --- a/runtime/onert/test/core/ir/OperandIndexSet.cc +++ b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include <gtest/gtest.h> - #include "ir/OperandIndexSequence.h" +#include <gtest/gtest.h> + using onert::ir::OperandIndex; using onert::ir::OperandIndexSequence; diff --git a/runtime/onert/test/core/ir/OperandSet.cc b/runtime/onert/core/src/ir/Operands.test.cc index 6cf9c8842..aff228b10 100644 --- a/runtime/onert/test/core/ir/OperandSet.cc +++ b/runtime/onert/core/src/ir/Operands.test.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include <gtest/gtest.h> - #include "ir/Operands.h" +#include <gtest/gtest.h> + TEST(ir_Operands, neg_set_test) { onert::ir::Operands set; diff --git a/runtime/onert/test/core/ir/SetIO.cc b/runtime/onert/core/src/ir/Operation.test.cc index 68b477347..b3c4e852d 100644 --- a/runtime/onert/test/core/ir/SetIO.cc +++ b/runtime/onert/core/src/ir/Operation.test.cc @@ -14,16 +14,15 @@ * limitations under the License. */ -#include <gtest/gtest.h> - #include "ir/Graph.h" #include "ir/Index.h" #include "ir/OperandIndexSequence.h" -#include "ir/operation/Conv2D.h" #include "ir/operation/Concat.h" +#include "ir/operation/Conv2D.h" -#include <memory> +#include <gtest/gtest.h> +#include <memory> #include <stdexcept> using Index = onert::ir::IOIndex; diff --git a/runtime/onert/test/core/ir/OperationSet.cc b/runtime/onert/core/src/ir/Operations.test.cc index 4a17eeb33..e57872689 100644 --- a/runtime/onert/test/core/ir/OperationSet.cc +++ b/runtime/onert/core/src/ir/Operations.test.cc @@ -14,10 +14,11 @@ * limitations under the License. 
*/ -#include <gtest/gtest.h> +#include "ir/Operations.h" #include "MockNode.h" -#include "ir/Operations.h" + +#include <gtest/gtest.h> using onert::ir::Operation; using onert::ir::OperationIndex; diff --git a/runtime/onert/test/core/ir/Shape.cc b/runtime/onert/core/src/ir/Shape.test.cc index c24aeda8d..afdb29254 100644 --- a/runtime/onert/test/core/ir/Shape.cc +++ b/runtime/onert/core/src/ir/Shape.test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include <ir/Shape.h> +#include "ir/Shape.h" #include <gtest/gtest.h> diff --git a/runtime/onert/test/core/ir/Verifier.cc b/runtime/onert/core/src/ir/verifier/Verifier.test.cc index b4be2d9cd..1ec71cd55 100644 --- a/runtime/onert/test/core/ir/Verifier.cc +++ b/runtime/onert/core/src/ir/verifier/Verifier.test.cc @@ -14,14 +14,15 @@ * limitations under the License. */ -#include <gtest/gtest.h> +#include "Verifier.h" + +#include "../MockNode.h" -#include "ir/Operation.h" #include "ir/Graph.h" -#include "ir/verifier/Verifier.h" + +#include <gtest/gtest.h> + #include <memory> -#include "ir/Operand.h" -#include "MockNode.h" using IndexSet = onert::ir::OperandIndexSequence; using Mock = onert_test::ir::SimpleMock; diff --git a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc index 3fc0c8ece..d868efedf 100644 --- a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc +++ b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "util/EventWriter.h" +#include "EventWriter.h" -#include <sstream> -#include <vector> #include <cassert> +#include <sstream> #include <utility> +#include <vector> // json type for ChromeTracingWriter namespace diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc index 9da93f68a..b7fcefc7a 100644 --- a/runtime/onert/core/src/util/ConfigSource.cc +++ b/runtime/onert/core/src/util/ConfigSource.cc @@ -15,13 +15,15 @@ */ #include "util/ConfigSource.h" -#include "util/GeneralConfigSource.h" -#include "util/EnvConfigSource.h" +#include "util/logging.h" + +#include <misc/EnvConfigSource.h> +#include <misc/GeneralConfigSource.h> +#include <misc/IConfigSource.h> -#include <array> #include <algorithm> +#include <array> #include <cassert> - #include <memory> namespace onert @@ -29,12 +31,27 @@ namespace onert namespace util { +using namespace nnfw::misc; + static std::unique_ptr<IConfigSource> _source; static std::unique_ptr<IConfigSource> _source_ext; void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); } void config_source_ext(std::unique_ptr<IConfigSource> &&source) { _source_ext = std::move(source); } +void setConfigKeyValues(const CfgKeyValues &keyValues) +{ + auto configsrc = std::make_unique<GeneralConfigSource>(); + + for (auto it = keyValues.begin(); it != keyValues.end(); ++it) + { + VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl; + configsrc->set(it->first, it->second); + } + + onert::util::config_source_ext(std::move(configsrc)); +} + static IConfigSource *config_source() { if (!_source) diff --git a/runtime/onert/core/src/util/EnvConfigSource.cc b/runtime/onert/core/src/util/EnvConfigSource.cc deleted file mode 100644 index 0d25b7353..000000000 --- a/runtime/onert/core/src/util/EnvConfigSource.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/EnvConfigSource.h" - -#include <cstdlib> - -namespace onert -{ -namespace util -{ - -std::string EnvConfigSource::get(const std::string &key) const -{ - const char *value = std::getenv(key.c_str()); - if (value != nullptr) - { - return value; - } - else - { - return GeneralConfigSource::get(key); - } -} - -} // namespace util -} // namespace onert diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc index 83c2649d1..c1b9c4315 100644 --- a/runtime/onert/core/src/util/EventCollector.cc +++ b/runtime/onert/core/src/util/EventCollector.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "util/EventCollector.h" +#include "EventCollector.h" // C++ standard libraries #include <chrono> diff --git a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h index 774fe05ef..effb72373 100644 --- a/runtime/onert/core/src/util/EventCollector.h +++ b/runtime/onert/core/src/util/EventCollector.h @@ -17,12 +17,13 @@ #ifndef __ONERT_UTIL_EVENT_COLLECTOR_H__ #define __ONERT_UTIL_EVENT_COLLECTOR_H__ -#include "util/EventRecorder.h" +#include "EventRecorder.h" + #include "util/TracingCtx.h" -#include <vector> -#include <utility> #include <string> +#include <utility> +#include <vector> class EventCollector { diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc index 5d3d5f5c6..85a588d38 100644 --- a/runtime/onert/core/src/util/EventRecorder.cc +++ b/runtime/onert/core/src/util/EventRecorder.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "util/EventRecorder.h" +#include "EventRecorder.h" void EventRecorder::emit(std::unique_ptr<DurationEvent> &&evt) { diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc index c42c53730..ca4bd302e 100644 --- a/runtime/onert/core/src/util/EventWriter.cc +++ b/runtime/onert/core/src/util/EventWriter.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "util/EventWriter.h" +#include "EventWriter.h" #include <cassert> diff --git a/runtime/onert/core/src/util/GeneralConfigSource.cc b/runtime/onert/core/src/util/GeneralConfigSource.cc deleted file mode 100644 index 7d2757e58..000000000 --- a/runtime/onert/core/src/util/GeneralConfigSource.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
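
EnvConfigSource's behavior (consult the process environment first, then fall back to the stored key/value map) now lives in nnfw::misc rather than onert::util, as the ConfigSource.cc hunk above shows. The same lookup as a standalone sketch:

#include <cstdlib>
#include <string>
#include <unordered_map>

// Env-first lookup with map fallback, mirroring the deleted class's get().
std::string getConfig(const std::unordered_map<std::string, std::string> &stored,
                      const std::string &key)
{
  if (const char *value = std::getenv(key.c_str()))
    return value;
  auto it = stored.find(key);
  return it != stored.end() ? it->second : "";
}
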
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/GeneralConfigSource.h" -#include "util/logging.h" - -namespace onert -{ -namespace util -{ - -std::string GeneralConfigSource::get(const std::string &key) const -{ - auto itr = _map.find(key); - if (itr == _map.end()) - { - return ""; - } - else - { - return itr->second; - } -} - -void GeneralConfigSource::set(const std::string &key, const std::string &val) -{ - VERBOSE(GeneralConfigSource) << key << " : " << val << std::endl; - _map[key] = val; -} - -} // namespace util -} // namespace onert diff --git a/runtime/onert/test/core/util/Index.cc b/runtime/onert/core/src/util/Index.test.cc index 2d110e326..ff73e5e59 100644 --- a/runtime/onert/test/core/util/Index.cc +++ b/runtime/onert/core/src/util/Index.test.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include <gtest/gtest.h> - #include "util/Index.h" +#include <gtest/gtest.h> + using Index = ::onert::util::Index<uint32_t, struct TestTag>; TEST(Index, neg_index_test) diff --git a/runtime/onert/core/src/util/MDTableEventWriter.cc b/runtime/onert/core/src/util/MDTableEventWriter.cc index b7fbac5e2..7a8b9f234 100644 --- a/runtime/onert/core/src/util/MDTableEventWriter.cc +++ b/runtime/onert/core/src/util/MDTableEventWriter.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "util/EventWriter.h" +#include "EventWriter.h" -#include <sstream> -#include <vector> -#include <unordered_map> #include <cassert> -#include <utility> #include <map> #include <set> +#include <sstream> #include <stdint.h> +#include <unordered_map> +#include <utility> +#include <vector> // md table type namespace diff --git a/runtime/onert/test/core/util/ObjectManager.cc b/runtime/onert/core/src/util/ObjectManager.test.cc index 78f044e56..3fe735732 100644 --- a/runtime/onert/test/core/util/ObjectManager.cc +++ b/runtime/onert/core/src/util/ObjectManager.test.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include <gtest/gtest.h> - -#include "util/ObjectManager.h" #include "util/Index.h" +#include "util/ObjectManager.h" + +#include <gtest/gtest.h> using namespace onert; diff --git a/runtime/onert/core/src/util/SNPEEventWriter.cc b/runtime/onert/core/src/util/SNPEEventWriter.cc index 6f03cfccf..4dea6d16c 100644 --- a/runtime/onert/core/src/util/SNPEEventWriter.cc +++ b/runtime/onert/core/src/util/SNPEEventWriter.cc @@ -14,11 +14,12 @@ * limitations under the License. */ -#include "util/EventWriter.h" +#include "EventWriter.h" -#include <unordered_map> #include <json/json.h> + #include <cassert> +#include <unordered_map> #include <utility> /** diff --git a/runtime/onert/test/core/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.test.cc index 2ecaa2885..96579bfa2 100644 --- a/runtime/onert/test/core/util/ShapeInference.cc +++ b/runtime/onert/core/src/util/ShapeInference.test.cc @@ -14,11 +14,10 @@ * limitations under the License. 
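
After the move, programmatic overrides go through onert::util::setConfigKeyValues(), now defined in ConfigSource.cc above. A usage sketch, assuming util/ConfigSource.h exposes the CfgKeyValues alias; the keys shown are illustrative, not a fixed list:

#include "util/ConfigSource.h"

void applyPackageConfigs()
{
  // CfgKeyValues is a string-to-string map of config overrides
  onert::util::CfgKeyValues kvs;
  kvs["BACKENDS"] = "cpu";                   // illustrative key/value pairs
  kvs["TRACE_FILEPATH"] = "/tmp/trace.json";

  // Wraps the map in a GeneralConfigSource and installs it as the extended source
  onert::util::setConfigKeyValues(kvs);
}
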
*/ -#include <gtest/gtest.h> - -#include "ir/Layout.h" #include "util/ShapeInference.h" +#include <gtest/gtest.h> + using namespace onert::ir; TEST(ShapeInference, Elementwise) diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h index 5649f286d..cf080abbc 100644 --- a/runtime/onert/frontend/base_loader/include/base_loader.h +++ b/runtime/onert/frontend/base_loader/include/base_loader.h @@ -65,10 +65,10 @@ public: /** * @brief Construct a new Loader object * - * @param graph reference on subgraphs + * @param model reference to model */ - explicit BaseLoader(std::unique_ptr<ir::Subgraphs> &subgs) - : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr}, + explicit BaseLoader(std::unique_ptr<ir::Model> &model) + : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr}, _tensor_names(std::make_shared<std::unordered_map<ir::OperandIndex, std::string>>()) { _use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA); @@ -114,7 +114,7 @@ protected: // Get BuiltinOperator BuiltinOperator getBuiltinOperator(const Operator *op) { - auto const builtin_opcode = _model->operator_codes()->Get(op->opcode_index()); + auto const builtin_opcode = _domain_model->operator_codes()->Get(op->opcode_index()); auto builtin_op = builtin_opcode->builtin_code(); if (builtin_op < BuiltinOperator::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES) builtin_op = static_cast<BuiltinOperator>(builtin_opcode->deprecated_builtin_code()); @@ -176,7 +176,7 @@ private: void verifySubgraphIndex(int subg_index) { - const auto num_subgraphs = _model->subgraphs()->size(); + const auto num_subgraphs = _domain_model->subgraphs()->size(); if (subg_index < 0 || subg_index >= static_cast<int32_t>(num_subgraphs)) throw std::runtime_error{std::string{"Invalid subgraph index - "} + std::to_string(subg_index)}; @@ -189,9 +189,9 @@ protected: int32_t _pagesize; // loaded file description int _fd; - // Reference on loadable subgraphs - std::unique_ptr<ir::Subgraphs> &_subgraphs; - const Model *_model; + // Reference to ir::model (to be loaded from _domain_model) + std::unique_ptr<ir::Model> &_model; + const Model *_domain_model; // Maps Tensor indices to onert Operands. std::vector<ir::OperandIndex> _tensor_to_operand; std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names; @@ -290,6 +290,8 @@ ir::DataType BaseLoader<LoaderDomain>::BaseLoader::tensorTypeToDataType(const Te case TensorType::TensorType_INT8: return ir::DataType::QUANT_INT8_ASYMM; // case TensorType::TensorType_FLOAT64 + case TensorType::TensorType_UINT32: + return ir::DataType::UINT32; default: throw std::runtime_error( std::string("Unsupported tensor type: ").append(EnumNameTensorType(type))); @@ -358,7 +360,7 @@ ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir: const auto operand_index = subg.addOperand(shape, type_info); // Constant tensors are indicated by non-empty data. 
- const auto *data = _model->buffers()->Get(tensor->buffer())->data(); + const auto *data = _domain_model->buffers()->Get(tensor->buffer())->data(); if (data != nullptr) { using std::ptrdiff_t; @@ -1037,7 +1039,7 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg) assert(op->custom_options_format() == CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS && "Unsupported custom operation options format"); - auto *op_code = _model->operator_codes()->Get(op->opcode_index()); + auto *op_code = _domain_model->operator_codes()->Get(op->opcode_index()); auto custom_op_name = op_code->custom_code()->str(); enum class BuiltinOP @@ -1670,7 +1672,7 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel() { LoaderDomain::VerifyModelBuffer(*_verifier.get()); - _model = LoaderDomain::GetModel(_base); + _domain_model = LoaderDomain::GetModel(_base); // Version unused // const auto version = _domain_model->version(); // Description unused @@ -1678,14 +1680,14 @@ template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel() // Metabuffer unused // const auto *metadata_buffer = _domain_model->metadata_buffer(); // Load subgraphs and map operations on subgraph - const auto domain_subgraphs = _model->subgraphs(); - auto subgraphs = std::make_unique<ir::Subgraphs>(); - for (uint32_t subgraph_index = 0; subgraph_index < domain_subgraphs->size(); ++subgraph_index) + const auto subgraphs = _domain_model->subgraphs(); + auto model = std::make_unique<ir::Model>(); + for (uint32_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index) { - auto subg = loadSubgraph((*_model->subgraphs())[subgraph_index]); - subgraphs->push(ir::SubgraphIndex{subgraph_index}, std::move(subg)); + auto subg = loadSubgraph((*_domain_model->subgraphs())[subgraph_index]); + model->push(ir::SubgraphIndex{subgraph_index}, std::move(subg)); } - _subgraphs = std::move(subgraphs); + _model = std::move(model); } } // namespace base_loader diff --git a/runtime/onert/frontend/circle/include/circle_loader.h b/runtime/onert/frontend/circle/include/circle_loader.h index 44bf28056..87e5d70ae 100644 --- a/runtime/onert/frontend/circle/include/circle_loader.h +++ b/runtime/onert/frontend/circle/include/circle_loader.h @@ -25,8 +25,8 @@ namespace onert { namespace circle_loader { -std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename); -std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size); +std::unique_ptr<ir::Model> loadModel(const std::string &filename); +std::unique_ptr<ir::Model> loadModel(uint8_t *buffer, size_t size); } // namespace circle_loader } // namespace onert diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc index aae831d61..5abcc9cd0 100644 --- a/runtime/onert/frontend/circle/src/circle_loader.cc +++ b/runtime/onert/frontend/circle/src/circle_loader.cc @@ -228,20 +228,20 @@ void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg) } // namespace -std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename) +std::unique_ptr<ir::Model> loadModel(const std::string &filename) { - auto subgraphs = std::make_unique<ir::Subgraphs>(); - CircleLoader loader(subgraphs); + auto model = std::make_unique<ir::Model>(); + CircleLoader loader(model); loader.loadFromFile(filename); - return subgraphs; + return model; } -std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t
size) +std::unique_ptr<ir::Model> loadModel(uint8_t *buffer, size_t size) { - auto subgraphs = std::make_unique<ir::Subgraphs>(); - CircleLoader loader(subgraphs); + auto model = std::make_unique<ir::Model>(); + CircleLoader loader(model); loader.loadFromBuffer(buffer, size); - return subgraphs; + return model; } } // namespace circle_loader diff --git a/runtime/onert/frontend/nnapi/execution.cc b/runtime/onert/frontend/nnapi/execution.cc index 56ca5ef00..19636a84d 100644 --- a/runtime/onert/frontend/nnapi/execution.cc +++ b/runtime/onert/frontend/nnapi/execution.cc @@ -37,7 +37,7 @@ int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation, return ANEURALNETWORKS_UNEXPECTED_NULL; } - std::shared_ptr<onert::exec::ExecutorMap> executors; + std::shared_ptr<onert::exec::Executors> executors; compilation->publish(executors); diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc index 63036a398..bb247b97f 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc @@ -18,11 +18,12 @@ #include "util/logging.h" +using namespace onert; + // TODO Support multiple subgraphs ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksModel *model) noexcept - : _subgraphs{model->getSubGraphs()}, _tracing_ctx{std::make_unique<onert::util::TracingCtx>( - _subgraphs.get())}, - _compiler{new onert::compiler::Compiler{_subgraphs, _tracing_ctx.get()}} + : _model{model->getModel()}, _coptions{compiler::CompilerOptions::fromGlobalConfig()}, + _compiler{std::make_shared<compiler::Compiler>(_model, *_coptions)} { if (model->allowedToFp16()) { @@ -34,7 +35,7 @@ bool ANeuralNetworksCompilation::finish() noexcept { try { - _executors = _compiler->compile(); + _artifact = _compiler->compile(); } catch (const std::exception &e) { diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h index bd61f9d86..dff5c6dc6 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h @@ -21,8 +21,8 @@ #include "compiler/Compiler.h" #include "ir/Graph.h" -#include "ir/Subgraphs.h" -#include "exec/IExecutor.h" +#include "ir/Model.h" +#include "exec/Executors.h" #include "util/TracingCtx.h" struct ANeuralNetworksCompilation @@ -34,23 +34,16 @@ public: bool finish() noexcept; onert::compiler::State state(void) noexcept { return _compiler->state(); } - void publish(std::shared_ptr<onert::exec::ExecutorMap> &executors) noexcept + void publish(std::shared_ptr<onert::exec::Executors> &executors) noexcept { - executors = _executors; + executors = _artifact ? _artifact->_executors : nullptr; } private: - std::shared_ptr<onert::ir::Subgraphs> _subgraphs; - // TODO Refine the ownership of TracingCtx - // In case of nnfw API, nnfw_session has ownership of TracingCtx. - // In case of nnapi, there is no concept of session and primary model might have the ownership - // of TracingCtx. - // Since we don't support multiple models yet with nnapi in ONE, let's implement this later - // and let's make it work with one model for now. 
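
On the nnapi side, compilation now flows through CompilerOptions::fromGlobalConfig() and a CompilerArtifact instead of a Subgraphs plus ExecutorMap pair. The calls, lifted from the hunks above into one sketch (assumes this tree's headers; CompilerArtifact's _executors member is used exactly as the publish() change does):

#include "compiler/Compiler.h"
#include "ir/Model.h"

#include <memory>

std::shared_ptr<onert::exec::Executors>
compileModel(const std::shared_ptr<onert::ir::Model> &model)
{
  // Per-compilation options now come from the global config
  auto coptions = onert::compiler::CompilerOptions::fromGlobalConfig();
  onert::compiler::Compiler compiler{model, *coptions};

  // compile() returns a CompilerArtifact bundling the executors
  auto artifact = compiler.compile();
  return artifact ? artifact->_executors : nullptr;
}
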
- std::unique_ptr<onert::util::TracingCtx> _tracing_ctx; - + std::shared_ptr<onert::ir::Model> _model; + std::unique_ptr<onert::compiler::CompilerOptions> _coptions; std::shared_ptr<onert::compiler::Compiler> _compiler; - std::shared_ptr<onert::exec::ExecutorMap> _executors; + std::shared_ptr<onert::compiler::CompilerArtifact> _artifact; }; #endif diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h index 70c5d2a4b..110c7cd55 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h @@ -26,7 +26,7 @@ struct ANeuralNetworksExecution { public: - ANeuralNetworksExecution(const std::shared_ptr<onert::exec::ExecutorMap> &executors) + ANeuralNetworksExecution(const std::shared_ptr<onert::exec::Executors> &executors) : _execution{std::make_shared<onert::exec::Execution>(executors)} { // DO NOTHING diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc index 81ffa26f3..a641368ec 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc @@ -273,16 +273,16 @@ void ANeuralNetworksModel::fillOptionalOperand(void) }); } -std::shared_ptr<onert::ir::Subgraphs> ANeuralNetworksModel::getSubGraphs() const +std::shared_ptr<onert::ir::Model> ANeuralNetworksModel::getModel() const { - auto all_subgs = std::make_shared<onert::ir::Subgraphs>(); + auto model = std::make_shared<onert::ir::Model>(); - all_subgs->push(onert::ir::SubgraphIndex{0}, _graph); + model->push(onert::ir::SubgraphIndex{0}, _graph); // TODO Find all child subgraphs and copy them to all_subgs // Must find the same subgraph by using to compare pointer of subgraphs and set subgraph's index // to operands of control flow operations // Must clean all child subgraphs's pointer to prevent memory leak in case of that graph has // subgraph itself recursively - return all_subgs; + return model; } diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h index 4301193d6..04f4cf0f2 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h @@ -22,7 +22,7 @@ #include <NeuralNetworksEx.h> #include "ir/Graph.h" -#include "ir/Subgraphs.h" +#include "ir/Model.h" struct ANeuralNetworksModel { @@ -59,7 +59,7 @@ public: size_t operandSize(uint32_t index) noexcept; bool isUsageSet(uint32_t index) noexcept; bool isOperationOutput(uint32_t index) noexcept; - std::shared_ptr<onert::ir::Subgraphs> getSubGraphs() const; + std::shared_ptr<onert::ir::Model> getModel() const; private: void setOptionalOperand(const onert::ir::OperandIndex idx); diff --git a/runtime/onert/frontend/tflite/include/tflite_loader.h b/runtime/onert/frontend/tflite/include/tflite_loader.h index dda34cc6a..cf17863f5 100644 --- a/runtime/onert/frontend/tflite/include/tflite_loader.h +++ b/runtime/onert/frontend/tflite/include/tflite_loader.h @@ -26,7 +26,7 @@ namespace onert namespace tflite_loader { -std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename); +std::unique_ptr<ir::Model> loadModel(const std::string &filename); } // namespace tflite_loader } // namespace onert diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc 
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index 3b160473d..fe69e4e2a 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -154,12 +154,12 @@ void TFLiteLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
 
 } // namespace
 
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
 {
-  auto subgraphs = std::make_unique<ir::Subgraphs>();
-  TFLiteLoader loader(subgraphs);
+  auto model = std::make_unique<ir::Model>();
+  TFLiteLoader loader(model);
   loader.loadFromFile(filename);
-  return subgraphs;
+  return model;
 }
 
 } // namespace tflite_loader
diff --git a/runtime/onert/frontend/trix/CMakeLists.txt b/runtime/onert/frontend/trix/CMakeLists.txt
index 7a0df4eaa..8d9063f6c 100644
--- a/runtime/onert/frontend/trix/CMakeLists.txt
+++ b/runtime/onert/frontend/trix/CMakeLists.txt
@@ -2,7 +2,7 @@ if (NOT BUILD_TRIX_LOADER)
   return()
 endif ()
 
-nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET)
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
 if(TRIXEngine_FOUND)
   list(APPEND SOURCES src/trix_loader.cc)
 else()
diff --git a/runtime/onert/frontend/trix/include/trix_loader.h b/runtime/onert/frontend/trix/include/trix_loader.h
index 297d5ec28..26d6a3c56 100644
--- a/runtime/onert/frontend/trix/include/trix_loader.h
+++ b/runtime/onert/frontend/trix/include/trix_loader.h
@@ -27,7 +27,7 @@ namespace trix_loader
 /**
  * @throw runtime_error when tvn path is wrong or tvn is invalid
 */
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
 } // namespace trix_loader
 } // namespace onert
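With this change every frontend entry point (tflite_loader, circle_loader, trix_loader) hands back the same std::unique_ptr<ir::Model>, so callers no longer need to care which frontend produced the IR. A minimal caller sketch, assuming only the two signatures shown above; the file paths are illustrative:

    #include <memory>
    #include "tflite_loader.h"
    #include "trix_loader.h"

    int main()
    {
      // Both loaders now produce the same IR container type.
      std::unique_ptr<onert::ir::Model> cpu_model =
        onert::tflite_loader::loadModel("model.tflite");
      // trix_loader::loadModel may throw std::runtime_error on a bad tvn path.
      std::unique_ptr<onert::ir::Model> npu_model =
        onert::trix_loader::loadModel("model.tvn");
      return (cpu_model && npu_model) ? 0 : 1;
    }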
diff --git a/runtime/onert/frontend/trix/src/trix_loader.cc b/runtime/onert/frontend/trix/src/trix_loader.cc
index e2995bbd1..cdf239648 100644
--- a/runtime/onert/frontend/trix/src/trix_loader.cc
+++ b/runtime/onert/frontend/trix/src/trix_loader.cc
@@ -67,11 +67,11 @@ void TrixMetaReader::init(const char *path)
   _meta = getNPUmodel_metadata(path, false);
   if (_meta == nullptr)
   {
-    throw std::runtime_error("Failed to get TRIV2 model metadata");
+    throw std::runtime_error("Failed to get TRIX model metadata");
   }
   if (NPUBIN_VERSION(_meta->magiccode) != 3)
   {
-    throw std::runtime_error("TRIV2 model metadata version mismatched.");
+    throw std::runtime_error("TRIX model metadata version mismatched.");
   }
 }
 
@@ -81,9 +81,9 @@ public:
   /**
    * @brief Construct a new Loader object
    *
-   * @param graph reference on subgraphs
+   * @param model reference on model
   */
-  explicit TrixLoader(std::unique_ptr<ir::Subgraphs> &subgs) : _subgraphs(subgs) {}
+  explicit TrixLoader(std::unique_ptr<ir::Model> &model) : _model(model) {}
 
   /**
    * @brief Load a model from file
@@ -97,7 +97,6 @@ private:
   * @throw runtime_error when tvn path is wrong or tvn is invalid
   */
   void loadModel();
-  void loadSubgraphs();
   std::unique_ptr<ir::Graph> loadSubgraph();
 
   void loadOperands(ir::Graph &subg);
   ir::OperandIndex loadOperandFromInput(uint32_t i, ir::Graph &subg);
@@ -112,8 +111,11 @@ private:
 protected:
   /** path to model (e.g. tvn) */
   std::string _model_path;
+  /** original IO shapes */
+  std::vector<ir::Shape> _origin_input_shapes;
+  std::vector<ir::Shape> _origin_output_shapes;
   /** Reference on loadable subgraphs */
-  std::unique_ptr<ir::Subgraphs> &_subgraphs;
+  std::unique_ptr<ir::Model> &_model;
   TrixMetaReader _meta;
 };
 
@@ -154,6 +156,8 @@ void TrixLoader::loadBulk(ir::Graph &subg)
 {
   ir::operation::Bulk::Param param;
   param.binary_path = _model_path;
+  param.origin_input_shapes = _origin_input_shapes;
+  param.origin_output_shapes = _origin_output_shapes;
 
   ir::OperandIndexSequence inputs;
   ir::OperandIndexSequence outputs;
@@ -175,6 +179,7 @@ ir::OperandIndex TrixLoader::loadOperandFromInput(uint32_t idx, ir::Graph &subg)
   ir::TypeInfo type_info(toDataType(_meta.input_seg_quant_type(idx)),
                          _meta.input_seg_quant_scale(idx), _meta.input_seg_quant_zp(idx));
+  _origin_input_shapes.push_back(shape);
   // Create operand
   const auto operand_index = subg.addOperand(shape, type_info);
   return operand_index;
@@ -191,6 +196,7 @@ ir::OperandIndex TrixLoader::loadOperandFromOutput(uint32_t idx, ir::Graph &subg
   ir::TypeInfo type_info(toDataType(_meta.output_seg_quant_type(idx)),
                          _meta.output_seg_quant_scale(idx), _meta.output_seg_quant_zp(idx));
+  _origin_output_shapes.push_back(shape);
   // Create operand
   const auto operand_index = subg.addOperand(shape, type_info);
   return operand_index;
@@ -237,15 +243,13 @@ std::unique_ptr<ir::Graph> TrixLoader::loadSubgraph()
   return subg;
 }
 
-void TrixLoader::loadSubgraphs()
+void TrixLoader::loadModel()
 {
   // one subgraph only
   auto subg = loadSubgraph();
-  _subgraphs->push(ir::SubgraphIndex(0), std::move(subg));
+  _model->push(ir::SubgraphIndex(0), std::move(subg));
 }
 
-void TrixLoader::loadModel() { loadSubgraphs(); }
-
 void TrixLoader::loadFromFile(const std::string &file_path)
 {
   // model path will be used to set Bulk param
@@ -255,12 +259,12 @@ void TrixLoader::loadFromFile(const std::string &file_path)
   loadModel();
 }
 
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
 {
-  auto subgraphs = std::make_unique<ir::Subgraphs>();
-  TrixLoader loader(subgraphs);
+  auto model = std::make_unique<ir::Model>();
+  TrixLoader loader(model);
   loader.loadFromFile(filename);
-  return subgraphs;
+  return model;
 }
 } // namespace trix_loader
 } // namespace onert
diff --git a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
index 9fc8e1ff2..eecbd2217 100644
--- a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
+++ b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
@@ -22,10 +22,10 @@ namespace onert
 {
 namespace trix_loader
 {
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &)
+std::unique_ptr<ir::Model> loadModel(const std::string &)
 {
-  auto subgraphs = std::make_unique<ir::Subgraphs>();
-  return subgraphs;
+  auto model = std::make_unique<ir::Model>();
+  return model;
 }
 } // namespace trix_loader
 } // namespace onert
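The three added hunks in trix_loader.cc cooperate: each I/O operand's shape is captured in load order, then loadBulk() copies the collected lists into Bulk::Param, presumably so the NPU binary's native I/O shapes remain available even if operand shapes change later. Distilled to the bare pattern in a self-contained sketch; everything except the origin_* field names is illustrative stand-in code:

    #include <string>
    #include <vector>

    using Shape = std::vector<int>; // stand-in for onert::ir::Shape

    struct BulkParam // mirrors the new fields of ir::operation::Bulk::Param
    {
      std::string binary_path;
      std::vector<Shape> origin_input_shapes;
      std::vector<Shape> origin_output_shapes;
    };

    struct LoaderSketch
    {
      // Called once per model input/output, in index order, as operands are created.
      void recordInput(const Shape &s) { _origin_input_shapes.push_back(s); }
      void recordOutput(const Shape &s) { _origin_output_shapes.push_back(s); }

      // Stamp the recorded shapes into the op parameter in one shot, as loadBulk() does.
      BulkParam makeBulkParam(const std::string &path) const
      {
        return BulkParam{path, _origin_input_shapes, _origin_output_shapes};
      }

      std::vector<Shape> _origin_input_shapes;
      std::vector<Shape> _origin_output_shapes;
    };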
diff --git a/runtime/onert/test/CMakeLists.txt b/runtime/onert/test/CMakeLists.txt
deleted file mode 100644
index 38899976d..000000000
--- a/runtime/onert/test/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-set(TEST_ONERT test_onert)
-
-file(GLOB_RECURSE TESTS "*.cc")
-
-add_executable(${TEST_ONERT} ${TESTS})
-
-target_include_directories(${TEST_ONERT} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../core/src)
-
-target_link_libraries(${TEST_ONERT} onert_core)
-target_link_libraries(${TEST_ONERT} gtest)
-target_link_libraries(${TEST_ONERT} gtest_main)
-target_link_libraries(${TEST_ONERT} ${LIB_PTHREAD} dl)
-
-add_test(${TEST_ONERT} ${TEST_ONERT})
-
-install(TARGETS ${TEST_ONERT} DESTINATION unittest_standalone)
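The deleted CMakeLists built one standalone gtest binary from every *.cc under runtime/onert/test, linked it against onert_core and gtest_main, and installed it as a standalone unit test. For orientation, a minimal, hypothetical example of the kind of test that target compiled; the test name, header, and assertion are assumptions, not taken from the removed tree:

    #include <gtest/gtest.h>
    #include "ir/Shape.h" // an onert_core header, matching the deleted target's link deps

    // Hypothetical unit test in the style the removed target built.
    TEST(onert_ir, shape_num_elements)
    {
      onert::ir::Shape shape{2, 3, 4};
      EXPECT_EQ(shape.num_elements(), 24);
    }
    // gtest_main supplies main(), so the test file defines none itself.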