diff options
Diffstat (limited to 'runtime/onert/core/src')
129 files changed, 5164 insertions, 263 deletions
diff --git a/runtime/onert/core/src/backend/BackendContext.cc b/runtime/onert/core/src/backend/BackendContext.cc index b9aab7994..7b36f106d 100644 --- a/runtime/onert/core/src/backend/BackendContext.cc +++ b/runtime/onert/core/src/backend/BackendContext.cc @@ -16,8 +16,6 @@ #include "backend/BackendContext.h" -#include "ir/Operation.h" - namespace onert { namespace backend diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc index b03eb607c..71cde4cde 100644 --- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc @@ -35,6 +35,15 @@ StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> & // DO NOTHING } +StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> ®, + const std::string planner_id, + DynamicTensorManager *dynamic_tensor_manager) + : _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg}, _dynamic_tensor_manager{ + dynamic_tensor_manager} +{ + // DO NOTHING +} + void StaticTensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); diff --git a/runtime/onert/core/src/backend/basic/Tensor.cc b/runtime/onert/core/src/backend/basic/Tensor.cc index c2bbc5a66..de1cff4f4 100644 --- a/runtime/onert/core/src/backend/basic/Tensor.cc +++ b/runtime/onert/core/src/backend/basic/Tensor.cc @@ -51,6 +51,7 @@ bool Tensor::applyShape(const ir::Shape &new_shape) auto allocTensorMem = [&]() { auto capacity = total_size(); + assert(_dynamic_mem_mgr); auto alloc = _dynamic_mem_mgr->allocate(this, capacity); setBuffer(alloc); }; @@ -68,6 +69,7 @@ bool Tensor::applyShape(const ir::Shape &new_shape) auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type()); if (previous_size != new_size) { + assert(_dynamic_mem_mgr); _dynamic_mem_mgr->deallocate(this); setShape(new_shape); diff --git a/runtime/onert/core/src/backend/basic/TensorBuilder.cc b/runtime/onert/core/src/backend/basic/TensorBuilder.cc index a10cc2bf9..f9d83875d 100644 --- a/runtime/onert/core/src/backend/basic/TensorBuilder.cc +++ b/runtime/onert/core/src/backend/basic/TensorBuilder.cc @@ -34,6 +34,14 @@ TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg) /* empty */ } +TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, + const std::string planner_id) + : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get())} +{ + /* empty */ +} + void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout layout) { diff --git a/runtime/onert/core/src/backend/basic/train/TrainableTensor.cc b/runtime/onert/core/src/backend/basic/train/TrainableTensor.cc new file mode 100644 index 000000000..d09604224 --- /dev/null +++ b/runtime/onert/core/src/backend/basic/train/TrainableTensor.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <backend/basic/train/TrainableTensor.h> + +namespace onert +{ +namespace backend +{ +namespace basic +{ +namespace train +{ + +std::vector<ITensor *> TrainableTensor::optVars() +{ + std::vector<ITensor *> ret; + for (auto &&e : _opt_vars) + { + ret.emplace_back(e.get()); + } + return ret; +} + +void TrainableTensor::fillBuffer(const std::shared_ptr<ir::Data> &data) +{ + auto *buffer = _tensor.buffer(); + assert(buffer); + assert(total_size() == data->size()); + std::memcpy(buffer, data->base(), data->size()); +} + +} // namespace train +} // namespace basic +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/builtin/Backend.h b/runtime/onert/core/src/backend/builtin/Backend.h index 3791f3ffa..c05494a6a 100644 --- a/runtime/onert/core/src/backend/builtin/Backend.h +++ b/runtime/onert/core/src/backend/builtin/Backend.h @@ -22,8 +22,16 @@ #include "KernelGenerator.h" #include "TensorBuilder.h" #include "Tensor.h" +#ifdef ONERT_TRAIN +#include "train/BackendContext.h" +#include "train/KernelGenerator.h" +#include "train/TensorRegistry.h" +#endif // ONERT_TRAIN #include <backend/Backend.h> +#ifdef ONERT_TRAIN +#include <backend/train/ITrainableBackend.h> +#endif // ONERT_TRAIN #include <memory> @@ -35,6 +43,10 @@ namespace builtin { class Backend : public ::onert::backend::Backend +#ifdef ONERT_TRAIN + , + public backend::train::ITrainableBackend +#endif // ONERT_TRAIN { public: Backend() : _config{std::make_shared<Config>()} {} @@ -70,6 +82,22 @@ public: return context; } +#ifdef ONERT_TRAIN + std::unique_ptr<backend::train::TrainableBackendContext> + newContext(backend::train::TrainableContextData &&tdata) const override + { + const auto &tgraph = *tdata.tgraph; + auto tr = std::make_shared<train::TensorRegistry>(); + // TODO Create TensorBuilder if necessary + auto tdata_ptr = std::make_unique<backend::train::TrainableContextData>(std::move(tdata)); + auto context = std::make_unique<train::BackendContext>(this, std::move(tdata_ptr), tr); + + context->kernel_gen = + std::make_shared<train::KernelGenerator>(tgraph, tr, context->external_context()); + return context; + } +#endif // ONERT_TRAIN + private: std::shared_ptr<IConfig> _config; }; diff --git a/runtime/onert/core/src/backend/builtin/BackendContext.cc b/runtime/onert/core/src/backend/builtin/BackendContext.cc index c1a2ed537..573617e28 100644 --- a/runtime/onert/core/src/backend/builtin/BackendContext.cc +++ b/runtime/onert/core/src/backend/builtin/BackendContext.cc @@ -32,7 +32,7 @@ FunctionMap BackendContext::genKernels() { FunctionMap ret; - for (auto op_ind : _data.op_order) + for (auto &&op_ind : _data.op_order) { auto fn_seq = kernel_gen->generate(op_ind); ret.emplace_back(op_ind, std::move(fn_seq)); diff --git a/runtime/onert/core/src/backend/builtin/Config.cc b/runtime/onert/core/src/backend/builtin/Config.cc index f792c0c36..e5f6d4c21 100644 --- a/runtime/onert/core/src/backend/builtin/Config.cc +++ b/runtime/onert/core/src/backend/builtin/Config.cc @@ -27,7 +27,7 @@ std::string Config::ID = "builtin"; bool Config::initialize() { return true; } -ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout frontend_layout) +ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout frontend_layout) { return frontend_layout; } diff --git a/runtime/onert/core/src/backend/builtin/Config.h b/runtime/onert/core/src/backend/builtin/Config.h index 5226eba69..196b299d3 100644 --- a/runtime/onert/core/src/backend/builtin/Config.h +++ b/runtime/onert/core/src/backend/builtin/Config.h @@ -34,7 +34,7 @@ public: static std::string ID; std::string id() override { return ID; } bool initialize() override; - ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override; bool supportPermutation() override { return false; } bool supportDynamicTensor() override { diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc index 4533703a6..00c200a92 100644 --- a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc +++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc @@ -71,14 +71,14 @@ void KernelGenerator::visit(const ir::operation::If &node) const auto else_subg_index = node.param().else_subg_index; std::vector<backend::IPortableTensor *> input_tensors; - for (const auto input_index : node.getInputs()) + for (const auto &input_index : node.getInputs()) { auto input_tensor = getPortableTensor(input_index); input_tensors.emplace_back(input_tensor); } std::vector<backend::IPortableTensor *> output_tensors; - for (const auto output_index : node.getOutputs()) + for (const auto &output_index : node.getOutputs()) { auto output_tensor = getPortableTensor(output_index); output_tensors.emplace_back(output_tensor); @@ -117,14 +117,14 @@ void KernelGenerator::visit(const ir::operation::While &node) // This op does not support input as a constant, because builtin backend does not have // TensorBuilder std::vector<backend::IPortableTensor *> input_tensors; - for (const auto input_index : node.getInputs()) + for (const auto &input_index : node.getInputs()) { auto input_tensor = getPortableTensor(input_index); input_tensors.emplace_back(input_tensor); } std::vector<backend::IPortableTensor *> output_tensors; - for (const auto output_index : node.getOutputs()) + for (const auto &output_index : node.getOutputs()) { auto output_tensor = getPortableTensor(output_index); output_tensors.emplace_back(output_tensor); diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc index c0ca4046c..8b00db468 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc +++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc @@ -96,7 +96,7 @@ void WhileLayer::run() // Need some temp tensors to hold the body subgraph output std::vector<std::unique_ptr<Tensor>> temp_outputs_o; std::vector<IPortableTensor *> temp_outputs; - for (auto io_tensor : body_exec->getOutputTensors()) + for (auto &&io_tensor : body_exec->getOutputTensors()) { auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(), _dyn_memory_manager); @@ -139,7 +139,7 @@ void WhileLayer::run() // Clean-up the temp tensors _dyn_memory_manager->deallocate(cond_output_tensor.get()); - for (auto tensor : temp_outputs) + for (auto &&tensor : temp_outputs) { _dyn_memory_manager->deallocate(tensor); } diff --git a/runtime/onert/core/src/backend/builtin/train/BackendContext.cc b/runtime/onert/core/src/backend/builtin/train/BackendContext.cc new file mode 100644 index 000000000..fa9131f4d --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/BackendContext.cc @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "backend/basic/train/TrainableBackendContextHelpers.h" +#include "exec/FunctionSequence.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +backend::ITensorRegistry *BackendContext::genTensors() +{ + // For now, there is no need to generate tensors for forwarding. + // builtin train backend handles 3 operators: `Permute`, `IF`, `WHILE`. + // `Permute`: Tensor generation is not required. + // `IF`, `WHILE`: Not supported yet + return tensor_registry().get(); +} + +backend::train::ITensorRegistry *BackendContext::genTrainingTensors() +{ + // For now, there is no need to generate tensors for backwarding. + return tensor_registry().get(); +} + +backend::train::FunctionMap BackendContext::genKernels() +{ + backend::train::FunctionMap ret; + + for (auto &&op_ind : _tdata->op_order) + { + auto tn_seq = kernel_gen->generate(op_ind); + ret.emplace_back(op_ind, std::move(tn_seq)); + } + + trainable_graph()->operands().iterate( + [&](const ir::OperandIndex &ind, const ir::Operand &operand) { + if (!external_operands().contains(ind) && operand.isConstant()) + { + throw std::runtime_error( + "BackendContext: builtin backend does not support updatable weights yet"); + } + }); + + // TODO Enable prepare() + // for (auto &&it : ret) + // { + // auto &fn_seq = it.second; + // fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + // } + + return ret; +} + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/builtin/train/BackendContext.h b/runtime/onert/core/src/backend/builtin/train/BackendContext.h new file mode 100644 index 000000000..6f8ce4cae --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/BackendContext.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__ + +#include <backend/train/TrainableBackendContext.h> + +#include "KernelGenerator.h" +#include "../ExternalContext.h" +#include "../TensorBuilder.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +class BackendContext : public backend::train::TrainableBackendContext +{ +public: + BackendContext(const backend::train::ITrainableBackend *backend, + std::unique_ptr<backend::train::TrainableContextData> &&data, + std::shared_ptr<backend::train::ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : backend::train::TrainableBackendContext(backend, std::move(data), tensor_registry), + kernel_gen{kernel_gen}, + _external_context(new ExternalContext), _tensor_builder{tensor_builder} + { + } + + backend::ITensorRegistry *genTensors() override; + backend::train::ITensorRegistry *genTrainingTensors() override; + +public: + backend::train::FunctionMap genKernels() override; + + std::shared_ptr<ExternalContext> external_context() { return _external_context; } + +public: + // TODO Make it private + std::shared_ptr<KernelGenerator> kernel_gen; + +private: + // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, + // the thread pool is also created in duplicate + // TODO Create one ruy context for session + std::shared_ptr<ExternalContext> _external_context; + +private: + std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc new file mode 100644 index 000000000..6f2c0a3b9 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "KernelGenerator.h" + +#include "kernel/PermuteLayer.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +KernelGenerator::KernelGenerator(const ir::train::TrainableGraph &tgraph, + const std::shared_ptr<TensorRegistry> &tensor_reg, + const std::shared_ptr<ExternalContext> &external_context) + : KernelGeneratorBase{tgraph}, _tensor_reg{tensor_reg}, _external_context(external_context) +{ +} + +std::unique_ptr<exec::train::TrainableFnSequence> KernelGenerator::generate(ir::OperationIndex ind) +{ + auto ret = std::make_unique<exec::train::TrainableFnSequence>(); + const auto &op = _tgraph.operation(ind); + op.accept(*this); + // _return_fn must have been generated + if (_return_fn == nullptr) + { + throw std::runtime_error(op.name() + " op does not supported trainable kernel yet"); + } + + ret->_functions.emplace_back(std::move(_return_fn)); + + return ret; +} + +void KernelGenerator::visit(const ir::train::operation::Permute &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + // Add PermuteLayer + std::vector<ITensor *> output_tensors{getTensor(output_index)}; + std::vector<ITensor *> input_tensors{getTensor(input_index)}; + + std::vector<ITensor *> output_deriv_tensors; + std::vector<ITensor *> input_deriv_tensors; + + auto input_deriv_tensor = getDerivativeTensor(input_index); + auto output_deriv_tensor = getDerivativeTensor(output_index); + output_deriv_tensors.emplace_back(output_deriv_tensor); + input_deriv_tensors.emplace_back(input_deriv_tensor); + + // NOTE IOTensors of graph outputs for passing data to users must be ignored in training + // because the buffers of those IOTensors are unnecessary and nullptr + bool ignore_forward_in_training = _whole_graph_outputs.contains(output_index); + auto fn = std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, + input_deriv_tensors, output_deriv_tensors, + ignore_forward_in_training, _external_context); + + _return_fn = std::move(fn); +} + +backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index) +{ + // Get Tensor from all tensor registries (for Permute op) + auto ret = _tensor_registries.getITensor(index); + assert(ret != nullptr); + return ret; +} + +backend::ITensor *KernelGenerator::getDerivativeTensor(const ir::OperandIndex &index) +{ + // Get derivative Tensor from all tensor registries (for Permute op) + auto ret = _tensor_registries.getDerivativeITensor(index); + return ret; +} + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h new file mode 100644 index 000000000..d8781c0d0 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_BUTIN_TRAIN_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_BUTIN_TRAIN_KERNEL_GENERATOR_H__ + +#include "../ExternalContext.h" +#include "../train/TensorRegistry.h" +#include "../../../compiler/train/TensorRegistries.h" + +#include <backend/train/KernelGeneratorBase.h> +#include <exec/train/TrainableFnSequence.h> +#include <ir/train/TrainableGraph.h> + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +class KernelGenerator : public backend::train::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::train::TrainableGraph &tgraph, + const std::shared_ptr<TensorRegistry> &tensor_reg, + const std::shared_ptr<ExternalContext> &external_context); + + std::unique_ptr<exec::train::TrainableFnSequence> generate(ir::OperationIndex ind) override; + + void setTensorRegistries(const compiler::train::TensorRegistries &tensor_registries) + { + _tensor_registries = tensor_registries; + } + + void setWholeGraphOutputs(const ir::OperandIndexSequence &outputs) + { + _whole_graph_outputs = outputs; + } + +private: + void visit(const ir::train::operation::Permute &) override; + +private: + backend::ITensor *getTensor(const ir::OperandIndex &index); + backend::ITensor *getDerivativeTensor(const ir::OperandIndex &index); + +private: + std::shared_ptr<TensorRegistry> _tensor_reg; + compiler::train::TensorRegistries _tensor_registries; + const std::shared_ptr<ExternalContext> _external_context; + ir::OperandIndexSequence _whole_graph_outputs; +}; + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUTIN_TRAIN_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/core/src/backend/builtin/train/Tensor.h b/runtime/onert/core/src/backend/builtin/train/Tensor.h new file mode 100644 index 000000000..611407bd2 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/Tensor.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__ +#define __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__ + +#include <backend/basic/train/TrainableTensor.h> + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +using TrainableTensor = basic::train::TrainableTensor; +using DerivativeTensor = basic::Tensor; +using GradientTensor = basic::Tensor; + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__ diff --git a/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h b/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h new file mode 100644 index 000000000..c48e5fe93 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__ +#define __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__ + +#include <backend/train/ITensorRegistry.h> + +#include "../IOTensor.h" +#include "../Tensor.h" +#include "Tensor.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +using BaseTensorRegistry = + backend::train::PortableTensorRegistryTemplate<Tensor, TrainableTensor, DerivativeTensor, + GradientTensor>; + +class TensorRegistry : public backend::train::ITensorRegistry +{ +public: + TensorRegistry() : _base_reg{new BaseTensorRegistry} {} + + ITensor *getITensor(const ir::OperandIndex &index) override + { + auto base_tensor = _base_reg->getITensor(index); + if (base_tensor) + return base_tensor; + return getNativeIOTensor(index); + } + + ITensor *getNativeITensor(const ir::OperandIndex &index) override + { + auto base_tensor = _base_reg->getNativeITensor(index); + if (base_tensor) + return base_tensor; + return getNativeIOTensor(index); + } + + IPortableTensor *getPortableTensor(const ir::OperandIndex &index) + { + auto base_tensor = _base_reg->getPortableTensor(index); + if (base_tensor) + return base_tensor; + return getNativeIOTensor(index); + } + + IOTensor *getNativeIOTensor(const ir::OperandIndex &index) + { + auto tensor = _native_io_tensors.find(index); + if (tensor != _native_io_tensors.end()) + return tensor->second.get(); + return nullptr; + } + + ITensor *getDerivativeITensor(const ir::OperandIndex &index) override + { + return _base_reg->getDerivativeTensor(index); + } + + ITensor *getGradientITensor(const ir::OperandIndex &index) override + { + return _base_reg->getGradientTensor(index); + } + + DerivativeTensor *getDerivativeTensor(const ir::OperandIndex &index) + { + return _base_reg->getDerivativeTensor(index); + } + + bool setMigrantTensor(const ir::OperandIndex &index, IPortableTensor *tensor) override + { + assert(tensor); + assert(!getITensor(index)); // For the index, tensor is not registered yet + _base_reg->setMigrantTensor(index, tensor); + return true; + } + + void setDerivativeTensor(const ir::OperandIndex &index, std::unique_ptr<DerivativeTensor> tensor) + { + _base_reg->setDerivativeTensor(index, std::move(tensor)); + } + + void setGradientTensor(const ir::OperandIndex &index, std::unique_ptr<GradientTensor> tensor) + { + _base_reg->setGradientTensor(index, std::move(tensor)); + } + + void setNativeIOTensor(ir::OperandIndex index, std::unique_ptr<IOTensor> &&tensor) + { + assert(tensor); + assert(!getITensor(index)); // For the index, tensor is not registered yet + _native_io_tensors[index] = std::move(tensor); + } + + const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors() + { + return _native_io_tensors; + } + std::shared_ptr<BaseTensorRegistry> base_reg() { return _base_reg; } + +private: + std::shared_ptr<BaseTensorRegistry> _base_reg; + ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors; +}; + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__ diff --git a/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc new file mode 100644 index 000000000..929092dde --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc @@ -0,0 +1,85 @@ + + +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "PermuteLayer.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ +namespace kernel +{ + +PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors, + const std::vector<ITensor *> &dst_tensors, + const std::vector<ITensor *> &input_deriv_tensors, + const std::vector<ITensor *> &output_deriv_tensors, + bool ignore_forward_in_training, + const std::shared_ptr<ExternalContext> &external_context) + : builtin::kernel::PermuteLayer{src_tensors, dst_tensors, external_context}, + _input_deriv_tensors{input_deriv_tensors}, _output_deriv_tensors{output_deriv_tensors}, + _ignore_forward_in_training{ignore_forward_in_training} +{ + assert(input_deriv_tensors.size() == output_deriv_tensors.size()); + assert(src_tensors.size() == dst_tensors.size()); +} + +void PermuteLayer::optimize() +{ + builtin::kernel::PermuteLayer::optimize(); + + // TODO Calculate offsets of derivative tensors if necessary +} + +void PermuteLayer::forward(bool training) +{ + if (training && _ignore_forward_in_training) + return; + + builtin::kernel::PermuteLayer::run(); +} + +void PermuteLayer::backward() +{ + for (uint32_t i = 0; i < _output_deriv_tensors.size(); ++i) + { + auto src_deriv = _output_deriv_tensors.at(i); + auto dst_deriv = _input_deriv_tensors.at(i); + + // NOTE The derivative tensors corresponding to inputs/outputs of model are nullptr + // because permuting those tensors is meaningless + if (src_deriv && dst_deriv) + { + const auto rank = src_deriv->getShape().rank(); + auto output_offsets = _dst_tensors_offsets.at(i); + auto input_offsets = _src_tensors_offsets.at(i); + + exec::IPermuteFunction::permute(src_deriv, dst_deriv, rank, output_offsets, input_offsets); + } + } +} + +} // namespace kernel +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h new file mode 100644 index 000000000..de8063a21 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__ +#define __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__ + +#include "../../kernel/PermuteLayer.h" + +#include "exec/train/ITrainableFunction.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ +namespace kernel +{ + +class PermuteLayer : public builtin::kernel::PermuteLayer, public exec::train::ITrainableFunction +{ +public: + PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors, + const std::vector<ITensor *> &input_deriv_tensors, + const std::vector<ITensor *> &output_deriv_tensors, bool ignore_forward_in_training, + const std::shared_ptr<ExternalContext> &external_context); + + void optimize() override; + + void forward(bool training) override; + void backward() override; + +private: + std::vector<ITensor *> _input_deriv_tensors; + std::vector<ITensor *> _output_deriv_tensors; + bool _ignore_forward_in_training; +}; + +} // namespace kernel +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__ diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc index 45124556b..ba621bb4f 100644 --- a/runtime/onert/core/src/compiler/Compiler.cc +++ b/runtime/onert/core/src/compiler/Compiler.cc @@ -16,6 +16,7 @@ #include "compiler/Compiler.h" +#include "CompilerHelpers.h" #include "ExecutorFactory.h" #include "ShapeValidator.h" #include "pass/ConstantOutputPass.h" @@ -30,6 +31,7 @@ #include "compiler/StaticShapeInferer.h" #include <misc/string_helpers.h> +#include <misc/polymorphic_downcast.h> namespace onert { @@ -69,10 +71,25 @@ std::shared_ptr<CompilerArtifact> Compiler::compile(void) throw std::runtime_error("Profiling mode works only with 'Dataflow' executor"); } + if (!_options->minmax_filepath.empty()) + { + if (_options->executor != "Linear") + throw std::runtime_error("Recording minmax works only with Linear executor"); + } + + if (!_model->hasOnly<ir::Graph>()) + { + throw std::runtime_error("Compiler can only compile models for inference."); + } + _options->forceInternalOptions(); _options->verboseOptions(); - _model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { + auto custom_kernel_builder = _model->getKernelBuilder(); + + _model->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) { + auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph); + // Mandatory passes pass::PassRunner{} .append(std::make_unique<pass::ConstantOutputPass>(subg)) @@ -96,7 +113,9 @@ std::shared_ptr<CompilerArtifact> Compiler::compile(void) // Lower: Assign backend std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs; { - _model->iterate([&](const ir::SubgraphIndex &subg_index, ir::Graph &subg) { + _model->iterate([&](const ir::SubgraphIndex &subg_index, ir::IGraph &graph) { + auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph); + // Lower: Assign backend lowered_subgs[subg_index] = std::make_unique<compiler::LoweredGraph>(subg, *_options); // Set tracing_ctx for copied graph @@ -119,7 +138,7 @@ std::shared_ptr<CompilerArtifact> Compiler::compile(void) // Run the StaticShapeInfer of primary subg. All child StaticShapeInferers are called // recursively std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers = - StaticShapeInferer::createStaticShapeInferers(lowered_subgs); + createStaticShapeInferers(lowered_subgs); const auto primary_subg_idx = ir::SubgraphIndex{0}; inferers.at(primary_subg_idx)->infer(); @@ -158,10 +177,15 @@ std::shared_ptr<CompilerArtifact> Compiler::compile(void) ir::OperationDumper dumper("Executor generation of Subgraph " + std::to_string(subg_index.value())); lowered_subg->graph().operations().iterate( - [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); }); - - auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create( - std::move(lowered_subg), tracing_ctx.get(), *_options, executors, model_index)}; + [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); }); + + ExecutorFactoryArgs args; + args.tracing_ctx = tracing_ctx.get(); + args.options = _options; + args.model_index = model_index; + args.custom_kernel_builder = custom_kernel_builder; + auto executor = std::unique_ptr<exec::IExecutor>{ + ExecutorFactory::get().create(std::move(lowered_subg), executors, args)}; executor->setIndexedRanks(indexed_ranks); executors->emplace(model_index, subg_index, std::move(executor)); } diff --git a/runtime/onert/core/src/compiler/CompilerFactory.cc b/runtime/onert/core/src/compiler/CompilerFactory.cc index d8d4bb277..aeb0876c4 100644 --- a/runtime/onert/core/src/compiler/CompilerFactory.cc +++ b/runtime/onert/core/src/compiler/CompilerFactory.cc @@ -17,6 +17,9 @@ #include "compiler/CompilerFactory.h" #include "MultiModelCompiler.h" +#ifdef ONERT_TRAIN +#include "train/TrainingCompiler.h" +#endif // ONERT_TRAIN #include "compiler/Compiler.h" @@ -33,8 +36,18 @@ CompilerFactory &CompilerFactory::get() std::unique_ptr<ICompiler> CompilerFactory::create(const std::shared_ptr<ir::NNPkg> &nnpkg, - std::vector<std::unique_ptr<CompilerOptions>> &copts) + std::vector<std::unique_ptr<CompilerOptions>> &copts, + const compiler::train::TrainingInfo *training_info) { +#ifdef ONERT_TRAIN + // Returing compiler for training + if (training_info) + return std::make_unique<train::TrainingCompiler>(nnpkg, copts, *training_info); +#else // ONERT_TRAIN + (void)training_info; +#endif // ONERT_TRAIN + + // Returing compiler for inference if (nnpkg->model_count() == 1) return std::make_unique<Compiler>(nnpkg, copts); diff --git a/runtime/onert/core/src/compiler/CompilerHelpers.h b/runtime/onert/core/src/compiler/CompilerHelpers.h new file mode 100644 index 000000000..798334b3b --- /dev/null +++ b/runtime/onert/core/src/compiler/CompilerHelpers.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_COMPILER_HELPERS_H__ +#define __ONERT_COMPILER_COMPILER_HELPERS_H__ + +#include <compiler/ILoweredGraph.h> +#include <compiler/StaticShapeInferer.h> +#include <ir/Index.h> + +#include <memory> +#include <unordered_map> + +namespace onert +{ +namespace compiler +{ + +/** + * @brief Create a shape inferer map for a lowered model + * @param[in] lowered_subgs lowered model map + * @return Shape inferer map + */ +template <typename LoweredGraphType, + typename = std::enable_if_t<std::is_base_of<ILoweredGraph, LoweredGraphType>::value>> +static std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> +createStaticShapeInferers( + const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<LoweredGraphType>> &lowered_subgs) +{ + std::unordered_map<ir::SubgraphIndex, ILoweredGraph *> lsubgs; + for (auto &&e : lowered_subgs) + lsubgs[e.first] = e.second.get(); + return StaticShapeInferer::createStaticShapeInferers(lsubgs); +} + +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_COMPILER_HELPERS_H__ diff --git a/runtime/onert/core/src/compiler/CompilerOptions.cc b/runtime/onert/core/src/compiler/CompilerOptions.cc index b5fd392e0..830d9dd00 100644 --- a/runtime/onert/core/src/compiler/CompilerOptions.cc +++ b/runtime/onert/core/src/compiler/CompilerOptions.cc @@ -75,6 +75,7 @@ std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig() { auto o = std::make_unique<CompilerOptions>(); o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';'); + o->minmax_filepath = util::getConfigString(util::config::MINMAX_FILEPATH); o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH); o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP); o->executor = util::getConfigString(util::config::EXECUTOR); diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc index b09d6b021..6a08524cc 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.cc +++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc @@ -25,6 +25,9 @@ #include "../exec/ExecTime.h" #include "../exec/ExecutionObservers.h" #include "../exec/LinearExecutor.h" +#ifdef MINMAX_H5DUMPER +#include "../exec/MinMaxRecorder.h" +#endif #include "../exec/ParallelExecutor.h" #include "../ir/OperationCloner.h" @@ -36,6 +39,14 @@ #include <functional> #include <memory> +#ifdef ONERT_TRAIN +#include "../backend/builtin/train/BackendContext.h" +#include "../exec/train/TrainableExecutor.h" + +#include <backend/train/TrainableBackendContext.h> +#include <backend/train/ITrainableBackend.h> +#endif // ONERT_TRAIN + namespace onert { namespace @@ -74,7 +85,7 @@ public: void run() override { - for (auto tensor : _dealloc_list) + for (auto &&tensor : _dealloc_list) { if (!tensor->is_dynamic()) continue; @@ -86,7 +97,8 @@ private: DeallocList _dealloc_list; }; -void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph, +// TODO Unify initializeSubgraphIOTensors +void initializeSubgraphIOTensors(compiler::ILoweredGraph &lowered_graph, const backend::BackendContexts &backend_contexts, const ir::OperandIndexSequence &indices) { @@ -104,7 +116,38 @@ void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph, } assert(builtin_tensor_reg); - for (auto ind : indices) + for (auto &&ind : indices) + { + const auto &operand = lowered_graph.graph().operands().at(ind); + auto tensor = std::make_unique<backend::builtin::IOTensor>( + operand.info(), + ir::Layout::NHWC /* FIXME find operation for this operand and use frontend_layout */ + ); + + // Add tensor to builtin TensorRegistry. + builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor)); + } +} + +#ifdef ONERT_TRAIN +void initializeSubgraphIOTensors(compiler::ILoweredGraph &lowered_graph, + const backend::train::TrainableBackendContexts &backend_contexts, + const ir::OperandIndexSequence &indices) +{ + std::shared_ptr<backend::builtin::train::TensorRegistry> builtin_tensor_reg; + for (const auto &e : backend_contexts) + { + auto backend = e.first; + auto &context = e.second; + if (backend->config()->id() == backend::builtin::Config::ID) + { + builtin_tensor_reg = std::dynamic_pointer_cast<backend::builtin::train::TensorRegistry>( + context->tensor_registry()); + } + } + assert(builtin_tensor_reg); + + for (auto &&ind : indices) { const auto &operand = lowered_graph.graph().operands().at(ind); auto tensor = std::make_unique<backend::builtin::IOTensor>( @@ -116,8 +159,11 @@ void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph, builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor)); } } +#endif // ONERT_TRAIN -backend::BackendContexts createBackendContexts(compiler::LoweredGraph &lgraph, bool linear_executor) +backend::BackendContexts +createBackendContexts(compiler::ILoweredGraph &lgraph, bool linear_executor, + std::shared_ptr<backend::custom::IKernelBuilder> custom_kernel_builder) { backend::BackendContexts contexts; auto &backend_manager = compiler::BackendManager::get(); @@ -125,7 +171,7 @@ backend::BackendContexts createBackendContexts(compiler::LoweredGraph &lgraph, b std::unordered_map<const backend::Backend *, backend::ContextData> context_data_map; // Generate partial graphs for each backend - for (auto backend : backend_manager.getAll()) + for (auto &&backend : backend_manager.getAll()) { auto &data = context_data_map[backend]; auto graph = std::make_unique<ir::Graph>(); @@ -157,7 +203,7 @@ backend::BackendContexts createBackendContexts(compiler::LoweredGraph &lgraph, b }); // Separate operations into partial graphs whole_graph.operations().iterate( - [&](const ir::OperationIndex &op_ind, const ir::Operation &operation) { + [&](const ir::OperationIndex &op_ind, const ir::IOperation &operation) { auto &op_li = lgraph.lower_info().operation; auto backend = op_li.at(op_ind).backend(); auto &partial_graph = *context_data_map[backend].graph; @@ -168,7 +214,7 @@ backend::BackendContexts createBackendContexts(compiler::LoweredGraph &lgraph, b // Add missing operands (externals) auto io_list = (operation.getInputs() + operation.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - for (auto operand_ind : io_list) + for (auto &&operand_ind : io_list) { if (partial_graph.operands().exist(operand_ind)) continue; @@ -217,12 +263,33 @@ backend::BackendContexts createBackendContexts(compiler::LoweredGraph &lgraph, b std::copy_if(whole_op_order.begin(), whole_op_order.end(), std::back_inserter(data.op_order), [&](const auto &ind) { return data.graph->operations().exist(ind); }); data.is_linear_executor = linear_executor; - data.custom_kernel_builder = lgraph.graph().getKernelBuilder(); + data.custom_kernel_builder = custom_kernel_builder; contexts.emplace(backend, backend->newContext(std::move(data))); } return contexts; } +template <typename Context> +std::deque<std::pair<const backend::Backend *, Context *>> orderBackendContext( + const std::unordered_map<const backend::Backend *, std::unique_ptr<Context>> &tbackend_contexts) +{ + std::deque<std::pair<const backend::Backend *, Context *>> ordered_contexts; + + for (auto &&pair : tbackend_contexts) + { + // NOTE builtin backend must be processed lastly. + // This is because of Permute layer's specialty which is the only operation that could have + // different ITensor objects for the input and the output. And it requires all other backends' + // tensors are ready to use. + if (pair.first->config()->id() == "builtin") + ordered_contexts.emplace_back(pair.first, pair.second.get()); + else + ordered_contexts.emplace_front(pair.first, pair.second.get()); + } + + return ordered_contexts; +} + } // namespace } // namespace onert @@ -240,34 +307,30 @@ ExecutorFactory &ExecutorFactory::get() ExecutorFactory::ExecutorFactory() { _map["Linear"] = createLinearExecutor; - _map["Dataflow"] = - std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2, - std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, false); - _map["Parallel"] = - std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2, - std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, true); + _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, false); + _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, true); } exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const util::TracingCtx *tracing_ctx, - const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors, - const ir::ModelIndex &index) + const ExecutorFactoryArgs &args) { - return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors, - index); + assert(args.options != nullptr); + return _map.at(args.options->executor)(std::move(lowered_graph), executors, args); } -void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph, +void ExecutorFactory::prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph, const backend::BackendContexts &backend_contexts) { TensorRegistries tensor_regs{backend_contexts, true}; lowered_graph.graph().operations().iterate( - [&](const ir::OperationIndex &op_ind, const ir::Operation &op) { + [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) { auto lower_info = lowered_graph.lower_info().operation.getRawPtr(op_ind); auto &backend_ctx = backend_contexts.at(lower_info->backend()); - for (auto ind : + for (auto &&ind : (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { // If an Operation's input/output tensor does not have an own tensor object, @@ -307,7 +370,6 @@ std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_contexts) { std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts; - for (auto &&pair : backend_contexts) { // NOTE builtin backend must be processed lastly. @@ -319,19 +381,22 @@ ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_con else ordered_contexts.emplace_front(pair.first, pair.second.get()); } - return ordered_contexts; } -exec::IExecutor *ExecutorFactory::createLinearExecutor( - std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx, - const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors, - const ir::ModelIndex &index) +exec::IExecutor * +ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args) { + const auto options = args.options; + const auto &model_index = args.model_index; + const auto tracing_ctx = args.tracing_ctx; + auto custom_kernel_builder = args.custom_kernel_builder; auto &graph = lowered_graph->graph(); backend::BackendContexts backend_contexts = - createBackendContexts(*lowered_graph, options.executor == "Linear"); + createBackendContexts(*lowered_graph, options->executor == "Linear", custom_kernel_builder); TensorRegistries tensor_regs{backend_contexts, true}; @@ -352,7 +417,7 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor( prepareMigrantTensors(*lowered_graph, backend_contexts); // Give some runtime objects to builtin KernelGenerator - prepareBuiltinBackend(tensor_regs, executors, backend_contexts, index); + prepareBuiltinBackend(tensor_regs, executors, backend_contexts, model_index); ExecutionBuilder builder; @@ -382,7 +447,7 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor( uses_map[ind]++; } - for (const auto op_ind : order) + for (const auto &op_ind : order) { const auto &op = graph.operations().at(op_ind); auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; @@ -422,7 +487,7 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor( auto &fn_seq = pair.second; auto &op = lowered_graph->graph().operations().at(op_ind); auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind); - if (options.he_profiling_mode) + if (options->he_profiling_mode) fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); if (!dealloc_list_map[op_ind].empty()) fn_seq->append(std::make_unique<DeallocFunction>(dealloc_list_map[op_ind])); @@ -439,23 +504,33 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor( order, tracing_ctx}; - if (!options.trace_filepath.empty()) + if (!options->trace_filepath.empty()) { std::unique_ptr<exec::IExecutionObserver> ctp = - std::make_unique<exec::TracingObserver>(options.trace_filepath, exec->graph(), tracing_ctx); + std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx); exec->addObserver(std::move(ctp)); } +#ifdef MINMAX_H5DUMPER + if (!options->minmax_filepath.empty()) + exec->addObserver(std::make_unique<exec::MinMaxRecorder>( + options->minmax_filepath, exec->graph(), exec->getBackendContexts())); +#endif return exec; } -exec::IExecutor *ExecutorFactory::createDataflowExecutor( - std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx, - const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors, - const ir::ModelIndex &index, bool parallel) +exec::IExecutor * +ExecutorFactory::createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args, bool parallel) { + const auto options = args.options; + const auto &model_index = args.model_index; + const auto tracing_ctx = args.tracing_ctx; + auto custom_kernel_builder = args.custom_kernel_builder; + backend::BackendContexts backend_contexts = - createBackendContexts(*lowered_graph, options.executor == "Linear"); + createBackendContexts(*lowered_graph, options->executor == "Linear", custom_kernel_builder); TensorRegistries tensor_regs{backend_contexts, true}; @@ -472,7 +547,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( prepareMigrantTensors(*lowered_graph, backend_contexts); // Give some runtime objects to builtin KernelGenerator - prepareBuiltinBackend(tensor_regs, executors, backend_contexts, index); + prepareBuiltinBackend(tensor_regs, executors, backend_contexts, model_index); ExecutionBuilder builder; @@ -489,7 +564,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( auto &fn_seq = pair.second; auto &op = lowered_graph->graph().operations().at(op_ind); auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind); - if (options.he_profiling_mode) + if (options->he_profiling_mode) fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)}); } @@ -508,7 +583,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, std::move(code_map), tracing_ctx}; - if (options.he_profiling_mode) + if (options->he_profiling_mode) { std::vector<const backend::Backend *> backends; for (const auto &pair : backend_contexts) @@ -523,15 +598,304 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( exec = dataflow_exec; } - if (!options.trace_filepath.empty()) + if (!options->trace_filepath.empty()) + { + std::unique_ptr<exec::IExecutionObserver> ctp = + std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx); + exec->addObserver(std::move(ctp)); + } + + return exec; +} + +#ifdef ONERT_TRAIN +exec::IExecutor * +ExecutorFactory::create(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args, + const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer) +{ + assert(args.options != nullptr); + + if (args.options->executor != "Linear") + throw std::runtime_error("ExecutorFactory: TrainableExecutor supports only 'Linear' now"); + + return createTrainableExecutor(std::move(lowered_graph), executors, args, optimizer); +} + +void ExecutorFactory::prepareMigrantTensors( + compiler::ILoweredGraph &lowered_graph, + const backend::train::TrainableBackendContexts &backend_contexts) +{ + train::TensorRegistries tensor_regs{backend_contexts, true}; + + lowered_graph.graph().operations().iterate( + [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) { + auto lower_info = lowered_graph.lower_info().operation.getRawPtr(op_ind); + auto &backend_ctx = backend_contexts.at(lower_info->backend()); + for (auto &&ind : + (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) + { + // If an Operation's input/output tensor does not have an own tensor object, + // it must be using migrant tensors, so find the tensor from other tensor registries and + // register it to the current tensor registry if it is portable + if (!backend_ctx->tensor_registry()->getITensor(ind)) + { + auto tensor = tensor_regs.getITensor(ind); + assert(tensor); // The tensor must have been registered + auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor); + if (ptensor) + backend_ctx->tensor_registry()->setMigrantTensor(ind, ptensor); + } + } + }); +} + +exec::IExecutor *ExecutorFactory::createTrainableExecutor( + std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &, const ExecutorFactoryArgs &args, + const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer) +{ + const auto options = args.options; + const auto tracing_ctx = args.tracing_ctx; + auto custom_kernel_builder = args.custom_kernel_builder; + + auto &graph = lowered_graph->graph(); + + lowered_graph->trainable_graph().operations().iterate([](const onert::ir::OperationIndex &, + const onert::ir::IOperation &op) { + try + { + UNUSED_RELEASE(dynamic_cast<const ir::train::ITrainableOperation &>(op)); + } + catch (std::bad_cast &) + { + throw std::runtime_error("ExecutorFactory: " + op.name() + " is not trainable operation yet"); + } + }); + + // TODO Create context only once instead of replacing + backend::train::TrainableBackendContexts tbackend_contexts; + backend::BackendContexts base_backend_contexts = + createBackendContexts(*lowered_graph, true, custom_kernel_builder); + + // Replace BackendContext with TrainbleBackendContext + for (auto &&pair : base_backend_contexts) + { + auto ctx = pair.second.get(); + const auto &data = ctx->data(); + + // Create partial and trainable graphs + auto tgraph = std::make_unique<ir::train::TrainableGraph>(*data.graph); + data.graph->operations().iterate( + [&](const onert::ir::OperationIndex &op_index, const onert::ir::IOperation &) { + const auto &orig_tgraph = lowered_graph->trainable_graph(); + const auto &trainable_op = orig_tgraph.operation(op_index); + auto gen_index = tgraph->replaceOperation(op_index, trainable_op.clone()); + UNUSED_RELEASE(gen_index); + assert(gen_index == op_index); + }); + data.graph->operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) { + const auto &orig_tgraph = lowered_graph->trainable_graph(); + if (orig_tgraph.derivatives().exist(index)) + { + const auto &deriv = orig_tgraph.derivatives().at(index); + auto new_deriv = std::make_unique<ir::Operand>(deriv); + auto gen_index = tgraph->addDerivative(index, std::move(new_deriv)); + UNUSED_RELEASE(gen_index); + assert(gen_index == index); + } + }); + + // Remove outputs of whole graph from external_operands + auto external_operands = data.external_operands; + for (const auto &index : lowered_graph->trainable_graph().getOutputs()) + { + if (external_operands.contains(index)) + external_operands.remove(index); + } + + // Set trainable context data + backend::train::TrainableContextData tdata; + tdata.tgraph = std::move(tgraph); + tdata.op_order = std::move(data.op_order); + tdata.external_operands = std::move(external_operands); + tdata.operand_layouts = std::move(data.operand_layouts); + tdata.custom_kernel_builder = std::move(data.custom_kernel_builder); + tdata.is_linear_executor = data.is_linear_executor; + tdata.optimizer = optimizer; + + // TODO Remove dynamic_cast + try + { + const auto backend = pair.first; + const auto tbackend = dynamic_cast<const backend::train::ITrainableBackend *>(backend); + tbackend_contexts.emplace(backend, tbackend->newContext(std::move(tdata))); + } + catch (const std::bad_cast &) + { + throw std::runtime_error("ExecutorFactory: Invalid backend - TrainableExecutor does not " + "support non-trainble backends"); + } + } + base_backend_contexts.clear(); + + train::TensorRegistries tensor_regs{tbackend_contexts, true}; + + initializeSubgraphIOTensors( + *lowered_graph, tbackend_contexts, + (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) | + ir::Remove::DUPLICATED | ir::Remove::UNDEFINED); + + // linearize + auto order = Linear::linearize(*lowered_graph); + Linear::dump(*lowered_graph, order); + + for (auto &&pair : tbackend_contexts) + { + pair.second->genTensors(); + } + + for (auto &&pair : tbackend_contexts) + { + auto tctx = pair.second.get(); + tctx->genTrainingTensors(); + } + + prepareMigrantTensors(*lowered_graph, tbackend_contexts); + + // Give some runtime objects to builtin KernelGenerator + for (auto &&pair : tbackend_contexts) + { + auto builtin_context = + dynamic_cast<backend::builtin::train::BackendContext *>(pair.second.get()); + if (builtin_context != nullptr) + { + auto builtin_kernel_gen = builtin_context->kernel_gen; + builtin_kernel_gen->setTensorRegistries(tensor_regs); + builtin_kernel_gen->setWholeGraphOutputs(lowered_graph->trainable_graph().getOutputs()); + } + } + + // Adjust the order of backends for the upcoming iteration + auto ordered_contexts = + onert::orderBackendContext<backend::train::TrainableBackendContext>(tbackend_contexts); + + // TODO Remove this simulation + // Simulate the execution for deallocation of tensors + std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map; + { + ir::OperandIndexMap<uint32_t> uses_map; + ir::OperandIndexSequence constants; + + auto model_io = + (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED; + + // Prepare scanning + graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { + uses_map[ind] = obj.getUses().size(); + + if (obj.isConstant()) + constants.append(ind); + }); + + // A trick to consider constants as an execption + for (const auto &ind : constants) + { + uses_map[ind]++; + } + + for (const auto op_ind : order) + { + const auto &op = graph.operations().at(op_ind); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + for (const auto &ind : op_inputs) + { + const auto &operand = graph.operands().at(ind); + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind)) + { + dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind)); + } + } + } + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + } + + // Check derivative tensors + { + // TODO Support multiple subgraphs + // Check if the derivative tensors corresponding to inputs of model are nullptr + // NOTE The derivative tensors corresponding to inputs of model are for inputs of PermuteLayers + // and they are nullptr and because they are meaningless. + assert(std::all_of(lowered_graph->trainable_graph().getInputs().begin(), + lowered_graph->trainable_graph().getInputs().end(), + [&](const auto &input_idx) { + return tensor_regs.getDerivativeITensor(input_idx) == nullptr; + })); + + // Check if the derivative tensors corresponding to outputs of model exist + assert(std::all_of(lowered_graph->trainable_graph().getOutputs().begin(), + lowered_graph->trainable_graph().getOutputs().end(), + [&](const auto &output_idx) { + return tensor_regs.getDerivativeITensor(output_idx) == nullptr; + })); + } + + train::TrainableCodeMap code_map; + // Generate kernels + for (auto &&pair : ordered_contexts) + { + auto codes = pair.second->genKernels(); + for (auto &&pair : codes) + { + auto &op_ind = pair.first; + auto &tn_seq = pair.second; + auto &op = lowered_graph->trainable_graph().operation(op_ind); + auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind); + + assert(code_map.find(op_ind) == code_map.end()); + code_map.insert( + {op_ind, train::TrainableCodeAndInfo{op_ind, &op, lower_info, std::move(tn_seq)}}); + } + } + + if (order.size() != code_map.size()) + { + throw std::runtime_error("ExecutorFactory: Some kernels are not generated"); + } + + auto exec = new exec::train::TrainableExecutor{std::move(lowered_graph), + std::move(tbackend_contexts), + tensor_regs, + std::move(code_map), + order, + tracing_ctx}; + + if (!options->trace_filepath.empty()) { std::unique_ptr<exec::IExecutionObserver> ctp = - std::make_unique<exec::TracingObserver>(options.trace_filepath, exec->graph(), tracing_ctx); + std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx); exec->addObserver(std::move(ctp)); } + // TODO Support MINMAX_H5DUMPER return exec; } +#endif // ONERT_TRAIN } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h index f8f989043..cc621bccf 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.h +++ b/runtime/onert/core/src/compiler/ExecutorFactory.h @@ -20,7 +20,15 @@ #include "TensorRegistries.h" #include "backend/ITensor.h" + +#ifdef ONERT_TRAIN +#include "backend/train/TrainableBackendContext.h" +#endif // ONERT_TRAIN #include "compiler/LoweredGraph.h" +#ifdef ONERT_TRAIN +#include "compiler/train/LoweredTrainableGraph.h" +#include "exec/train/optimizer/Optimizer.h" +#endif // ONERT_TRAIN #include "exec/IExecutors.h" #include <deque> @@ -31,6 +39,15 @@ namespace onert namespace compiler { +// TODO Change to a better name +struct ExecutorFactoryArgs +{ + const util::TracingCtx *tracing_ctx; + const compiler::CompilerOptions *options; + ir::ModelIndex model_index; + std::shared_ptr<backend::custom::IKernelBuilder> custom_kernel_builder; +}; + class ExecutorFactory { public: @@ -38,16 +55,22 @@ public: public: exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const util::TracingCtx *tracing_ctx, - const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors, - const ir::ModelIndex &index); + const ExecutorFactoryArgs &args); + +#ifdef ONERT_TRAIN + // TODO Unify create() + exec::IExecutor *create(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args, + const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer); +#endif // ONERT_TRAIN private: ExecutorFactory(); private: - static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph, + static void prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph, const backend::BackendContexts &backend_contexts); static void prepareBuiltinBackend(const TensorRegistries &tensor_regs, const std::shared_ptr<exec::IExecutors> &executors, @@ -56,22 +79,31 @@ private: static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> orderBackendContext(const backend::BackendContexts &backend_contexts); - static exec::IExecutor *createLinearExecutor( - std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx, - const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors, - const ir::ModelIndex &index); - static exec::IExecutor *createDataflowExecutor( - std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx, - const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors, - const ir::ModelIndex &index, bool parallel); + static exec::IExecutor * + createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args); + static exec::IExecutor * + createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args, bool parallel); +#ifdef ONERT_TRAIN + // TODO Unify prepareMigrantTensors + static void + prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph, + const backend::train::TrainableBackendContexts &backend_contexts); + static exec::IExecutor * + createTrainableExecutor(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args, + const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer); +#endif // ONERT_TRAIN private: std::unordered_map< - std::string, - std::function<exec::IExecutor *( - std::unique_ptr<compiler::LoweredGraph>, const util::TracingCtx *tracing_ctx, - const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors, - const ir::ModelIndex &index)>> + std::string, std::function<exec::IExecutor *(std::unique_ptr<compiler::LoweredGraph>, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args)>> _map; }; diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc index fdf4e24f0..ce9b09c2d 100644 --- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc +++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc @@ -776,7 +776,7 @@ Fp32ToFp16Converter::InputToOpSeqs Fp32ToFp16Converter::prepareInputToOpSeqs() c InputToOpSeqs input_to_op_seqs; op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_idx, const ir::OpSequence &op_seq) { - for (auto input : op_seq.getInputs() | ir::Remove::UNDEFINED) + for (auto &&input : op_seq.getInputs() | ir::Remove::UNDEFINED) { auto it = input_to_op_seqs.find(input); if (it == input_to_op_seqs.end()) @@ -862,7 +862,7 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences( // | // [OPERATION] // op_seq_ind_next_to_fp16 // - for (auto it : opseq_map_to_delete) + for (auto &&it : opseq_map_to_delete) { // fp16_to_fp32's input/output num is always 1 auto &op_seq_ind_fp16_to_fp32 = it.first; diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc index 65fd4cd77..f662ef5b9 100644 --- a/runtime/onert/core/src/compiler/HEScheduler.cc +++ b/runtime/onert/core/src/compiler/HEScheduler.cc @@ -28,7 +28,7 @@ namespace using namespace onert; -uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::Operation &node) +uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::IOperation &node) { uint32_t size = 0; for (const auto &ind : @@ -39,7 +39,7 @@ uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::Operatio return size; } -bool isQuant(const ir::Graph &graph, const ir::Operation &node) +bool isQuant(const ir::Graph &graph, const ir::IOperation &node) { for (const auto &input : node.getInputs() | ir::Remove::UNDEFINED) { @@ -52,14 +52,14 @@ bool isQuant(const ir::Graph &graph, const ir::Operation &node) return false; } -bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::Operation &, bool) +bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::IOperation &, bool) { // Now, there is no workaround return false; } // if a node can be merged into op_seq -bool isMergeable(const ir::Graph &graph, const ir::Operation &node) +bool isMergeable(const ir::Graph &graph, const ir::IOperation &node) { size_t prev_op_cnt = 0; for (const auto &input : node.getInputs() | ir::Remove::UNDEFINED) @@ -137,7 +137,7 @@ void HEScheduler::scheduleShufflingBackends() } } -bool HEScheduler::isNodeProfiled(const ir::Operation &node) +bool HEScheduler::isNodeProfiled(const ir::IOperation &node) { const bool quant = isQuant(*_graph, node); const auto size = getOperationsFlattenedIOSize(*_graph, node); @@ -207,7 +207,7 @@ std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph { // Check if profiling info about all backend/node pairs already exists bool all_nodes_are_profiled = true; - _graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) { + _graph->operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &op) { if (all_nodes_are_profiled) all_nodes_are_profiled = isNodeProfiled(op); }); @@ -224,7 +224,7 @@ std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph ir::OperationIndexMap<bool> visited; graph.operations().iterate( - [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; }); + [&](const ir::OperationIndex &index, const ir::IOperation &) { visited[index] = false; }); // for each task select the backend with the smallest earliest finishing time(eft) for (const auto &rank : _rank_to_op) { @@ -258,7 +258,7 @@ int64_t HEScheduler::getPermuteTime(const backend::Backend *src_backend, return size / 400; } -int64_t HEScheduler::tryBackend(const ir::Operation &node, const backend::Backend *backend) +int64_t HEScheduler::tryBackend(const ir::IOperation &node, const backend::Backend *backend) { // if there is no profiling info don't use this backend during scheduling if (!_is_profiling_mode) @@ -297,10 +297,10 @@ void HEScheduler::makeRank() VERBOSE(HEScheduler::makeRank) << "task prioritizing" << std::endl; _graph->operations().iterate( - [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); }); + [&](const ir::OperationIndex &index, const ir::IOperation &) { DFSMaxRank(index); }); // Check that ranks are calculated for all operations(nodes) - _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) { + _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &) { UNUSED_RELEASE(index); assert(_op_to_rank->find(index) != _op_to_rank->end()); }); @@ -564,7 +564,7 @@ HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::Operation return {prev_op_ft, exec_time}; } -int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::Operation &node, +int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::IOperation &node, std::multimap<int64_t, int64_t> &transfer_st_exec_time) { int64_t max_pred_eft = 0; diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h index 18ea388fd..df6c07926 100644 --- a/runtime/onert/core/src/compiler/HEScheduler.h +++ b/runtime/onert/core/src/compiler/HEScheduler.h @@ -58,7 +58,7 @@ public: _is_profiling_mode{options.he_profiling_mode}, _is_linear_exec{options.executor == "Linear"}, _is_parallel_exec{options.executor == "Parallel"} { - for (auto entry : backends) + for (auto &&entry : backends) { if (entry->config()->id() == backend::builtin::Config::ID) continue; @@ -88,7 +88,7 @@ public: std::shared_ptr<ir::OperationIndexMap<int64_t>> getIndexedRanks() { return _op_to_rank; } private: - bool isNodeProfiled(const ir::Operation &); + bool isNodeProfiled(const ir::IOperation &); bool schedule(const ir::OperationIndex &, const backend::Backend *parent_backend); /** @@ -115,7 +115,7 @@ private: * * @return earliest finishing time of parent nodes */ - int64_t predMaxEFT(const backend::Backend *backend, const ir::Operation &node, + int64_t predMaxEFT(const backend::Backend *backend, const ir::IOperation &node, std::multimap<int64_t, int64_t> &transfer_st_exec_time); void makeRank(); @@ -146,7 +146,7 @@ private: void scheduleShufflingBackends(); - int64_t tryBackend(const ir::Operation &node, const backend::Backend *backend); + int64_t tryBackend(const ir::IOperation &node, const backend::Backend *backend); /** * @brief Schedule a node and its successor until: diff --git a/runtime/onert/core/src/compiler/HEScheduler.test.cc b/runtime/onert/core/src/compiler/HEScheduler.test.cc index 589331b49..1654bfc8b 100644 --- a/runtime/onert/core/src/compiler/HEScheduler.test.cc +++ b/runtime/onert/core/src/compiler/HEScheduler.test.cc @@ -43,7 +43,7 @@ struct MockConfigCPU : public IConfig std::string id() override { return "cpu"; } bool initialize() override { return true; }; bool supportPermutation() override { return false; } - Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; } + Layout supportLayout(const IOperation &, Layout) override { return Layout::UNKNOWN; } bool supportDynamicTensor() override { return false; } bool supportFP16() override { return false; } }; @@ -70,7 +70,7 @@ struct MockConfigGPU : public IConfig std::string id() override { return "gpu"; } bool initialize() override { return true; }; bool supportPermutation() override { return false; } - ir::Layout supportLayout(const ir::Operation &, ir::Layout) override + ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override { return ir::Layout::UNKNOWN; } @@ -92,7 +92,7 @@ struct MockConfigNPU : public IConfig std::string id() override { return "npu"; } bool initialize() override { return true; }; bool supportPermutation() override { return false; } - ir::Layout supportLayout(const ir::Operation &, ir::Layout) override + ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override { return ir::Layout::UNKNOWN; } diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc index f85b8d1bd..4dbe229c8 100644 --- a/runtime/onert/core/src/compiler/Linear.cc +++ b/runtime/onert/core/src/compiler/Linear.cc @@ -28,16 +28,16 @@ namespace compiler { // TODO(easy) Change the LoweredGraph param to Graph -std::vector<ir::OperationIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph) +std::vector<ir::OperationIndex> Linear::linearize(const compiler::ILoweredGraph &lowered_graph) { return lowered_graph.graph().topolSortOperations(); } // TODO(easy) Change the LoweredGraph param to Graph -void Linear::dump(const compiler::LoweredGraph &lowered_graph, +void Linear::dump(const compiler::ILoweredGraph &lowered_graph, const std::vector<ir::OperationIndex> &order) { - for (const auto ind : order) + for (const auto &ind : order) { // TODO Could logging system can handle this? (Inserting prefix for each line) std::istringstream iss{dumper::text::formatOperation(lowered_graph.graph(), ind)}; diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h index 9ac9a0139..4f92dc88d 100644 --- a/runtime/onert/core/src/compiler/Linear.h +++ b/runtime/onert/core/src/compiler/Linear.h @@ -21,7 +21,7 @@ #include <memory> #include "ir/Index.h" -#include "compiler/LoweredGraph.h" +#include "compiler/ILoweredGraph.h" namespace onert { @@ -31,8 +31,8 @@ namespace compiler class Linear { public: - static std::vector<ir::OperationIndex> linearize(const compiler::LoweredGraph &lowered_graph); - static void dump(const compiler::LoweredGraph &lowered_graph, + static std::vector<ir::OperationIndex> linearize(const compiler::ILoweredGraph &lowered_graph); + static void dump(const compiler::ILoweredGraph &lowered_graph, const std::vector<ir::OperationIndex> &order); }; diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc index d53d0ed00..46a45e44a 100644 --- a/runtime/onert/core/src/compiler/LoweredGraph.cc +++ b/runtime/onert/core/src/compiler/LoweredGraph.cc @@ -49,7 +49,7 @@ void LoweredGraph::lowerGraph(const CompilerOptions &options) // Build backend contexts auto &backend_manager = BackendManager::get(); // Create contexts for other backends - for (auto backend_str : options.backend_list) + for (auto &&backend_str : options.backend_list) { backend_manager.loadBackend(backend_str); auto backend = backend_manager.get(backend_str); @@ -100,9 +100,9 @@ void LoweredGraph::lowerGraph(const CompilerOptions &options) pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run(); VERBOSE(LoweredGraph) << "Dump after all the passes" << std::endl; - for (auto operand : _graph.getInputs()) + for (auto &&operand : _graph.getInputs()) VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl; - for (auto operand : _graph.getOutputs()) + for (auto &&operand : _graph.getOutputs()) VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl; dumper::text::dumpLoweredGraph(*this); @@ -121,8 +121,8 @@ void LoweredGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolv }); // Set operand lower info using assigned backends to operations - _graph.operations().iterate([&](const ir::OperationIndex &op_ind, const ir::Operation &) { - const ir::Operation &op = _graph.operations().at(op_ind); + _graph.operations().iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &) { + const ir::IOperation &op = _graph.operations().at(op_ind); auto backend = backend_resolver.getBackend(op_ind); if (!backend) { @@ -135,12 +135,12 @@ void LoweredGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolv // TODO Change setting layout of each backend at another place auto backend_layout = backend->config()->supportLayout(op, frontend_layout); - for (auto ind : op.getInputs() | ir::Remove::UNDEFINED) + for (auto &&ind : op.getInputs() | ir::Remove::UNDEFINED) { auto &operand_li = lower_info().operand.at(ind); operand_li.addUsePermuteFactor(PermuteFactor{backend, backend_layout}); } - for (auto ind : op.getOutputs() | ir::Remove::UNDEFINED) + for (auto &&ind : op.getOutputs() | ir::Remove::UNDEFINED) { auto &operand_li = lower_info().operand.at(ind); operand_li.addDefPermuteFactor(PermuteFactor{backend, backend_layout}); @@ -152,13 +152,13 @@ void LoweredGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolv // Handle graph inputs and outputs const auto builtin_backend = BackendManager::get().getBuiltin(); auto factor = PermuteFactor{builtin_backend, _graph.layout()}; - for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED) + for (auto &&index : _graph.getInputs() | ir::Remove::UNDEFINED) { auto &operand_li = lower_info().operand.at(index); assert(operand_li.def_factors().empty()); operand_li.addDefPermuteFactor(factor); } - for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED) + for (auto &&index : _graph.getOutputs() | ir::Remove::UNDEFINED) { auto &operand_li = lower_info().operand.at(index); operand_li.addUsePermuteFactor(factor); @@ -204,7 +204,7 @@ void LoweredGraph::dumpLowerInfo() auto factors_to_string = [](const PermuteFactorSet &factors) { std::string str; - for (auto factor : factors) + for (auto &&factor : factors) { str += factor.backend()->config()->id(); str += "(" + to_string(factor.layout()) + ")"; @@ -216,7 +216,7 @@ void LoweredGraph::dumpLowerInfo() auto operation_index_set_to_string = [](const ir::OperationIndexSet &operations) { std::stringstream sstream; sstream << "{ "; - for (auto op : operations) + for (auto &&op : operations) sstream << op << " "; sstream << "}"; return sstream.str(); diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc index 621f0c7b7..ccd08893f 100644 --- a/runtime/onert/core/src/compiler/ManualScheduler.cc +++ b/runtime/onert/core/src/compiler/ManualScheduler.cc @@ -42,7 +42,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap // This fallback will be used in case that `backend_for_all` is unavailable auto fallback = [&]() -> const backend::Backend * { - for (auto backend_id : _options.backend_list) + for (auto &&backend_id : _options.backend_list) { auto backend = resolveBackend(backend_id); if (backend) @@ -58,7 +58,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap VERBOSE(ManualScheduler) << "Default backend for all ops: " << backend_all->config()->id() << std::endl; - graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) { + graph.operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &) { backend_resolver->setBackend(index, backend_all); }); @@ -71,7 +71,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap // By default, Custom uses cpu backend op_type_map[ir::OpCode::Custom] = BackendManager::get().get("cpu"); - graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &operation) { + graph.operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &operation) { auto itr = op_type_map.find(operation.opcode()); if (itr != op_type_map.end()) { diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.cc b/runtime/onert/core/src/compiler/MultiModelCompiler.cc index fea6a7f25..141fdfe09 100644 --- a/runtime/onert/core/src/compiler/MultiModelCompiler.cc +++ b/runtime/onert/core/src/compiler/MultiModelCompiler.cc @@ -16,6 +16,7 @@ #include "MultiModelCompiler.h" +#include "CompilerHelpers.h" #include "ExecutorFactory.h" #include "ShapeValidator.h" #include "pass/ConstantOutputPass.h" @@ -30,6 +31,7 @@ #include "compiler/StaticShapeInferer.h" #include <misc/string_helpers.h> +#include <misc/polymorphic_downcast.h> namespace onert { @@ -53,7 +55,7 @@ std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void) /*************************************************** * Prepare compilation phase ***************************************************/ - for (auto options : _voptions) + for (auto &&options : _voptions) { if (!options) throw std::runtime_error{"Empty compile option"}; @@ -63,6 +65,9 @@ std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void) if (options->he_profiling_mode) throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet"); + if (!options->minmax_filepath.empty()) + throw std::runtime_error("Recording minmax is not supported for multiple models"); + options->forceInternalOptions(); options->verboseOptions(); } @@ -74,7 +79,15 @@ std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void) for (uint16_t i = 0; i < model_count; i++) { - _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { + if (!_nnpkg->model(ir::ModelIndex{i})->hasOnly<ir::Graph>()) + throw std::runtime_error("MultiModelCompiler can only compile models for inference."); + } + + for (uint16_t i = 0; i < model_count; i++) + { + _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) { + auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph); + // Mandatory passes pass::PassRunner{} .append(std::make_unique<pass::ConstantOutputPass>(subg)) @@ -100,6 +113,15 @@ std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void) // Model edge context: copy model edge context auto model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges()); + // Custom kernels + std::unordered_map<ir::ModelIndex, std::shared_ptr<backend::custom::IKernelBuilder>> + custom_kernel_builders; + for (uint16_t i = 0; i < model_count; i++) + { + auto const model_index = ir::ModelIndex{i}; + custom_kernel_builders[model_index] = _nnpkg->model(model_index)->getKernelBuilder(); + } + // Lower: Assign backend std::unordered_map<ir::ModelIndex, std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>> @@ -110,7 +132,9 @@ std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void) auto const model_index = ir::ModelIndex{i}; auto model = _nnpkg->model(model_index); - model->iterate([&](const ir::SubgraphIndex &subg_index, ir::Graph &subg) { + model->iterate([&](const ir::SubgraphIndex &subg_index, ir::IGraph &graph) { + auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph); + dot_dumper.dump(subg, nnfw::misc::str("before_lower_model-", i, "-subg-", subg_index.value())); // Lower: Assign backend @@ -146,7 +170,7 @@ std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void) // Run the StaticShapeInfer of primary subg. All child StaticShapeInferers are called // recursively std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers = - StaticShapeInferer::createStaticShapeInferers(model_lsubgs); + createStaticShapeInferers(model_lsubgs); const auto primary_subg_idx = ir::SubgraphIndex{0}; inferers.at(primary_subg_idx)->infer(); @@ -194,11 +218,15 @@ std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void) ir::OperationDumper dumper("Executor generation of Subgraph " + std::to_string(subg_index.value())); lowered_subg->graph().operations().iterate( - [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); }); - - auto &options = *_voptions[model_index.value()]; - auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create( - std::move(lowered_subg), tracing_ctx.get(), options, executors, model_index)}; + [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); }); + + ExecutorFactoryArgs args; + args.tracing_ctx = tracing_ctx.get(); + args.options = _voptions[model_index.value()]; + args.model_index = model_index; + args.custom_kernel_builder = custom_kernel_builders[model_index]; + auto executor = std::unique_ptr<exec::IExecutor>{ + ExecutorFactory::get().create(std::move(lowered_subg), executors, args)}; executor->setIndexedRanks(indexed_ranks); executors->emplace(model_index, subg_index, std::move(executor)); } diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.h b/runtime/onert/core/src/compiler/MultiModelCompiler.h index 89af664f8..b282a5087 100644 --- a/runtime/onert/core/src/compiler/MultiModelCompiler.h +++ b/runtime/onert/core/src/compiler/MultiModelCompiler.h @@ -59,12 +59,6 @@ public: std::shared_ptr<CompilerArtifact> compile(void); private: - std::shared_ptr<ir::Graph> &primary_subgraph() - { - return _nnpkg->primary_model()->at(ir::SubgraphIndex{0}); - } - -private: std::shared_ptr<ir::NNPkg> _nnpkg; std::vector<CompilerOptions *> _voptions; }; diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc index 8c6421744..3e940f037 100644 --- a/runtime/onert/core/src/compiler/ShapeValidator.cc +++ b/runtime/onert/core/src/compiler/ShapeValidator.cc @@ -52,7 +52,7 @@ void ShapeValidator::checkUnaryOp(const ir::Operation &node) void ShapeValidator::operator()() { _graph.operations().iterate( - [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); }); + [&](const ir::OperationIndex &, const ir::IOperation &node) { node.accept(*this); }); } void ShapeValidator::visit(const ir::operation::BatchMatMul &node) diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc index 25747d950..a25b326f1 100644 --- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc +++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc @@ -99,10 +99,10 @@ void StaticShapeInferer::infer() } } -bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op) +bool StaticShapeInferer::checkDynamicInput(const ir::IOperation &op) { const auto &operands = _lowered_subg->graph().operands(); - for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED) + for (auto &&input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED) { if (operands.at(input_idx).info().isDynamic()) { @@ -113,10 +113,10 @@ bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op) return false; } -bool StaticShapeInferer::checkDynamicOutput(const ir::Operation &op) +bool StaticShapeInferer::checkDynamicOutput(const ir::IOperation &op) { auto &operands = _lowered_subg->graph().operands(); - for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED) + for (auto &&output_idx : op.getOutputs() | ir::Remove::UNDEFINED) { if (operands.at(output_idx).info().isDynamic()) { @@ -126,10 +126,10 @@ bool StaticShapeInferer::checkDynamicOutput(const ir::Operation &op) return false; } -void StaticShapeInferer::setDynamicOutput(const ir::Operation &op) +void StaticShapeInferer::setDynamicOutput(const ir::IOperation &op) { auto &operands = _lowered_subg->graph().operands(); - for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED) + for (auto &&output_idx : op.getOutputs() | ir::Remove::UNDEFINED) { operands.at(output_idx).info().setDynamic(); } @@ -192,7 +192,7 @@ void StaticShapeInferer::dump() std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> StaticShapeInferer::createStaticShapeInferers( - const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<LoweredGraph>> &lowered_subgs) + const std::unordered_map<ir::SubgraphIndex, ILoweredGraph *> &lowered_subgs) { // Allocate StaticShapeInferer per each subgraph std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers; @@ -200,7 +200,7 @@ StaticShapeInferer::createStaticShapeInferers( { const auto &subg_index = pair.first; auto &lowered_subg = pair.second; - inferers[subg_index] = std::make_unique<StaticShapeInferer>(lowered_subg.get()); + inferers[subg_index] = std::make_unique<StaticShapeInferer>(lowered_subg); } // Append observers in all StaticShapeInferers @@ -211,7 +211,7 @@ StaticShapeInferer::createStaticShapeInferers( // TODO: Change this iteration for all to controlflow iteration lowered_subg->graph().operations().iterate( - [&](const ir::OperationIndex &, const ir::Operation &op) { + [&](const ir::OperationIndex &, const ir::IOperation &op) { // A Function to append child inferers. These make it possible for a StaticShapeInferer to // call StaticShapeInferes of child subgraphs recursively auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) { @@ -251,7 +251,9 @@ StaticShapeInferer::createStaticShapeInferers( // Append Observers in a StaticShapeInferer if (op.opcode() == ir::OpCode::If) { - const auto &if_op = nnfw::misc::polymorphic_downcast<const ir::operation::If &>(op); + // TODO Remove dynamic_cast + // An virtual base class cannot be downcasted by static_cast + const auto &if_op = dynamic_cast<const ir::operation::If &>(op); appendChildInferer(if_op.param().then_subg_index); appendChildInferer(if_op.param().else_subg_index); @@ -263,7 +265,8 @@ StaticShapeInferer::createStaticShapeInferers( } else if (op.opcode() == ir::OpCode::While) { - const auto &while_op = nnfw::misc::polymorphic_downcast<const ir::operation::While &>(op); + // TODO Remove dynamic_cast + const auto &while_op = dynamic_cast<const ir::operation::While &>(op); appendChildInferer(while_op.param().cond_subg_index); appendChildInferer(while_op.param().body_subg_index); @@ -602,6 +605,13 @@ void StaticShapeInferer::visit(const ir::operation::L2Normalization &op) handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT)); } +void StaticShapeInferer::visit(const ir::operation::Loss &) +{ + // TODO Consider SparseCategoricalCrossentropy case + + // TODO Consider output shape in case of reduction option +} + void StaticShapeInferer::visit(const ir::operation::LSTM &op) { auto &operands = _lowered_subg->graph().operands(); @@ -1119,7 +1129,7 @@ void StaticShapeInferer::visit(const ir::operation::Split &op) auto outputs = op.getOutputs(); if (!axis.isConstant()) { - for (auto output_idx : outputs) + for (auto &&output_idx : outputs) { ir::Operand &output = operands.at(output_idx); output.info().setDynamic(); @@ -1137,7 +1147,7 @@ void StaticShapeInferer::visit(const ir::operation::Split &op) ir::Shape new_shape = shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits); - for (auto output_idx : outputs) + for (auto &&output_idx : outputs) { ir::Operand &output = operands.at(output_idx); output.info().shape(new_shape); diff --git a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc index 89dd303d4..a6590b13f 100644 --- a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc @@ -28,14 +28,14 @@ namespace compiler namespace pass { -void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node) +void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::IOperation &node) { const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index); const auto backend = op_lower_info->backend(); const auto layout = op_lower_info->layout(); const auto factor = PermuteFactor{backend, layout}; - for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto &input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { auto &object = _graph.operands().at(input); diff --git a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h index 4911ace2f..d5b9aa14e 100644 --- a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h +++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h @@ -39,7 +39,7 @@ public: std::string id() final { return "ConstantInsertionPass"; } public: - void callback(const ir::OperationIndex &index, ir::Operation &node) final; + void callback(const ir::OperationIndex &index, ir::IOperation &node) final; private: struct ReplaceKey diff --git a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc index 6ed154548..32e32d0ef 100644 --- a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc +++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc @@ -29,7 +29,7 @@ namespace compiler namespace pass { -void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node) +void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::IOperation &node) { const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index); const auto backend = op_lower_info->backend(); @@ -37,7 +37,7 @@ void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Op const auto factor = PermuteFactor{backend, layout}; // Now this runtime does not support the node making output of operation as constant - for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto &input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { auto &object = _graph.operands().at(input); if (object.isConstant()) diff --git a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h index e17d776d1..d60a1033f 100644 --- a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h +++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h @@ -36,7 +36,7 @@ public: std::string id() final { return "ConstantLoweringPass"; } public: - void callback(const ir::OperationIndex &index, ir::Operation &node) final; + void callback(const ir::OperationIndex &index, ir::IOperation &node) final; }; } // namespace pass diff --git a/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc index c176f6ffb..1448de473 100644 --- a/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc +++ b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc @@ -49,7 +49,7 @@ void ConstantOutputPass::callback(const ir::OperandIndex &ind, ir::Operand &obj) // Make the operations that uses this operand to use the generated operand auto orig_uses = obj.getUses(); - for (auto use : orig_uses) + for (auto &&use : orig_uses) { permute_input_obj.insertUse(use); obj.removeUse(use); diff --git a/runtime/onert/core/src/compiler/pass/IPass.h b/runtime/onert/core/src/compiler/pass/IPass.h new file mode 100644 index 000000000..77f5916fd --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/IPass.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_PASS_IPASS_H__ +#define __ONERT_COMPILER_PASS_IPASS_H__ + +#include <string> + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +struct IPass +{ + virtual ~IPass() = default; + + virtual std::string id() = 0; + virtual void run() = 0; +}; + +} // namespace pass +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_PASS_IPASS_H__ diff --git a/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h index 1f1f32f6d..64831a0ac 100644 --- a/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h +++ b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h @@ -18,7 +18,7 @@ #define __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__ #include "OperandPass.h" -#include "compiler/LoweredGraph.h" +#include "compiler/ILoweredGraph.h" namespace onert { @@ -30,7 +30,7 @@ namespace pass class LoweredOperandPass : public OperandPass { public: - LoweredOperandPass(compiler::LoweredGraph &lowered_graph) + LoweredOperandPass(compiler::ILoweredGraph &lowered_graph) : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph} { // DO NOTHING @@ -42,7 +42,7 @@ public: void callback(const ir::OperandIndex &i, ir::Operand &o) override = 0; protected: - compiler::LoweredGraph &_lowered_graph; + compiler::ILoweredGraph &_lowered_graph; }; } // namespace pass diff --git a/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h index 76ee3d7ff..27ca77c91 100644 --- a/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h +++ b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h @@ -18,7 +18,7 @@ #define __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__ #include "OperationPass.h" -#include "compiler/LoweredGraph.h" +#include "compiler/ILoweredGraph.h" namespace onert { @@ -30,7 +30,7 @@ namespace pass class LoweredOperationPass : public OperationPass { public: - LoweredOperationPass(LoweredGraph &lowered_graph) + LoweredOperationPass(ILoweredGraph &lowered_graph) : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph} { // DO NOTHING @@ -39,10 +39,10 @@ public: virtual ~LoweredOperationPass() = default; std::string id() override = 0; - void callback(const ir::OperationIndex &i, ir::Operation &o) override = 0; + void callback(const ir::OperationIndex &i, ir::IOperation &o) override = 0; protected: - LoweredGraph &_lowered_graph; + ILoweredGraph &_lowered_graph; }; } // namespace pass diff --git a/runtime/onert/core/src/compiler/pass/OperationPass.cc b/runtime/onert/core/src/compiler/pass/OperationPass.cc index 357a8798a..bd9bcb4a4 100644 --- a/runtime/onert/core/src/compiler/pass/OperationPass.cc +++ b/runtime/onert/core/src/compiler/pass/OperationPass.cc @@ -17,7 +17,7 @@ #include "OperationPass.h" #include "ir/Index.h" -#include "ir/Operation.h" +#include "ir/IOperation.h" #include "ir/Graph.h" namespace onert @@ -30,7 +30,7 @@ namespace pass void OperationPass::run() { _graph.operations().iterate( - [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); }); + [&](const ir::OperationIndex &index, ir::IOperation &node) { callback(index, node); }); } } // namespace pass diff --git a/runtime/onert/core/src/compiler/pass/OperationPass.h b/runtime/onert/core/src/compiler/pass/OperationPass.h index ac4d818a2..0a00b11d1 100644 --- a/runtime/onert/core/src/compiler/pass/OperationPass.h +++ b/runtime/onert/core/src/compiler/pass/OperationPass.h @@ -29,7 +29,7 @@ namespace onert { namespace ir { -class Operation; +struct IOperation; } // namespace ir } // namespace onert @@ -62,7 +62,7 @@ public: * @param index is the index of a node in graph * @param node is the node in graph */ - virtual void callback(const ir::OperationIndex &index, ir::Operation &node) = 0; + virtual void callback(const ir::OperationIndex &index, ir::IOperation &node) = 0; /** * @brief Run the pass diff --git a/runtime/onert/core/src/compiler/pass/Pass.h b/runtime/onert/core/src/compiler/pass/Pass.h index 3016df490..b34695c97 100644 --- a/runtime/onert/core/src/compiler/pass/Pass.h +++ b/runtime/onert/core/src/compiler/pass/Pass.h @@ -17,6 +17,8 @@ #ifndef __ONERT_COMPILER_PASS_PASS_H__ #define __ONERT_COMPILER_PASS_PASS_H__ +#include "IPass.h" + #include <string> namespace onert @@ -34,7 +36,7 @@ namespace compiler namespace pass { -class Pass +class Pass : public IPass { public: Pass(ir::Graph &graph) : _graph{graph} {} diff --git a/runtime/onert/core/src/compiler/pass/PassRunner.cc b/runtime/onert/core/src/compiler/pass/PassRunner.cc index 2d11be201..cd1b82bb2 100644 --- a/runtime/onert/core/src/compiler/pass/PassRunner.cc +++ b/runtime/onert/core/src/compiler/pass/PassRunner.cc @@ -23,7 +23,7 @@ namespace compiler namespace pass { -PassRunner &PassRunner::append(std::unique_ptr<Pass> pass) +PassRunner &PassRunner::append(std::unique_ptr<IPass> pass) { _passes.emplace_back(std::move(pass)); return *this; diff --git a/runtime/onert/core/src/compiler/pass/PassRunner.h b/runtime/onert/core/src/compiler/pass/PassRunner.h index a43c83f89..03bfbe220 100644 --- a/runtime/onert/core/src/compiler/pass/PassRunner.h +++ b/runtime/onert/core/src/compiler/pass/PassRunner.h @@ -21,7 +21,7 @@ #include <memory> #include <vector> -#include "Pass.h" +#include "IPass.h" #include "util/logging.h" namespace onert @@ -38,12 +38,12 @@ class PassRunner { public: PassRunner() = default; - PassRunner &append(std::unique_ptr<Pass> pass); + PassRunner &append(std::unique_ptr<IPass> pass); void run(); private: - std::vector<std::unique_ptr<Pass>> _passes; + std::vector<std::unique_ptr<IPass>> _passes; }; } // namespace pass diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc index c27ce3d09..d9452c7f9 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc @@ -16,6 +16,7 @@ #include "PermutationEliminationPass.h" +#include "backend/Backend.h" #include "util/logging.h" namespace onert @@ -25,7 +26,7 @@ namespace compiler namespace pass { -void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::Operation &node) +void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::IOperation &node) { _op_ind = ind; node.accept(*this); @@ -73,7 +74,7 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node) auto &out_operand_obj = _graph.operands().at(out_operand); assert(out_operand_obj.getDef() == _op_ind); out_operand_obj.unsetDef(); - _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::Operation &op) { + _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) { if (!op.getOutputs().contains(in_operand)) return; // Update Operation and Operand edges @@ -87,7 +88,7 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node) _graph.operations().remove(_op_ind); } - _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::Operation &op) { + _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) { if (!op.getInputs().contains(in_operand)) return; op.replaceInputs(in_operand, out_operand); @@ -106,7 +107,7 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node) in_operand_obj.removeUse(_op_ind); // Make operations(that use the output) use the input - _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::Operation &op) { + _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) { if (!op.getInputs().contains(out_operand)) return; op.replaceInputs(out_operand, in_operand); diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h index 50c38c53f..18ba99804 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h +++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h @@ -49,7 +49,7 @@ public: std::string id() final { return "PermutationEliminationPass"; } public: - void callback(const ir::OperationIndex &i, ir::Operation &n) final; + void callback(const ir::OperationIndex &i, ir::IOperation &n) final; private: void visit(const ir::operation::Permute &) final; diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc index 0da1e54df..39eb803f5 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc @@ -54,13 +54,13 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera std::unordered_map<PermuteFactor, ir::OperandIndex> factor_to_index; { assert(operand_li->def_factors().size() == 1); - for (auto factor : operand_li->def_factors()) + for (auto &&factor : operand_li->def_factors()) { factor_to_index.emplace(factor, index); } auto insert_set = operand_li->use_factors() - operand_li->def_factors(); - for (auto factor : insert_set) + for (auto &&factor : insert_set) { const auto permute_operation_index = insertPermute(index, factor); permute_indexes.push_back(permute_operation_index); @@ -75,7 +75,7 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera std::list<ir::OperationIndex> remove_list; auto uses = object.getUses(); - for (auto use : uses) + for (auto &&use : uses) { // If permute operation, ignore it if (std::find(permute_indexes.begin(), permute_indexes.end(), use) != permute_indexes.end()) diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc index f83b1ba31..f014d29d3 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc @@ -30,7 +30,7 @@ namespace pass using namespace ir; -void PermutationOperationPass::callback(const OperationIndex &, Operation &node) +void PermutationOperationPass::callback(const OperationIndex &, IOperation &node) { node.accept(*this); } diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h index cea5de288..e253a77ad 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h +++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h @@ -36,7 +36,7 @@ public: std::string id() final { return "PermutationOperationPass"; } public: - void callback(const ir::OperationIndex &i, ir::Operation &n) final; + void callback(const ir::OperationIndex &i, ir::IOperation &n) final; public: void visit(const ir::operation::BinaryArithmetic &) final; diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc index 35fb575b0..162c4e7ef 100644 --- a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc +++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc @@ -37,15 +37,15 @@ void UnusedOperandEliminationPass::run() { util::Set<ir::OperandIndex> used; - _graph.operations().iterate([&](const ir::OperationIndex &, const ir::Operation &node) { - for (auto ind : (node.getInputs() + node.getOutputs()) | ir::Remove::UNDEFINED) + _graph.operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &node) { + for (auto &&ind : (node.getInputs() + node.getOutputs()) | ir::Remove::UNDEFINED) { used.add(ind); } }); // Graph's inputs/outputs are always considered as used - for (auto ind : (_graph.getInputs() + _graph.getOutputs()) | ir::Remove::UNDEFINED) + for (auto &&ind : (_graph.getInputs() + _graph.getOutputs()) | ir::Remove::UNDEFINED) { used.add(ind); } diff --git a/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc b/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc new file mode 100644 index 000000000..490c648cd --- /dev/null +++ b/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compiler/train/LoweredTrainableGraph.h" + +#include "../ManualScheduler.h" +#include "../pass/ConstantInsertionPass.h" +#include "../pass/ConstantLoweringPass.h" +#include "../pass/PassRunner.h" +#include "../pass/PermutationEliminationPass.h" +#include "../pass/PermutationInsertionPass.h" +#include "../pass/PermutationOperationPass.h" +#include "../../backend/builtin/Config.h" +#include "../../dumper/text/GraphDumper.h" +#include "../../ir/verifier/Verifier.h" +#include "TrainableOperationConverter.h" + +#include "backend/Backend.h" +#include "backend/train/ITrainableBackend.h" +#include "compiler/BackendResolver.h" +#include "util/logging.h" + +#include <cassert> +#include <sstream> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +LoweredTrainableGraph::LoweredTrainableGraph(ir::train::TrainableGraph &graph, + const CompilerOptions &options) + : _trainable_graph{graph} +{ + lowerGraph(options); +} + +void LoweredTrainableGraph::lowerGraph(const CompilerOptions &options) +{ + // Build backend contexts + auto &backend_manager = BackendManager::get(); + // Create contexts for other backends + for (auto &&backend_str : options.backend_list) + { + backend_manager.loadBackend(backend_str); + auto backend = backend_manager.get(backend_str); + + // TODO As the default value of backend list contains "cpu", "acl_cl" and "acl_neon", and some + // are not available on x64 or some other platforms. So this may be a workaround for x64 and + // we should change it back(throw if backend is not loaded) later. + if (!backend) + { + VERBOSE(LoweredTrainableGraph) << "Cannot load backend - " << backend_str << std::endl; + continue; + } + } + if (backend_manager.num_backends() == 0) + throw std::runtime_error{"No available backends loaded."}; + + // TODO Move "schedule" phase out of here + // TODO Scheduling + std::unique_ptr<BackendResolver> backend_resolver; + auto all_backends = backend_manager.getAll(); + + auto scheduler = ManualScheduler(all_backends, options); + backend_resolver = scheduler.schedule(_trainable_graph.graph()); + + // Check if backends are trainable + _trainable_graph.operations().iterate( + [&](const ir::OperationIndex &op_ind, const ir::IOperation &) { + const auto backend = backend_resolver->getBackend(op_ind); + + // TODO Remove dynamic_cast + if (dynamic_cast<const backend::train::ITrainableBackend *>(backend) == nullptr) + { + throw std::runtime_error(backend->config()->id() + "backend does not support training"); + } + }); + + makeLowerInfo(*backend_resolver); + VERBOSE(LoweredTrainableGraph) << "dump before mandatory passes" << std::endl; + dumper::text::dumpLoweredGraph(*this); + + // Mandatory passes - kind of legalization(?) + compiler::pass::PassRunner{} + .append(std::make_unique<compiler::pass::ConstantInsertionPass>(*this)) + .append(std::make_unique<compiler::pass::ConstantLoweringPass>(*this)) + .append(std::make_unique<compiler::pass::PermutationOperationPass>(*this)) + .append(std::make_unique<compiler::pass::PermutationInsertionPass>(*this)) + .run(); + + // TODO Move converting Permute op into PermutationInsertionPass + auto op_converter = TrainableOperationConverter{_trainable_graph, nullptr}; + _trainable_graph.operations().iterate( + [&](const onert::ir::OperationIndex &index, const onert::ir::IOperation &op) { + if (op.opcode() == ir::OpCode::Permute) + { + auto trainable_op = op_converter(op); + auto gen_index = _trainable_graph.replaceOperation(index, std::move(trainable_op)); + UNUSED_RELEASE(gen_index); + assert(gen_index == index); + } + }); + + dumpLowerInfo(); + + // Optimization passes (optional) + compiler::pass::PassRunner{} + .append(std::make_unique<compiler::pass::PermutationEliminationPass>(*this)) + .run(); + + // TODO Update LowerInfo for training + + VERBOSE(LoweredTrainableGraph) << "Dump after all the passes" << std::endl; + for (auto &&operand : _trainable_graph.getInputs()) + VERBOSE(LoweredTrainableGraph) << "Graph Input : " << operand << std::endl; + for (auto &&operand : _trainable_graph.getOutputs()) + VERBOSE(LoweredTrainableGraph) << "Graph Output : " << operand << std::endl; + dumper::text::dumpLoweredGraph(*this); + + // Graph verifications + { + assert(ir::verifier::InputOutputChecker().verify(_trainable_graph.graph())); + assert(ir::verifier::DAGChecker().verify(_trainable_graph.graph())); + assert(ir::verifier::EdgeChecker().verify(_trainable_graph.graph())); + } +} + +void LoweredTrainableGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolver) +{ + _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) { + lower_info().operand.set(index, std::make_unique<OperandLowerInfo>()); + }); + + // Set operand lower info using assigned backends to operations + _trainable_graph.operations().iterate( + [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) { + auto backend = backend_resolver.getBackend(op_ind); + if (!backend) + { + throw std::runtime_error{"Fail to find backend for " + op.name() + " operation"}; + } + + auto frontend_layout = _trainable_graph.layout(); + + // The layout of each backend should be set at another place + // TODO Change setting layout of each backend at another place + auto backend_layout = backend->config()->supportLayout(op, frontend_layout); + + for (auto &&ind : op.getInputs() | ir::Remove::UNDEFINED) + { + auto &operand_li = lower_info().operand.at(ind); + operand_li.addUsePermuteFactor(PermuteFactor{backend, backend_layout}); + } + for (auto &&ind : op.getOutputs() | ir::Remove::UNDEFINED) + { + auto &operand_li = lower_info().operand.at(ind); + operand_li.addDefPermuteFactor(PermuteFactor{backend, backend_layout}); + } + lower_info().operation.set( + op_ind, std::make_unique<compiler::OperationLowerInfo>(backend, backend_layout)); + }); + + // Handle graph inputs and outputs + const auto builtin_backend = BackendManager::get().getBuiltin(); + auto factor = PermuteFactor{builtin_backend, _trainable_graph.layout()}; + for (auto &&index : _trainable_graph.getInputs() | ir::Remove::UNDEFINED) + { + auto &operand_li = lower_info().operand.at(index); + assert(operand_li.def_factors().empty()); + operand_li.addDefPermuteFactor(factor); + } + for (auto &&index : _trainable_graph.getOutputs() | ir::Remove::UNDEFINED) + { + auto &operand_li = lower_info().operand.at(index); + operand_li.addUsePermuteFactor(factor); + } + + // Handle variable tensors + _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &operand) { + // Some inputs of an operation could be non-constant, but not existed in graph inputs/outputs + // and not undefined operand - these are variable tensors. For example, + // UnidirectionalSequenceLSTM has such inputs. + if (operand.info().isVariable()) + { + // The variable operand with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + auto operand_li = lower_info().operand.at(index); + assert(operand_li.def_factors().empty()); + operand_li.addDefPermuteFactor(operand_li.use_factors().getOnlyElement()); + } + }); +} + +void LoweredTrainableGraph::dumpLowerInfo() +{ + if (::onert::util::logging::ctx.enabled() == false) + return; + + std::map<uint32_t, std::string> dumps; + + _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) { + const auto operand_lower_info = lower_info().operand.getRawPtr(index); + assert(operand_lower_info); + if (!operand_lower_info->def_factors().empty() || !operand_lower_info->use_factors().empty()) + { + auto shape_to_string = [](const ir::Shape &shape) { + std::stringstream sstream; + sstream << "{ "; + for (auto i = 0; i < shape.rank(); ++i) + sstream << (shape.dim(i)) << " "; + sstream << "}"; + return sstream.str(); + }; + + auto factors_to_string = [](const PermuteFactorSet &factors) { + std::string str; + for (auto &&factor : factors) + { + str += factor.backend()->config()->id(); + str += "(" + to_string(factor.layout()) + ")"; + str += " "; + } + return "{ " + str + "}"; + }; + + auto operation_index_set_to_string = [](const ir::OperationIndexSet &operations) { + std::stringstream sstream; + sstream << "{ "; + for (auto &&op : operations) + sstream << op << " "; + sstream << "}"; + return sstream.str(); + }; + + auto data_to_str = [](const ir::Data *data) { + return (data ? (std::to_string(data->size()) + " bytes") : "N/A"); + }; + + std::string shape_str = shape_to_string(object.shape()); + std::string def_op = operation_index_set_to_string({object.getDef()}); + std::string use_ops = operation_index_set_to_string(object.getUses()); + std::string def_factors = factors_to_string(operand_lower_info->def_factors()); + std::string use_factors = factors_to_string(operand_lower_info->use_factors()); + std::stringstream sstream; + sstream << "Operand " << index << " Info" << std::endl; + sstream << " - Shape : " << shape_str << std::endl; + sstream << " - Def/Uses : Def " << def_op << " Uses " << use_ops << std::endl; + sstream << " - Data : " << data_to_str(object.data()) << std::endl; + sstream << " - LowerInfo : Def " << def_factors << " Uses " << use_factors << std::endl; + dumps.emplace(index.value(), sstream.str()); + } + }); + + for (const auto &e : dumps) + { + if (!e.second.empty()) + { + std::istringstream iss(e.second); + std::string line; + while (std::getline(iss, line)) + VERBOSE(Lower) << line << std::endl; + } + } +} + +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc new file mode 100644 index 000000000..d2153296f --- /dev/null +++ b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "StaticDerivativeShapeInferer.h" +#include "util/ShapeInference.h" +#include "util/logging.h" + +#include <misc/polymorphic_downcast.h> + +#include <sstream> +#include <stdexcept> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +void StaticDerivativeShapeInferer::infer() +{ + // It is not determined to iterate in reverse order. + auto sorted_ops = _lowered_subg->graph().topolSortOperations(); + for (auto it = sorted_ops.rbegin(); it != sorted_ops.rend(); ++it) + { + const auto op_idx = *it; + const auto &op = _lowered_subg->trainable_graph().operation(op_idx); + if (checkDynamicInput(op)) + { + std::stringstream msg; + msg << "StaticDerivativeShapeInferer does not support dynamic shape yet, "; + msg << op.name() << "(op index: " << op_idx << ") has dynamic shape."; + throw std::runtime_error(msg.str()); + } + + checkOutput(op); + + op.accept(*this); + } +} + +void StaticDerivativeShapeInferer::dump() +{ + // TODO dump +} + +bool StaticDerivativeShapeInferer::checkDynamicInput(const ir::IOperation &op) +{ + const auto &operands = _lowered_subg->graph().operands(); + for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED) + { + if (operands.at(input_idx).info().isDynamic()) + { + return true; + } + } + + return false; +} + +void StaticDerivativeShapeInferer::checkOutput(const ir::IOperation &op) +{ + const auto &derivatives = _lowered_subg->trainable_graph().derivatives(); + for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED) + { + if (!derivatives.exist(output_idx)) + { + std::stringstream msg; + msg << "StaticDerivativeShapeInferer : Invalid output, "; + msg << op.name() << "'s derivative output(index: " << output_idx << ") does not exist."; + throw std::runtime_error(msg.str()); + } + } +} + +void StaticDerivativeShapeInferer::setShape(const ir::OperandIndex &index, const ir::Shape &shape) +{ + auto &tgraph = _lowered_subg->trainable_graph(); + + if (tgraph.derivatives().exist(index)) + tgraph.changeDerivativeShape(index, shape); + else + { + // NOTE This code assumes the types are always the same, but I'm not sure. + const auto &type = tgraph.operands().at(index).typeInfo(); + const auto new_index = tgraph.addDerivative(index, std::make_unique<ir::Operand>(shape, type)); + assert(new_index == index); + UNUSED_RELEASE(new_index); + } +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Conv2D &) +{ + // NYI +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::ElementwiseActivation &) +{ + // NYI +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Loss &) +{ + // NYI +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Permute &op) +{ + const auto &derivatives = _lowered_subg->trainable_graph().derivatives(); + + const auto &output_idx = op.getOutputs().at(0); + const auto &output = derivatives.at(output_idx); + + // re-sizing input derivative shape + const auto &input_idx = op.getInputs().at(0); + const auto &new_shape = output.info().shape(); + setShape(input_idx, new_shape); +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Pool2D &) +{ + // NYI +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Reshape &) +{ + // NYI +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Softmax &) +{ + // NYI +} + +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h new file mode 100644 index 000000000..48b3172d2 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_STATIC_DERIVATIVE_SHAPE_INFERER_H__ +#define __ONERT_COMPILER_STATIC_DERIVATIVE_SHAPE_INFERER_H__ + +#include "ir/train/TrainableOperationVisitor.h" + +#include "compiler/train/LoweredTrainableGraph.h" +#include "ir/Index.h" + +#include <memory> +#include <unordered_map> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +/** + * @brief Class to infer shape before running kernels. It does the following: + * - re-calculate and set output shape at compile time (before running kernels) + * - if calculation cannot be done at compile time, mark the outputs to be dynamic, meaning + * shapes of outputs will be calculated during running kernels + */ +class StaticDerivativeShapeInferer : public ir::train::TrainableOperationVisitor +{ +public: + StaticDerivativeShapeInferer(compiler::train::LoweredTrainableGraph *lowered_subg) + : _lowered_subg{lowered_subg} + { + } + + /** + * @brief Infer shape of operands belonging to ops and set the output shape. + * If output shape cannot be known without running op, mark it so that it can be allocated + * when running kernel. + */ + void infer(void); + + void dump(); + +private: + bool checkDynamicInput(const ir::IOperation &op); + void checkOutput(const ir::IOperation &op); + void setShape(const ir::OperandIndex &index, const ir::Shape &shape); + +private: + void visit(const ir::train::operation::Conv2D &op) override; + void visit(const ir::train::operation::ElementwiseActivation &op) override; + void visit(const ir::train::operation::Loss &op) override; + void visit(const ir::train::operation::Permute &op) override; + void visit(const ir::train::operation::Pool2D &op) override; + void visit(const ir::train::operation::Reshape &op) override; + void visit(const ir::train::operation::Softmax &op) override; + +private: + compiler::train::LoweredTrainableGraph *_lowered_subg; +}; + +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_STATIC_DERIVATIVE_SHAPE_INFERER_H__ diff --git a/runtime/onert/core/src/compiler/train/TensorRegistries.h b/runtime/onert/core/src/compiler/train/TensorRegistries.h new file mode 100644 index 000000000..48eaf10a1 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/TensorRegistries.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__ +#define __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__ + +#include "../../backend/builtin/Config.h" +#include "../../backend/builtin/train/TensorRegistry.h" + +#include <backend/train/TrainableBackendContext.h> + +#include <memory> +#include <unordered_set> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +class TensorRegistries +{ +public: + TensorRegistries() = default; + + TensorRegistries(const backend::train::TrainableBackendContexts &backend_contexts, + bool include_builtin) + { + for (const auto &e : backend_contexts) + { + auto tensor_reg = e.second->tensor_registry(); + if (e.first->config()->id() == backend::builtin::Config::ID) + { + _builtin_tensor_reg = + std::dynamic_pointer_cast<backend::builtin::train::TensorRegistry>(tensor_reg); + if (include_builtin) + _tensor_regs.insert(tensor_reg); + } + else + { + _tensor_regs.insert(tensor_reg); + } + } + } + + std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>>::const_iterator begin() const + { + return _tensor_regs.cbegin(); + } + std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>>::const_iterator end() const + { + return _tensor_regs.cend(); + } + + std::shared_ptr<backend::builtin::train::TensorRegistry> getBuiltinTensorRegistry() const + { + return _builtin_tensor_reg; + } + + backend::ITensor *getITensor(ir::OperandIndex index) const + { + for (auto &&tensor_reg : _tensor_regs) + { + auto tensor = tensor_reg->getITensor(index); + if (tensor) + return tensor; + } + return nullptr; + } + + backend::ITensor *getDerivativeITensor(ir::OperandIndex index) const + { + for (auto &&tensor_reg : _tensor_regs) + { + auto tensor = tensor_reg->getDerivativeITensor(index); + if (tensor) + return tensor; + } + return nullptr; + } + +private: + std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>> _tensor_regs; + std::shared_ptr<backend::builtin::train::TensorRegistry> _builtin_tensor_reg; +}; + +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__ diff --git a/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc new file mode 100644 index 000000000..d20ae9fd3 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TrainableOperationConverter.h" + +#include "ir/train/Operations.Include.h" +#include "util/Utils.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +TrainableOperationConverter::TrainableOperationConverter( + ir::train::TrainableGraph &tgraph, const compiler::train::TrainingInfo *training_info) + : UntrainableOperationConverter{tgraph}, _training_info{training_info} +{ + // Avoid unused-private-field error + UNUSED_RELEASE(_training_info); +} + +void TrainableOperationConverter::visit(const ir::operation::Conv2D &node) +{ + _return_op = std::make_unique<ir::train::operation::Conv2D>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::ElementwiseActivation &node) +{ + if (node.param().op_type == ir::operation::ElementwiseActivation::Type::RELU) + { + _return_op = std::make_unique<ir::train::operation::ElementwiseActivation>(node); + } + else + { + UntrainableOperationConverter::visit(node); + } +} + +void TrainableOperationConverter::visit(const ir::operation::FullyConnected &node) +{ + _return_op = std::make_unique<ir::train::operation::FullyConnected>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::Loss &node) +{ + _return_op = std::make_unique<ir::train::operation::Loss>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::Permute &node) +{ + _return_op = std::make_unique<ir::train::operation::Permute>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::Pool2D &node) +{ + _return_op = std::make_unique<ir::train::operation::Pool2D>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::Reshape &node) +{ + _return_op = std::make_unique<ir::train::operation::Reshape>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::Softmax &node) +{ + _return_op = std::make_unique<ir::train::operation::Softmax>(node); +} + +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h new file mode 100644 index 000000000..5f6fc10c3 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__ +#define __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__ + +#include "UntrainableOperationConverter.h" + +#include "compiler/train/TrainingInfo.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +class TrainableOperationConverter : public UntrainableOperationConverter +{ +public: + TrainableOperationConverter(ir::train::TrainableGraph &trainable_graph, + const compiler::train::TrainingInfo *training_info); + + using UntrainableOperationConverter::operator(); + +private: + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::ElementwiseActivation &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::Loss &node) override; + void visit(const ir::operation::Permute &node) override; + void visit(const ir::operation::Pool2D &node) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::Softmax &) override; + +private: + const compiler::train::TrainingInfo *_training_info; +}; + +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__ diff --git a/runtime/onert/core/src/compiler/train/TrainingCompiler.cc b/runtime/onert/core/src/compiler/train/TrainingCompiler.cc new file mode 100644 index 000000000..711af1651 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/TrainingCompiler.cc @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TrainingCompiler.h" + +#include "StaticDerivativeShapeInferer.h" +#include "TrainableOperationConverter.h" +#include "pass/LossInsertionPass.h" +#include "../CompilerHelpers.h" +#include "../ExecutorFactory.h" +#include "../pass/ConstantOutputPass.h" +#include "../pass/OddOutputPass.h" +#include "../pass/PassRunner.h" +#include "../pass/UnusedOperandEliminationPass.h" +#include "../ShapeValidator.h" +#include "../../dumper/dot/DotDumper.h" +#include "../../exec/train/TrainableExecutors.h" +#include "../../ir/OperationDumper.h" +#include "../../ir/verifier/Verifier.h" + +#include <compiler/StaticShapeInferer.h> +#include <compiler/train/LoweredTrainableGraph.h> +#include <ir/train/TrainableGraph.h> +#include <exec/train/optimizer/SGD.h> + +#include <misc/polymorphic_downcast.h> +#include <misc/string_helpers.h> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +TrainingCompiler::TrainingCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg, + std::vector<std::unique_ptr<CompilerOptions>> &copts, + const TrainingInfo &training_info) + : _model{nnpkg->primary_model()}, _options{copts[0].get()}, _training_info{training_info} +{ + if (nnpkg->model_count() > 1) + throw std::runtime_error("TrainingCompiler does not support multiple models yet"); + + if (nnpkg->primary_model()->subgraphs_count() > 1) + throw std::runtime_error("TrainingCompiler does not support multiple subgraphs yet"); +} + +std::shared_ptr<CompilerArtifact> TrainingCompiler::compile(void) +{ + /*************************************************** + * Prepare compilation phase + ***************************************************/ + if (!_options) + throw std::runtime_error{"Empty compile option"}; + + // Mode check + // TODO handle option for each model + if (_options->he_profiling_mode) + { + if (!_options->he_scheduler) + throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling."); + + if (_options->executor != "Dataflow") + throw std::runtime_error("Profiling mode works only with 'Dataflow' executor"); + } + + if (!_options->minmax_filepath.empty()) + { + if (_options->executor != "Linear") + throw std::runtime_error("Recording minmax works only with Linear executor"); + } + + _options->forceInternalOptions(); + _options->verboseOptions(); + + auto custom_kernel_builder = _model->getKernelBuilder(); + + _model->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) { + auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph); + // Mandatory passes + compiler::pass::PassRunner{} + .append(std::make_unique<compiler::pass::ConstantOutputPass>(subg)) + .append(std::make_unique<compiler::pass::OddOutputPass>(subg)) + .run(); + + // Optimizations + compiler::pass::PassRunner{} + .append(std::make_unique<compiler::pass::UnusedOperandEliminationPass>(subg)) + .run(); + }); + + std::unordered_map<ir::SubgraphIndex, std::shared_ptr<ir::train::TrainableGraph>> + trainable_subgraphs; + + if (_model->hasOnly<ir::Graph>()) + { + // Create trainable subgraphs by copy and converting inference model + _model->iterate([&](const ir::SubgraphIndex &subg_index, const ir::IGraph &graph) { + const auto &subg = nnfw::misc::polymorphic_downcast<const ir::Graph &>(graph); + // Create TrainableGraph by copying Graph + auto trainable_subg = std::make_shared<ir::train::TrainableGraph>(subg); + + // Convert operations to trainable operations + auto converter = TrainableOperationConverter{*trainable_subg, &_training_info}; + subg.operations().iterate( + [&](const onert::ir::OperationIndex &op_index, const onert::ir::IOperation &op) { + auto trainable_op = converter(op); + auto gen_index = trainable_subg->replaceOperation(op_index, std::move(trainable_op)); + UNUSED_RELEASE(gen_index); + assert(gen_index == op_index); + }); + + trainable_subgraphs[subg_index] = std::move(trainable_subg); + }); + } + else + { + // TODO Support models that have TrainableGraphs + throw std::runtime_error("TrainingCompiler: Invalid model"); + } + + // operation + _model.reset(); + + // Apply pass for trainable subgraphs + for (auto &&pair : trainable_subgraphs) + { + auto trainable_subg = pair.second; + auto subg_index = pair.first; + + compiler::pass::PassRunner{} + .append(std::make_unique<train::pass::LossInsertionPass>(*trainable_subg, &_training_info, + subg_index)) + .run(); + } + + // Change input shape according to batch_size + for (auto &&pair : trainable_subgraphs) + { + auto trainable_subg = pair.second; + + for (const auto &ind : trainable_subg->getInputs()) + { + auto &input = trainable_subg->operands().at(ind); + auto new_shape = input.info().shape(); + // TODO Consider batch size index + if (new_shape.dim(0) != 1) + throw std::runtime_error("the first dim is not 1. It is not supported yet."); + new_shape.dim(0) = _training_info.batchSize(); + input.info().shape(new_shape); + } + } + + /*************************************************** + * Backend independent analysis & optimization phase + ***************************************************/ + // TODO Handle dump level for each model + auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options->graph_dump_level); + onert::dumper::dot::DotDumper dot_dumper(dump_level); + + // Tracing context + auto tracing_ctx = std::make_unique<util::TracingCtx>(); + + // Lower: Assign backend + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::train::LoweredTrainableGraph>> + lowered_subgs; + { + for (auto &&pair : trainable_subgraphs) + { + auto &subg_index = pair.first; + auto trainable_subg = pair.second; + + // Lower: Assign backend + lowered_subgs[subg_index] = + std::make_unique<compiler::train::LoweredTrainableGraph>(*trainable_subg, *_options); + // Set tracing_ctx for copied graph + if (tracing_ctx != nullptr) + tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value()); + } + } + + for (const auto &pair : lowered_subgs) + { + const auto &subg_index = pair.first; + const auto &lowered_subg = pair.second; + dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value())); + } + + // Set derivatives as default tensor info + for (const auto &pair : lowered_subgs) + { + auto lowered_subg = pair.second.get(); + auto &tgraph = lowered_subg->trainable_graph(); + tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &obj) { + if (!obj.isConstant()) + { + auto deriv = std::make_unique<ir::Operand>(obj); + const auto gen_index = tgraph.addDerivative(index, std::move(deriv)); + assert(gen_index == index); + UNUSED_RELEASE(gen_index); + } + }); + } + + // Shape inference. + { + // Run the StaticShapeInfer of primary subg. All child StaticShapeInferers are called + // recursively + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers = + createStaticShapeInferers(lowered_subgs); + + const auto primary_subg_idx = ir::SubgraphIndex{0}; + inferers.at(primary_subg_idx)->infer(); + + for (const auto &pair_inferer : inferers) + { + const auto inferer = pair_inferer.second.get(); + inferer->dump(); + } + + // NOTE StaticDerivativeShapeInferer is allocated for each subgraph, + // so it does not support models that have controlflow operations yet. + for (auto &&pair : lowered_subgs) + { + auto &lowered_subg = pair.second; + auto inferer = std::make_unique<StaticDerivativeShapeInferer>(lowered_subg.get()); + inferer->infer(); + inferer->dump(); + } + } + + // Shape validation + for (const auto &pair : lowered_subgs) + { + auto &lowered_subg = pair.second; + compiler::ShapeValidator{lowered_subg->graph()}(); + } + + // TODO Validate shapes of derivative tensors + + // Create optimizer + // TODO Set properties of optimizer + std::shared_ptr<exec::train::optimizer::Optimizer> optimizer; + const auto &optim_info = _training_info.optimizerInfo(); + if (optim_info.optim_code == exec::train::optimizer::OptimizerCode::SGD) + optimizer = std::make_shared<exec::train::optimizer::SGD>(optim_info.learning_rate); + else + throw std::runtime_error("Invalid optimizer type, " + + exec::train::optimizer::toString(optim_info.optim_code)); + + /************************************************************* + * Backend independent analysis & optimization phase finished + *************************************************************/ + auto executors = std::make_shared<exec::train::TrainableExecutors>(); + for (auto &&pair : lowered_subgs) + { + auto const model_index = ir::ModelIndex{0}; + auto const subg_index = pair.first; + auto &lowered_subg = pair.second; + auto const indexed_ranks = lowered_subg->indexed_ranks(); + + ir::OperationDumper dumper("Executor generation of Subgraph " + + std::to_string(subg_index.value())); + lowered_subg->graph().operations().iterate( + [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); }); + + ExecutorFactoryArgs args; + args.tracing_ctx = tracing_ctx.get(); + args.options = _options; + args.model_index = model_index; + args.custom_kernel_builder = custom_kernel_builder; + auto executor = std::unique_ptr<exec::IExecutor>{ + ExecutorFactory::get().create(std::move(lowered_subg), executors, args, optimizer)}; + executor->setIndexedRanks(indexed_ranks); + executors->emplace(model_index, subg_index, std::move(executor)); + } + + /******************************** + * Code generation phase finished + ********************************/ + return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx)); +} + +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/TrainingCompiler.h b/runtime/onert/core/src/compiler/train/TrainingCompiler.h new file mode 100644 index 000000000..b93437217 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/TrainingCompiler.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file TrainingCompiler.h + * @brief This file contains TrainingCompiler class to define and run compilation phase + */ + +#ifndef __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H_ +#define __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H_ + +#include "compiler/CompilerOptions.h" +#include "compiler/ICompiler.h" +#include "compiler/train/TrainingInfo.h" +#include "ir/NNPkg.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +/** + * @brief Class to compile NN package + */ +class TrainingCompiler : public ICompiler +{ +public: + /** + * @brief Construct a new TrainingCompiler object for single model + * @param[in] model model to compile + * @param[in] inference_compiler Compiler for inference + * @param[in] coptions Compiler Options + * @param[in] training_info Training information + */ + explicit TrainingCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg, + std::vector<std::unique_ptr<CompilerOptions>> &copts, + const TrainingInfo &training_info); + + /** + * @brief Default Construct + * + */ + TrainingCompiler(void) = delete; + + /** + * @brief Destroy the TrainingCompiler object + */ + ~TrainingCompiler() = default; + +public: + /** + * @brief Do compilation with the options + * + * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation + */ + std::shared_ptr<CompilerArtifact> compile(void); + +private: + std::shared_ptr<ir::Model> _model; + CompilerOptions *_options; + const TrainingInfo _training_info; +}; + +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H_ diff --git a/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc new file mode 100644 index 000000000..6a5a052b6 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "UntrainableOperationConverter.h" + +#include "ir/train/operation/UntrainableOperation.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +UntrainableOperationConverter::UntrainableOperationConverter(ir::train::TrainableGraph &tgraph) + : _tgraph{tgraph}, _return_op{nullptr} +{ +} + +std::unique_ptr<ir::train::ITrainableOperation> UntrainableOperationConverter:: +operator()(const ir::IOperation &op) +{ + op.accept(*this); + + return std::move(_return_op); +} + +#define OP(InternalName) \ + void UntrainableOperationConverter::visit(const ir::operation::InternalName &node) \ + { \ + _return_op = \ + std::make_unique<ir::train::operation::UntrainableOperation<ir::operation::InternalName>>( \ + node); \ + } +#include "ir/Operations.lst" +#undef OP + +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h new file mode 100644 index 000000000..e960b3831 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__ +#define __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__ + +#include "ir/Operations.Include.h" +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableGraph.h" + +#include <memory> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +class UntrainableOperationConverter : public ir::OperationVisitor +{ +public: + UntrainableOperationConverter(ir::train::TrainableGraph &tgraph); + std::unique_ptr<ir::train::ITrainableOperation> operator()(const ir::IOperation &op); + +#define OP(InternalName) void visit(const ir::operation::InternalName &node); +#include "ir/Operations.lst" +#undef OP + +protected: + ir::train::TrainableGraph &_tgraph; + std::unique_ptr<ir::train::ITrainableOperation> _return_op; +}; + +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__ diff --git a/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc new file mode 100644 index 000000000..3e01a9739 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "LossInsertionPass.h" + +#include "ir/train/TrainableGraph.h" +#include "ir/train/operation/Loss.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ +namespace pass +{ + +void LossInsertionPass::run() +{ + const auto &loss_info = _training_info->lossInfo(); + + ir::operation::Loss::Param param; + param.op_type = loss_info.type; + + if (_trainable_graph.getOutputs().size() != 1) + { + throw std::runtime_error("LossInsertionPass: Not supported multiple outputs"); + } + + // TODO Consider SparseCategoricalCrossentropy y_true shape + // SparseCategoricalCrossentropy loss has a different y_true shape than y_pred. + + // TODO Implement Loop [0, getOutputs().size()) + // index: a loop index + const auto index = 0; + const auto &y_pred_index = _trainable_graph.getOutputs().at(index); + const auto &y_pred = _trainable_graph.operands().at(y_pred_index); + const auto &shape = y_pred.shape(); + const auto &type_info = y_pred.typeInfo(); + auto y_true_index = _trainable_graph.addOperand(shape, type_info); + ir::OperandIndexSequence inputs{y_pred_index, y_true_index}; + + // TODO Consider Reduction + // Some types of Reduction have the same shape y_true and output. + + const ir::TypeInfo float_op(ir::DataType::FLOAT32); + auto output_index = _trainable_graph.addOperand(ir::Shape{1}, float_op); + ir::OperandIndexSequence outputs{output_index}; + + auto loss_op = std::make_unique<ir::operation::Loss>(inputs, outputs, param); + auto trainable_loss_op = std::make_unique<ir::train::operation::Loss>(*loss_op); + + _trainable_graph.addOperation(std::move(trainable_loss_op)); + + _trainable_graph.addInput(y_true_index); + + // TODO Add loss as many as output size + _trainable_graph.addLoss(output_index, ir::IOIndex{index}); +} + +} // namespace pass +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h new file mode 100644 index 000000000..ed4d60c96 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__ +#define __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__ + +#include "Pass.h" + +#include "compiler/train/TrainingInfo.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ +namespace pass +{ + +class LossInsertionPass : public Pass +{ +public: + LossInsertionPass(ir::train::TrainableGraph &trainable_graph, const TrainingInfo *training_info, + const ir::SubgraphIndex &subg_index) + : Pass{trainable_graph, training_info}, _subg_index{subg_index} + { + } + +public: + std::string id() final { return "LossInsertionPass"; } + void run() final; + +private: + ir::SubgraphIndex _subg_index; +}; + +} // namespace pass +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__ diff --git a/runtime/onert/core/src/compiler/train/pass/Pass.h b/runtime/onert/core/src/compiler/train/pass/Pass.h new file mode 100644 index 000000000..d64c06cf4 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/pass/Pass.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_TRAIN_PASS_PASS_H__ +#define __ONERT_COMPILER_TRAIN_PASS_PASS_H__ + +#include "../../pass/IPass.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +class TrainableGraph; +} // namespace train +} // namespace ir +} // namespace onert + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +class TrainingInfo; + +namespace pass +{ + +class Pass : public compiler::pass::IPass +{ +public: + Pass(ir::train::TrainableGraph &trainable_graph, const TrainingInfo *training_info) + : _trainable_graph{trainable_graph}, _training_info{training_info} + { + } + virtual ~Pass() = default; + +protected: + ir::train::TrainableGraph &_trainable_graph; + const TrainingInfo *_training_info; +}; + +} // namespace pass +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_PASS_PASS_H__ diff --git a/runtime/onert/core/src/dumper/dot/DotBuilder.cc b/runtime/onert/core/src/dumper/dot/DotBuilder.cc index d4e4d5484..9257434fa 100644 --- a/runtime/onert/core/src/dumper/dot/DotBuilder.cc +++ b/runtime/onert/core/src/dumper/dot/DotBuilder.cc @@ -29,7 +29,7 @@ DotBuilder::DotBuilder() {} void DotBuilder::update(const Node &node_info) { add(node_info); - for (auto edge : node_info.out_edges()) + for (auto &&edge : node_info.out_edges()) { addEdge(node_info, *edge); } @@ -47,7 +47,7 @@ void DotBuilder::add(const Node &node) _dot << node.id(); std::stringstream ss; _dot << "["; - for (auto attr : node.attributes()) + for (auto &&attr : node.attributes()) { _dot << attr.first << "=\"" << attr.second << "\" "; } diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.cc b/runtime/onert/core/src/dumper/dot/DotDumper.cc index 0bb2fa11f..ab77a6c62 100644 --- a/runtime/onert/core/src/dumper/dot/DotDumper.cc +++ b/runtime/onert/core/src/dumper/dot/DotDumper.cc @@ -98,10 +98,10 @@ generate_dot_operations(const ir::Graph &graph, { ir::OperationIndexMap<std::unique_ptr<Operation>> dot_operations; const auto &operations = graph.operations(); - operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) { + operations.iterate([&](const ir::OperationIndex &index, const ir::IOperation &op) { auto node = std::make_unique<Operation>(index, op); - for (auto input : op.getInputs()) + for (auto &&input : op.getInputs()) { using onert::dumper::dot::Operand; @@ -113,7 +113,7 @@ generate_dot_operations(const ir::Graph &graph, input_node->addOutEdge(node.get()); } - for (auto output : op.getOutputs() | ir::Remove::UNDEFINED) + for (auto &&output : op.getOutputs() | ir::Remove::UNDEFINED) { using onert::dumper::dot::Operand; auto &output_node = dot_operands.at(output); @@ -126,7 +126,7 @@ generate_dot_operations(const ir::Graph &graph, return dot_operations; } -void update_lower_info(const compiler::LoweredGraph &lowered_graph, +void update_lower_info(const compiler::ILoweredGraph &lowered_graph, ir::OperandIndexMap<std::unique_ptr<Operand>> *dot_operands) { const auto &operands = lowered_graph.graph().operands(); @@ -153,11 +153,11 @@ void update_lower_info(const compiler::LoweredGraph &lowered_graph, }); } -void update_lower_info(const compiler::LoweredGraph &lowered_graph, +void update_lower_info(const compiler::ILoweredGraph &lowered_graph, ir::OperationIndexMap<std::unique_ptr<Operation>> *dot_operations) { const auto &operations = lowered_graph.graph().operations(); - operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &) { + operations.iterate([&](const ir::OperationIndex &index, const ir::IOperation &) { const auto lower_info = lowered_graph.lower_info().operation.getRawPtr(index); if (lower_info) { @@ -213,7 +213,8 @@ void DotDumper::dump(const ir::Graph &graph, const std::string &tag) dump_to_file(dot_operands, dot_operations, tag); } -void DotDumper::dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag) +// TODO Support derivative tensors +void DotDumper::dump(const compiler::ILoweredGraph &lowered_graph, const std::string &tag) { if (_level == Level::OFF) { diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h index 6249010d3..fca5f356c 100644 --- a/runtime/onert/core/src/dumper/dot/DotDumper.h +++ b/runtime/onert/core/src/dumper/dot/DotDumper.h @@ -15,7 +15,7 @@ */ #include "ir/Graph.h" -#include "compiler/LoweredGraph.h" +#include "compiler/ILoweredGraph.h" #ifndef __ONERT_DUMPER_DOT_DOT_DUMPER_H__ #define __ONERT_DUMPER_DOT_DOT_DUMPER_H__ @@ -57,7 +57,7 @@ public: * @param[in] tag The name of dot file that would be created * @return N/A */ - void dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag); + void dump(const compiler::ILoweredGraph &lowered_graph, const std::string &tag); private: Level _level; diff --git a/runtime/onert/core/src/dumper/dot/OperationNode.cc b/runtime/onert/core/src/dumper/dot/OperationNode.cc index 87c5ba148..2ef08c9c6 100644 --- a/runtime/onert/core/src/dumper/dot/OperationNode.cc +++ b/runtime/onert/core/src/dumper/dot/OperationNode.cc @@ -31,7 +31,7 @@ namespace dot const std::string Operation::OPERATION_SHAPE = "rect"; const std::string Operation::BG_COLOR_SCHEME = "pastel18"; -Operation::Operation(const ir::OperationIndex &index, const ir::Operation &node) +Operation::Operation(const ir::OperationIndex &index, const ir::IOperation &node) : Node{"operation" + std::to_string(index.value())} { setAttribute("label", std::to_string(index.value()) + " : " + node.name()); diff --git a/runtime/onert/core/src/dumper/dot/OperationNode.h b/runtime/onert/core/src/dumper/dot/OperationNode.h index 74a37d3fb..d9292ad0c 100644 --- a/runtime/onert/core/src/dumper/dot/OperationNode.h +++ b/runtime/onert/core/src/dumper/dot/OperationNode.h @@ -25,7 +25,7 @@ #define __ONERT_DUMPER_DOT_DOT_NODE_INFO_H__ #include "Node.h" -#include "ir/Operation.h" +#include "ir/IOperation.h" #include "ir/Index.h" namespace onert @@ -52,7 +52,7 @@ public: * @param[in] index operation index * @param[in] node operation object */ - Operation(const ir::OperationIndex &index, const ir::Operation &node); + Operation(const ir::OperationIndex &index, const ir::IOperation &node); }; } // namespace dot diff --git a/runtime/onert/core/src/dumper/h5/Dumper.cc b/runtime/onert/core/src/dumper/h5/Dumper.cc new file mode 100644 index 000000000..5e12c2dbb --- /dev/null +++ b/runtime/onert/core/src/dumper/h5/Dumper.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Dumper.h" + +#include <iostream> +#include <sstream> +#include <stdexcept> + +namespace onert +{ +namespace dumper +{ +namespace h5 +{ + +Dumper::Dumper(const std::string &filepath) : _file{filepath, H5F_ACC_CREAT | H5F_ACC_RDWR} {} + +} // namespace h5 +} // namespace dumper +} // namespace onert diff --git a/runtime/onert/core/src/dumper/h5/Dumper.h b/runtime/onert/core/src/dumper/h5/Dumper.h new file mode 100644 index 000000000..53d0e0332 --- /dev/null +++ b/runtime/onert/core/src/dumper/h5/Dumper.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_DUMPER_H5_DUMPER_H__ +#define __ONERT_DUMPER_H5_DUMPER_H__ + +#include "exec/MinMaxMap.h" + +#include <H5Cpp.h> +#include <string> + +namespace onert +{ +namespace dumper +{ +namespace h5 +{ + +class Dumper +{ +public: + /** + * @brief Construct dumper + * + * @param[in] path filepath to dump + * @throw H5::FileIException on error during file open/create + */ + Dumper(const std::string &filepath); + +protected: + H5::H5File _file; +}; + +} // namespace h5 +} // namespace dumper +} // namespace onert + +#endif // __ONERT_DUMPER_H5_DUMPER_H__ diff --git a/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc b/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc new file mode 100644 index 000000000..8a9de9f95 --- /dev/null +++ b/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MinMaxDumper.h" + +#include <iostream> +#include <sstream> +#include <stdexcept> + +namespace onert +{ +namespace dumper +{ +namespace h5 +{ + +static const char *h5_value_grpname = "value"; + +/* + * ensure grp_name exists in parent + */ +H5::Group ensureGroup(H5::Group parent, const char *child) +{ + H5::Exception::dontPrint(); + try + { + return parent.openGroup(child); + } + catch (H5::Exception &e) + { + return parent.createGroup(child); + } +} + +MinMaxDumper::MinMaxDumper(const std::string &filepath) : Dumper(filepath) +{ + auto root_grp = _file.openGroup("/"); + ensureGroup(root_grp, h5_value_grpname); +} + +void MinMaxDumper::dump(const exec::SMMinMaxMap &mmmap) const +{ + auto val_grp = _file.openGroup(h5_value_grpname); + auto num_run = val_grp.getNumObjs(); + auto num_grp = val_grp.createGroup(std::to_string(num_run)); + auto model_grp = ensureGroup(num_grp, "0"); + hsize_t dims[] = {2}; + H5::DataSpace dspace(1, dims); // rank=1, dim(0)=2, {min, max} + for (auto &&e : mmmap) + { + // key = {subg_idx, op_idx} = e.first + const auto subg_idx = e.first.first.value(); + const auto op_idx = e.first.second.value(); + auto subg_grp = ensureGroup(model_grp, std::to_string(subg_idx).c_str()); + auto op_dset = subg_grp.createDataSet(std::to_string(op_idx), H5::PredType::IEEE_F32BE, dspace); + op_dset.write(e.second.data, H5::PredType::NATIVE_FLOAT); + } +} + +} // namespace h5 +} // namespace dumper +} // namespace onert diff --git a/runtime/onert/core/src/dumper/h5/MinMaxDumper.h b/runtime/onert/core/src/dumper/h5/MinMaxDumper.h new file mode 100644 index 000000000..1f1b27c6e --- /dev/null +++ b/runtime/onert/core/src/dumper/h5/MinMaxDumper.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_DUMPER_H5_MINMAX_DUMPER_H__ +#define __ONERT_DUMPER_H5_MINMAX_DUMPER_H__ + +#include "exec/MinMaxMap.h" +#include "Dumper.h" + +#include <H5Cpp.h> +#include <string> + +namespace onert +{ +namespace dumper +{ +namespace h5 +{ + +// The hierachy of single model minmax h5 file +// +// GROUP / +// GROUP value +// └── GROUP run_idx +// └── GROUP model_idx +// └── GROUP subg_idx +// └── DATASET op_idx +// DATATYPE Float32 +// DATASPACE (2) +// DATA { min, max } +// GROUP name (optional, for debug) +// └── GROUP model_idx +// └── GROUP subg_idx +// └── ATTRIBUTE op_idx +// DATATYPE String +// DATA { "model/your/op/name"} +// +class MinMaxDumper : private Dumper +{ +public: + MinMaxDumper(const std::string &filepath); + /** + * @brief Dump minmax map + * + * @param[in] map single model minmax map + */ + void dump(const exec::SMMinMaxMap &map) const; + +private: + H5::Group _val_grp; +}; + +} // namespace h5 +} // namespace dumper +} // namespace onert + +#endif // __ONERT_DUMPER_H5_MINMAX_DUMPER_H__ diff --git a/runtime/onert/core/src/dumper/text/GraphDumper.cc b/runtime/onert/core/src/dumper/text/GraphDumper.cc index 80cfbbc34..6bd7904aa 100644 --- a/runtime/onert/core/src/dumper/text/GraphDumper.cc +++ b/runtime/onert/core/src/dumper/text/GraphDumper.cc @@ -18,6 +18,9 @@ #include "ir/Graph.h" #include "compiler/LoweredGraph.h" +#ifdef ONERT_TRAIN +#include "compiler/train/LoweredTrainableGraph.h" +#endif // ONERT_TRAIN #include "util/logging.h" #include "misc/string_helpers.h" @@ -34,7 +37,7 @@ namespace std::string formatOperandIndexSequence(const ir::OperandIndexSequence &seq) { std::vector<std::string> strs; - for (auto ind : seq) + for (auto &&ind : seq) strs.push_back(dumper::text::formatOperandBrief(ind)); return nnfw::misc::join(strs.begin(), strs.end(), ", "); } @@ -56,10 +59,9 @@ std::string formatOperand(const ir::Graph &, ir::OperandIndex ind) return ss.str(); } -std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind) +std::string formatOperation(const ir::IOperation &op, ir::OperationIndex ind) { std::stringstream ss; - const auto &op = graph.operations().at(ind); ss << formatOperandIndexSequence(op.getOutputs()); ss << " = "; @@ -69,13 +71,21 @@ std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind) return ss.str(); } +std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind) +{ + std::stringstream ss; + const auto &op = graph.operations().at(ind); + return formatOperation(op, ind); +} + void dumpGraph(const ir::Graph &graph) { VERBOSE(GraphDumper) << "{\n"; auto ops_topol = graph.topolSortOperations(); - for (auto op_ind : ops_topol) + for (auto &&op_ind : ops_topol) { - VERBOSE(GraphDumper) << " " << formatOperation(graph, op_ind) << "\n"; + const auto &op = graph.operations().at(op_ind); + VERBOSE(GraphDumper) << " " << formatOperation(op, op_ind) << "\n"; } VERBOSE(GraphDumper) << "}\n"; VERBOSE(GraphDumper) << std::endl; @@ -87,6 +97,14 @@ void dumpLoweredGraph(const compiler::LoweredGraph &lgraph) dumpGraph(lgraph.graph()); } +#ifdef ONERT_TRAIN +void dumpLoweredGraph(const compiler::train::LoweredTrainableGraph &lgraph) +{ + // TODO Graph dump with backend info + dumpGraph(lgraph.graph()); +} +#endif // ONERT_TRAIN + } // namespace text } // namespace dumper } // namespace onert diff --git a/runtime/onert/core/src/dumper/text/GraphDumper.h b/runtime/onert/core/src/dumper/text/GraphDumper.h index 0501ff050..ab0061465 100644 --- a/runtime/onert/core/src/dumper/text/GraphDumper.h +++ b/runtime/onert/core/src/dumper/text/GraphDumper.h @@ -24,7 +24,8 @@ namespace onert namespace ir { class Graph; -} +struct IOperation; +} // namespace ir } // namespace onert namespace onert @@ -32,7 +33,14 @@ namespace onert namespace compiler { class LoweredGraph; -} + +#ifdef ONERT_TRAIN +namespace train +{ +class LoweredTrainableGraph; +} // namespace train +#endif // ONERT_TRAIN +} // namespace compiler } // namespace onert namespace onert @@ -47,6 +55,9 @@ std::string formatOperand(const ir::Graph &, ir::OperandIndex ind); std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind); void dumpGraph(const ir::Graph &graph); void dumpLoweredGraph(const compiler::LoweredGraph &lgraph); +#ifdef ONERT_TRAIN +void dumpLoweredGraph(const compiler::train::LoweredTrainableGraph &lgraph); +#endif // ONERT_TRAIN } // namespace text } // namespace dumper diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc index 8dac1219e..e0b00077f 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.cc +++ b/runtime/onert/core/src/exec/DataflowExecutor.cc @@ -60,7 +60,7 @@ void DataflowExecutor::emplaceToReadyJobs(const uint32_t &id) void DataflowExecutor::notify(uint32_t finished_job_id) { - for (auto id : _output_info[finished_job_id]) + for (auto &&id : _output_info[finished_job_id]) { assert(_input_info[id] > 0); auto count = --_input_info[id]; @@ -90,7 +90,7 @@ DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lower uint32_t next_job_index = 0; std::unordered_map<ir::OperationIndex, uint32_t> op_to_job; const auto &operations = _lowered_graph->graph().operations(); - operations.iterate([&](const ir::OperationIndex &op_ind, const ir::Operation &) { + operations.iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &) { VERBOSE(DataflowExecutor) << "Create a job " << next_job_index << " with Operation " << op_ind << std::endl; _finished_jobs.emplace_back( @@ -102,12 +102,12 @@ DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lower _output_info.resize(next_job_index); _initial_input_info.resize(next_job_index, 0); - operations.iterate([&](const ir::OperationIndex &op_ind, const ir::Operation &op) { + operations.iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &op) { auto job_index = op_to_job[op_ind]; - for (auto output : op.getOutputs()) + for (auto &&output : op.getOutputs()) { // Update output and input info - operations.iterate([&](const ir::OperationIndex &op_cur_ind, const ir::Operation &op_cur) { + operations.iterate([&](const ir::OperationIndex &op_cur_ind, const ir::IOperation &op_cur) { if (op_cur.getInputs().contains(output)) { auto dep_index = op_to_job[op_cur_ind]; diff --git a/runtime/onert/core/src/exec/DynamicShapeInferer.cc b/runtime/onert/core/src/exec/DynamicShapeInferer.cc index fb8058d23..78b21cf49 100644 --- a/runtime/onert/core/src/exec/DynamicShapeInferer.cc +++ b/runtime/onert/core/src/exec/DynamicShapeInferer.cc @@ -253,7 +253,7 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op) So, only when all inputs are static, we can skip dynamic shape inference. */ bool all_static = true; - for (auto input_ind : op.getInputs()) + for (auto &&input_ind : op.getInputs()) { auto input = _tensor_registry->getITensor(input_ind); if (input->is_dynamic()) @@ -290,7 +290,7 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op) auto first_input_ind = op.getInputs().at(0); auto first_input = _tensor_registry->getITensor(first_input_ind); - for (auto input_ind : op.getInputs()) + for (auto &&input_ind : op.getInputs()) { auto input = _tensor_registry->getITensor(input_ind); if (input != first_input && !isConcatible(first_input, input, op.param().axis)) @@ -300,7 +300,7 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op) // getting output shape onert::shape_inference::Shapes in_shapes; - for (auto input_ind : op.getInputs()) + for (auto &&input_ind : op.getInputs()) { auto input = _tensor_registry->getITensor(input_ind); ir::Shape shape = input->getShape(); @@ -1042,7 +1042,7 @@ void DynamicShapeInferer::visit(const ir::operation::Split &op) // Return if all tensors are not dynamic bool has_dynamic = false; - for (const auto output_idx : op.getOutputs()) + for (const auto &output_idx : op.getOutputs()) { auto output = _tensor_registry->getITensor(output_idx); has_dynamic |= output->is_dynamic(); diff --git a/runtime/onert/core/src/exec/ExecTime.test.cc b/runtime/onert/core/src/exec/ExecTime.test.cc index 1f7152e7b..939184e4e 100644 --- a/runtime/onert/core/src/exec/ExecTime.test.cc +++ b/runtime/onert/core/src/exec/ExecTime.test.cc @@ -34,7 +34,7 @@ struct MockConfig : public IConfig std::string id() override { return "b1"; } bool initialize() override { return true; }; bool supportPermutation() override { return false; } - ir::Layout supportLayout(const ir::Operation &, ir::Layout) override + ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override { return ir::Layout::UNKNOWN; } diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc index 7d5b406ef..1384c9fdc 100644 --- a/runtime/onert/core/src/exec/Execution.cc +++ b/runtime/onert/core/src/exec/Execution.cc @@ -16,6 +16,8 @@ #include "exec/Execution.h" +#include "train/TrainableExecutors.h" + #include "util/logging.h" namespace onert @@ -151,6 +153,35 @@ void Execution::waitFinish() bool Execution::isFinished(void) const { return finished; } +#ifdef ONERT_TRAIN +void Execution::train(uint32_t training_step) +{ + auto execs = dynamic_cast<exec::train::TrainableExecutors *>(_executors.get()); + if (!execs) + { + throw std::runtime_error{"Supported only TrainableExecutors"}; + } + + VERBOSE(Execution) << "Start training" << std::endl; + + execs->train(_io_desc, training_step); + finished = true; + + VERBOSE(Execution) << "training finished" << std::endl; +} + +float Execution::getLoss(const ir::IOIndex &ind) +{ + auto execs = dynamic_cast<exec::train::TrainableExecutors *>(_executors.get()); + if (!execs) + { + throw std::runtime_error{"Supported only TrainableExecutors"}; + } + + return execs->getLoss(ind); +} +#endif // ONERT_TRAIN + ir::Shape Execution::getInputShape(ir::IOIndex ind) const { auto itr = _io_desc.dynamic_input_shapes.find(ind); @@ -180,5 +211,16 @@ ir::Shape Execution::getOutputShape(ir::IOIndex ind) const return output_desc->info.shape(); } +size_t Execution::getInputTotalSize(ir::IOIndex ind) const +{ + // TODO Support dynamic shape + return _executors->inputInfo(ind).total_size(); +} + +size_t Execution::getOutputTotalSize(ir::IOIndex ind) const +{ + return _executors->outputInfo(ind).total_size(); +} + } // namespace exec } // namespace onert diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc index 9abde7ba4..5245518a0 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.cc +++ b/runtime/onert/core/src/exec/ExecutionObservers.cc @@ -28,7 +28,7 @@ namespace { -void setUserData(const onert::ir::Graph &g, const onert::ir::Operation *op, +void setUserData(const onert::ir::Graph &g, const onert::ir::IOperation *op, decltype(EventCollector::Event::userData) &data) { // From a tensor of shape [a, b, c], this will return a string "shape(a b c)". diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h index 91fbac323..7e93ecf7c 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.h +++ b/runtime/onert/core/src/exec/ExecutionObservers.h @@ -24,7 +24,7 @@ #include "exec/IExecutor.h" #include "ir/Index.h" -#include "ir/Operation.h" +#include "ir/IOperation.h" #include "util/ITimer.h" #include "util/TracingCtx.h" diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc index 515cf8e48..ad0073477 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.cc +++ b/runtime/onert/core/src/exec/ExecutorBase.cc @@ -35,7 +35,7 @@ ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_gra { auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) { assert(tensors.empty()); - for (auto ind : ind_seq) + for (auto &&ind : ind_seq) { backend::ITensor *tensor = tensor_regs.getITensor(ind); assert(tensor != nullptr); diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h index 7aee3d9ee..4f97de922 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.h +++ b/runtime/onert/core/src/exec/ExecutorBase.h @@ -77,6 +77,7 @@ public: { return _output_tensors; } + backend::BackendContexts &getBackendContexts() { return _backend_contexts; } protected: /** diff --git a/runtime/onert/core/src/exec/Executors.cc b/runtime/onert/core/src/exec/Executors.cc index 7edd5aaa2..8a1be3df4 100644 --- a/runtime/onert/core/src/exec/Executors.cc +++ b/runtime/onert/core/src/exec/Executors.cc @@ -147,7 +147,7 @@ void Executors::checkSupportedMultimodel() const // Assumption: edges // m1 < m2, s1 == 0 and s2 == 0 if edge 'm1:s1:o1 -> m2:s2:o2' - for (auto edge : _model_edges->edges) + for (auto &&edge : _model_edges->edges) { auto const model_from = std::get<ir::ModelIndex>(edge.from); auto const model_to = std::get<ir::ModelIndex>(edge.to); diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc index f87c271f7..578123a54 100644 --- a/runtime/onert/core/src/exec/FunctionSequence.cc +++ b/runtime/onert/core/src/exec/FunctionSequence.cc @@ -16,7 +16,6 @@ #include "exec/FunctionSequence.h" -#include "ir/Operation.h" #include "backend/ITensorRegistry.h" #include "util/logging.h" diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h index a833466da..cc073411a 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.h +++ b/runtime/onert/core/src/exec/LinearExecutor.h @@ -52,7 +52,7 @@ public: const std::vector<ir::OperationIndex> &order, const util::TracingCtx *tracing_ctx) : ExecutorBase{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, tracing_ctx} { - for (auto index : order) + for (auto &&index : order) { _code.emplace_back(std::move(code_map.at(index))); } diff --git a/runtime/onert/core/src/exec/MinMaxRecorder.cc b/runtime/onert/core/src/exec/MinMaxRecorder.cc new file mode 100644 index 000000000..88fc104d1 --- /dev/null +++ b/runtime/onert/core/src/exec/MinMaxRecorder.cc @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MinMaxRecorder.h" + +#include "backend/ITensor.h" + +#include <cassert> +#include <cmath> + +namespace onert +{ +namespace exec +{ + +MinMaxRecorder::MinMaxRecorder(const std::string &minmax_filepath, const ir::Graph &graph, + const backend::BackendContexts &backend_contexts) + : _graph{graph}, _backend_contexts{backend_contexts}, _h5dumper(minmax_filepath) +{ +} + +void MinMaxRecorder::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_idx, + ir::OperationIndex op_idx, const backend::Backend *backend) +{ + const auto &tensor_reg = _backend_contexts.at(backend)->tensor_registry; + const auto &op = _graph.operations().at(op_idx); + const auto &outputs = op.getOutputs(); + // TODO: Support multiple output + if (outputs.size() != 1) + throw std::runtime_error("Only 1 output operator is supported for recording minmax."); + + auto tensor = tensor_reg->getITensor(outputs.at(0)); + + // Logic copied from MinMaxObserver.cpp. + + // Filter Ops + if (tensor->is_constant()) + return; + + if (tensor->data_type() != ir::DataType::FLOAT32) + return; + + switch (op.opcode()) + { + // Operators with multiple outputs + case ir::OpCode::If: + case ir::OpCode::Split: + case ir::OpCode::SplitV: + case ir::OpCode::TopKV2: + case ir::OpCode::Unpack: + case ir::OpCode::While: + return; + // NOTE: Sin, Cos, Tanh's output is in [-1, 1] + // We may not need to dump those operators. + default:; // Do Nothing + } + + // Otherwise, dump! + assert(tensor->data_type() == ir::DataType::FLOAT32); + const auto data = reinterpret_cast<float *>(tensor->buffer()); + const auto num_elements = tensor->total_size() / sizeof(float); + + float max = std::numeric_limits<float>::lowest(); + float min = std::numeric_limits<float>::max(); + + bool all_nan = true; + for (size_t i = 0; i < num_elements; ++i) + { + const float number = data[i]; + if (std::isnan(number)) + continue; + + if (number == std::numeric_limits<float>::lowest()) + continue; + + all_nan = false; + + if (number > max) + max = number; + + if (number < min) + min = number; + } + + if (all_nan) + throw std::runtime_error("All values are NaN(Not a Number)"); + + _minmax_map.append({subg_idx, op_idx}, min, max); +} + +void MinMaxRecorder::handleSubgraphEnd(ir::SubgraphIndex) +{ + // It would be better to dump at the end of model execution, not subgraph + // But it requires more changes than subgraph. + _h5dumper.dump(_minmax_map); +} + +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/MinMaxRecorder.h b/runtime/onert/core/src/exec/MinMaxRecorder.h new file mode 100644 index 000000000..7a0817f5f --- /dev/null +++ b/runtime/onert/core/src/exec/MinMaxRecorder.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_EXEC_MINMAX_RECORDER__ +#define __ONERT_EXEC_MINMAX_RECORDER__ + +#include "ExecutionObservers.h" +#include "ir/Index.h" +#include "exec/MinMaxMap.h" +#include "../dumper/h5/MinMaxDumper.h" + +#include <memory> + +namespace onert +{ +namespace exec +{ + +class MinMaxRecorder : public IExecutionObserver +{ +public: + MinMaxRecorder(const std::string &minmax_filepath, const ir::Graph &graph, + const backend::BackendContexts &backend_contexts); + void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex, + const backend::Backend *) override + { + return; + } + void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex, + const backend::Backend *) override; + void handleSubgraphEnd(ir::SubgraphIndex) override; + +private: + const ir::Graph &_graph; + const backend::BackendContexts &_backend_contexts; + dumper::h5::MinMaxDumper _h5dumper; + SMMinMaxMap _minmax_map; +}; + +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_MINMAX_RECORDER__ diff --git a/runtime/onert/core/src/exec/ParallelScheduler.cc b/runtime/onert/core/src/exec/ParallelScheduler.cc index 456663f91..538945631 100644 --- a/runtime/onert/core/src/exec/ParallelScheduler.cc +++ b/runtime/onert/core/src/exec/ParallelScheduler.cc @@ -30,7 +30,7 @@ ParallelScheduler::ParallelScheduler(const BackendSet &backends) { assert(!backends.empty()); - for (auto backend : backends) + for (auto &&backend : backends) { _thread_pools[backend] = std::make_unique<ThreadPool>(); } diff --git a/runtime/onert/core/src/exec/train/TrainableExecutor.cc b/runtime/onert/core/src/exec/train/TrainableExecutor.cc new file mode 100644 index 000000000..9c7e70c29 --- /dev/null +++ b/runtime/onert/core/src/exec/train/TrainableExecutor.cc @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TrainableExecutor.h" +#ifdef RUY_PROFILER +#include "ruy/profiler/instrumentation.h" +#endif + +#include <misc/polymorphic_downcast.h> + +namespace onert +{ +namespace exec +{ +namespace train +{ + +TrainableExecutor::TrainableExecutor( + std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph, + backend::train::TrainableBackendContexts &&backend_contexts, + const compiler::train::TensorRegistries &tensor_regs, + compiler::train::TrainableCodeMap &&code_map, const std::vector<ir::OperationIndex> &order, + const util::TracingCtx *tracing_ctx) + : _lowered_graph{std::move(lowered_graph)}, _backend_contexts{std::move(backend_contexts)}, + _trainable_graph{_lowered_graph->trainable_graph()}, _tensor_regs{std::move(tensor_regs)}, + _mutex(), _tracing_ctx(tracing_ctx) +{ + auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) { + assert(tensors.empty()); + for (auto &&ind : ind_seq) + { + backend::ITensor *tensor = tensor_regs.getITensor(ind); + assert(tensor != nullptr); + auto io_tensor = nnfw::misc::polymorphic_downcast<backend::builtin::IOTensor *>(tensor); + tensors.push_back(io_tensor); + } + }; + build_tensor_list(_trainable_graph.getInputs(), _input_tensors); + build_tensor_list(_trainable_graph.getOutputs(), _output_tensors); + + for (auto &&index : order) + { + auto &trainable_code = code_map.at(index); + _code.emplace_back(std::move(trainable_code)); + } +} + +void TrainableExecutor::execute(const std::vector<backend::IPortableTensor *> &, + const std::vector<backend::IPortableTensor *> &) +{ + throw std::runtime_error("TrainableExecutor does not support multiple subgraphs yet"); +} + +void TrainableExecutor::forward(const IODescription &desc, bool training) +{ + // For thread-safe, use mutex + // TODO: if all used backends on this executor are thread-safe, + // do not need to use mutex (otherwise, use mutex) + std::lock_guard<std::mutex> lock(_mutex); + + // TODO Update IO tensors if desc has dynamic input + // Set input(s) + assert(_input_tensors.size() == desc.inputs.size()); + for (uint32_t i = 0; i < _input_tensors.size(); ++i) + { + auto tensor = _input_tensors[i]; + + // TODO Check if (desc.inputs[i] == nullptr) + // TODO Better design for ITensor? (we need const_cast as ITensor is writable) + tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)), + desc.inputs[i]->size); + } + + if (!training) + { + // Set output(s) + assert(_output_tensors.size() == desc.outputs.size()); + for (uint32_t i = 0; i < _output_tensors.size(); ++i) + { + auto tensor = _output_tensors[i]; + + if (desc.outputs[i] == nullptr) + throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."}; + tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size); + } + } + + forwardImpl(training); + + // TODO Update output(s) desc if desc has dynamic input +} + +void TrainableExecutor::forwardImpl(bool training) +{ + if (_tracing_ctx) + { + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_trainable_graph.graph()); + + _subject.notifySubgraphBegin(profiling_subg_index); + for (auto &&code : _code) + { + const auto backend = code.lower_info->backend(); +// TODO : Move ruy profiler into ExecutionObserver +#ifdef RUY_PROFILER + ruy::profiler::ScopeLabel label(code.op->name()); +#endif + _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend); + + auto &tn_seq = code.tn_seq; + tn_seq->forward(training); + + _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend); + } + _subject.notifySubgraphEnd(profiling_subg_index); + } + else + { + for (auto &&code : _code) + { +// TODO : Move ruy profiler into ExecutionObserver +#ifdef RUY_PROFILER + ruy::profiler::ScopeLabel label(code.op->name()); +#endif + auto &tn_seq = code.tn_seq; + tn_seq->forward(training); + } + } +} + +void TrainableExecutor::backward(const IODescription &, uint32_t training_step) +{ + // For thread-safe, use mutex + // TODO: if all used backends on this executor are thread-safe, + // do not need to use mutex (otherwise, use mutex) + std::lock_guard<std::mutex> lock(_mutex); + + backwardImpl(training_step); +} + +void TrainableExecutor::backwardImpl(uint32_t training_step) +{ + if (_tracing_ctx) + { + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_trainable_graph.graph()); + + _subject.notifySubgraphBegin(profiling_subg_index); + for (auto it = _code.rbegin(); it != _code.rend(); ++it) + { + const auto &code = *it; + const auto backend = code.lower_info->backend(); +// TODO : Move ruy profiler into ExecutionObserver +#ifdef RUY_PROFILER + ruy::profiler::ScopeLabel label(code.op->name()); +#endif + _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend); + + auto &tn_seq = code.tn_seq; + tn_seq->backward(training_step); + + _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend); + } + _subject.notifySubgraphEnd(profiling_subg_index); + } + else + { + for (auto it = _code.rbegin(); it != _code.rend(); ++it) + { + const auto &code = *it; +// TODO : Move ruy profiler into ExecutionObserver +#ifdef RUY_PROFILER + ruy::profiler::ScopeLabel label(code.op->name()); +#endif + auto &tn_seq = code.tn_seq; + tn_seq->backward(training_step); + } + } +} + +float TrainableExecutor::getLoss(const ir::IOIndex &pred_io_ind) const +{ + const auto &loss_ind = _trainable_graph.getLossIndex(pred_io_ind); + if (loss_ind.undefined()) + throw std::runtime_error{"Loss " + std::to_string(loss_ind.value()) + " is not defined."}; + backend::ITensor *tensor = _tensor_regs.getITensor(loss_ind); + auto loss_buf = reinterpret_cast<float *>(tensor->buffer()); + return *loss_buf; +} + +} // namespace train +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/train/TrainableExecutor.h b/runtime/onert/core/src/exec/train/TrainableExecutor.h new file mode 100644 index 000000000..6b645305f --- /dev/null +++ b/runtime/onert/core/src/exec/train/TrainableExecutor.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_ +#define __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_ + +#include "exec/IExecutor.h" + +#include "../ExecutionObservee.h" +#include "../../compiler/train/TensorRegistries.h" + +#include "backend/train/TrainableBackendContext.h" +#include "compiler/train/TrainableCodeMap.h" +#include "compiler/train/LoweredTrainableGraph.h" +#include "ir/Index.h" +#include "util/TracingCtx.h" + +namespace onert +{ +namespace exec +{ +namespace train +{ + +class TrainableExecutor : public IExecutor +{ +public: + /** + * @brief Construct a new TrainableExecutor object + * @param lowered_graph LoweredTrainableGraph object + * @param tensor_builders Tensor builders that are currently used + * @param code_map @c ir::Operation and its code map + */ + TrainableExecutor(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph, + backend::train::TrainableBackendContexts &&backend_contexts, + const compiler::train::TensorRegistries &tensor_regs, + compiler::train::TrainableCodeMap &&code_map, + const std::vector<ir::OperationIndex> &order, + const util::TracingCtx *tracing_ctx); + +public: + const ir::Graph &graph() const final { return _trainable_graph.graph(); } + + void execute(const IODescription &desc) override { forward(desc, false); }; + + void execute(const std::vector<backend::IPortableTensor *> &inputs, + const std::vector<backend::IPortableTensor *> &outputs) override; + + void forward(const IODescription &desc, bool training); + void backward(const IODescription &desc, uint32_t training_step); + + // Used only in Dataflow and Parallel Executors + void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final + { + _indexed_ranks = std::move(ranks); + }; + + void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); }; + + const std::vector<backend::builtin::IOTensor *> &getInputTensors() const override + { + return _input_tensors; + } + + const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override + { + return _output_tensors; + } + + float getLoss(const ir::IOIndex &pred_io_ind) const; + + backend::train::TrainableBackendContexts &getBackendContexts() { return _backend_contexts; } + +private: + void forwardImpl(bool training); + void backwardImpl(uint32_t training_step); + +private: + std::vector<compiler::train::TrainableCodeAndInfo> _code; + ExecutionObservee _subject; + std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; + std::unique_ptr<compiler::train::LoweredTrainableGraph> _lowered_graph; + backend::train::TrainableBackendContexts _backend_contexts; + const ir::train::TrainableGraph &_trainable_graph; + compiler::train::TensorRegistries _tensor_regs; + std::vector<backend::builtin::IOTensor *> _input_tensors; + std::vector<backend::builtin::IOTensor *> _output_tensors; + std::mutex _mutex; + const util::TracingCtx *_tracing_ctx; +}; + +} // namespace train +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_ diff --git a/runtime/onert/core/src/exec/train/TrainableExecutors.cc b/runtime/onert/core/src/exec/train/TrainableExecutors.cc new file mode 100644 index 000000000..ba39bf0f0 --- /dev/null +++ b/runtime/onert/core/src/exec/train/TrainableExecutors.cc @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TrainableExecutors.h" + +#include "../../backend/builtin/IOTensor.h" + +#include <misc/polymorphic_downcast.h> + +namespace onert +{ +namespace exec +{ +namespace train +{ + +void TrainableExecutors::emplace(const ir::ModelIndex &, const ir::SubgraphIndex &subg_index, + std::unique_ptr<IExecutor> exec) +{ + std::unique_ptr<TrainableExecutor> t_exec{ + nnfw::misc::polymorphic_downcast<TrainableExecutor *>(exec.release())}; + _executors.emplace(subg_index, std::move(t_exec)); +} + +TrainableExecutor *TrainableExecutors::at(const ir::ModelIndex &, + const ir::SubgraphIndex &subg_index) const +{ + return _executors.at(subg_index).get(); +} + +uint32_t TrainableExecutors::inputSize() const { return entryExecutor()->getInputTensors().size(); } + +uint32_t TrainableExecutors::outputSize() const +{ + return entryExecutor()->getOutputTensors().size(); +} + +const ir::OperandInfo &TrainableExecutors::inputInfo(const ir::IOIndex &index) const +{ + return entryExecutor()->getInputTensors().at(index.value())->orig_info(); +} + +const ir::OperandInfo &TrainableExecutors::outputInfo(const ir::IOIndex &index) const +{ + return entryExecutor()->getOutputTensors().at(index.value())->orig_info(); +} + +void TrainableExecutors::execute(const IODescription &desc) +{ + if (_executors.size() > 1) + throw std::runtime_error("TrainableExecutors does not support multiple executors yet"); + entryExecutor()->forward(desc, false); + + // TODO Support multple executors +} + +void TrainableExecutors::train(const IODescription &desc, uint32_t training_step) +{ + if (_executors.size() > 1) + throw std::runtime_error("TrainableExecutors does not support multiple executors yet"); + entryExecutor()->forward(desc, true); + entryExecutor()->backward(desc, training_step); + + // TODO Support multple executors +} + +float TrainableExecutors::getLoss(const ir::IOIndex &index) const +{ + if (_executors.size() > 1) + throw std::runtime_error("TrainableExecutors does not support multiple executors yet"); + return entryExecutor()->getLoss(index); +} + +} // namespace train +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/train/TrainableExecutors.h b/runtime/onert/core/src/exec/train/TrainableExecutors.h new file mode 100644 index 000000000..db6d198b1 --- /dev/null +++ b/runtime/onert/core/src/exec/train/TrainableExecutors.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__ +#define __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__ + +#include "TrainableExecutor.h" +#include "exec/IExecutors.h" +#include "ir/NNPkg.h" + +namespace onert +{ +namespace exec +{ +namespace train +{ + +/** + * @brief Class to gather executor set for trainable model NN package + */ +class TrainableExecutors : public IExecutors +{ +public: + /** + * @brief Construct a new TrainableExecutors object + */ + TrainableExecutors(void) = default; + TrainableExecutors(const TrainableExecutors &) = delete; + TrainableExecutors(TrainableExecutors &&) = default; + + /** + * @brief Destroy the TrainableExecutors object + */ + ~TrainableExecutors() = default; + +public: + TrainableExecutors &operator=(const TrainableExecutors &) = delete; + TrainableExecutors &operator=(TrainableExecutors &&) = default; + +public: + void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index, + std::unique_ptr<IExecutor> exec) override; + + TrainableExecutor *at(const ir::ModelIndex &model_index, + const ir::SubgraphIndex &subg_index) const override; + + TrainableExecutor *entryExecutor() const { return at(ir::ModelIndex{0}, ir::SubgraphIndex{0}); } + + uint32_t inputSize() const override; + + uint32_t outputSize() const override; + + const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override; + + const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override; + + void execute(const IODescription &desc) override; + + /** + * @brief Train + * + * @param desc IO information + * @param training_step The number of iterations of an training process. + * In other words, the number of gradient update. + */ + void train(const IODescription &desc, uint32_t training_step); + + float getLoss(const ir::IOIndex &index) const; + +private: + // TODO Append model index to ModelIndex + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<TrainableExecutor>> _executors; +}; + +} // namespace train +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__ diff --git a/runtime/onert/core/src/exec/train/TrainableFnSequence.cc b/runtime/onert/core/src/exec/train/TrainableFnSequence.cc new file mode 100644 index 000000000..084b3d708 --- /dev/null +++ b/runtime/onert/core/src/exec/train/TrainableFnSequence.cc @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "exec/train/TrainableFnSequence.h" + +namespace onert +{ +namespace exec +{ +namespace train +{ + +void TrainableFnSequence::forward(bool training) +{ + for (const auto &function : _functions) + { + function->forward(training); + } +} + +void TrainableFnSequence::backward(uint32_t training_step) +{ + for (auto it = _functions.rbegin(); it != _functions.rend(); ++it) + { + (*it)->backward(); + } + + for (const auto &applier : _appliers) + { + applier->applyGradient(training_step); + } +} + +void TrainableFnSequence::append(std::unique_ptr<ITrainableFunction> &&function) +{ + _functions.push_back(std::move(function)); +} + +void TrainableFnSequence::append(std::unique_ptr<IGradientApplier> &&applier) +{ + _appliers.push_back(std::move(applier)); +} + +void TrainableFnSequence::iterate(const std::function<void(ITrainableFunction &)> &fn) +{ + for (const auto &func : _functions) + { + fn(*func); + } +} + +} // namespace train +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc b/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc new file mode 100644 index 000000000..72b581bf6 --- /dev/null +++ b/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "exec/train/optimizer/OptimizerCode.h" + +#include <unordered_map> + +namespace onert +{ +namespace exec +{ +namespace train +{ +namespace optimizer +{ + +std::string toString(OptimizerCode code) +{ + static const std::unordered_map<OptimizerCode, const char *> map{ + {OptimizerCode::Invalid, "Invalid"}, + {OptimizerCode::SGD, "SGD"}, + {OptimizerCode::Adam, "Adam"}}; + return map.at(code); +} + +} // namespace optimizer +} // namespace train +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h b/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h new file mode 100644 index 000000000..66a08b50f --- /dev/null +++ b/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__ +#define __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__ + +#include "backend/IPortableTensor.h" + +namespace onert +{ +namespace exec +{ +namespace train +{ +namespace optimizer +{ + +template <typename T, typename L> +void elementwise(const ir::Shape &shape, const backend::ITensor &src, backend::ITensor &dst, + const L &f) +{ + ShapeLoop(shape, [&](const ir::Coordinates &coords) { + const T src_val = *reinterpret_cast<const T *>(src.buffer() + src.calcOffset(coords)); + T *dst_data = reinterpret_cast<T *>(dst.buffer() + dst.calcOffset(coords)); + *dst_data = f(src_val, *dst_data); + }); +} + +} // namespace optimizer +} // namespace train +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__ diff --git a/runtime/onert/core/src/exec/train/optimizer/SGD.cc b/runtime/onert/core/src/exec/train/optimizer/SGD.cc new file mode 100644 index 000000000..abfbc1b4b --- /dev/null +++ b/runtime/onert/core/src/exec/train/optimizer/SGD.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <exec/train/optimizer/SGD.h> + +#include "OptimizerHelpers.h" + +namespace onert +{ +namespace exec +{ +namespace train +{ +namespace optimizer +{ + +double SGD::getLearningRate(uint32_t) const +{ + // TODO Use iteration, momentum, and nesterov + return _learning_rate; +} + +void SGD::applyGradient(const UpdateFactors &factors) const +{ + const auto lr = getLearningRate(std::get<size_t>(factors)); + const auto &grad_tensor = std::get<const backend::IPortableTensor &>(factors); + auto &trainable_tensor = std::get<backend::train::ITrainableTensor &>(factors); + assert(trainable_tensor.data_type() == grad_tensor.data_type()); + + const auto shape = trainable_tensor.getShape(); + const auto &grad_shape = grad_tensor.get_info().shape(); + + // TODO Support for different shapes + if (shape != grad_shape) + { + throw std::runtime_error("SGD: Invalid gradient tensor"); + } + + switch (grad_tensor.data_type()) + { + case ir::DataType::FLOAT32: + elementwise<float>(shape, grad_tensor, trainable_tensor, + [&](float src, float dst) -> float { return dst - src * lr; }); + break; + default: + throw std::runtime_error("SGD: Not supported data type"); + } +} + +} // namespace optimizer +} // namespace train +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc index 28cf4137d..ef0f988fa 100644 --- a/runtime/onert/core/src/ir/Graph.cc +++ b/runtime/onert/core/src/ir/Graph.cc @@ -42,33 +42,33 @@ OperandIndex Graph::addOperand(OperandIndex index, std::unique_ptr<Operand> &&op return _operands.push(std::move(operand), index); } -bool Graph::checkOperandsForOperation(const Operation &operation) +bool Graph::checkOperandsForOperation(const IOperation &operation) { auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED; auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED; - for (auto input : inputs) + for (auto &&input : inputs) if (!operands().exist(input)) return false; - for (auto input : outputs) + for (auto &&input : outputs) if (!operands().exist(input)) return false; return true; } -void Graph::linkOperandToOperation(OperationIndex index, const Operation &operation) +void Graph::linkOperandToOperation(OperationIndex index, const IOperation &operation) { auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED; auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED; - for (auto input : inputs) + for (auto &&input : inputs) operands().at(input).insertUse(index); - for (auto output : outputs) + for (auto &&output : outputs) operands().at(output).setDef(index); } -OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&operation) +OperationIndex Graph::addOperation(std::unique_ptr<IOperation> &&operation) { - const Operation &op_ref = *operation; + const IOperation &op_ref = *operation; if (!checkOperandsForOperation(op_ref)) return OperationIndex{}; auto ind = _operations.push(std::move(operation)); @@ -77,9 +77,9 @@ OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&operation) return ind; } -OperationIndex Graph::addOperation(OperationIndex index, std::unique_ptr<Operation> &&operation) +OperationIndex Graph::addOperation(OperationIndex index, std::unique_ptr<IOperation> &&operation) { - const Operation &op_ref = *operation; + const IOperation &op_ref = *operation; if (!checkOperandsForOperation(op_ref)) return OperationIndex{}; auto ind_gen = _operations.push(std::move(operation), index); @@ -91,12 +91,35 @@ OperationIndex Graph::addOperation(OperationIndex index, std::unique_ptr<Operati return index; } +OperationIndex Graph::replaceOperation(OperationIndex index, + std::unique_ptr<IOperation> &&operation) +{ + const IOperation &op_ref = *operation; + if (!checkOperandsForOperation(op_ref) || !_operations.exist(index)) + return OperationIndex{}; + + // Check the new operation has the same inputs/outputs as the existing operation + const auto &old_op = _operations.at(index); + if (!(old_op.getInputs() == op_ref.getInputs() && old_op.getOutputs() == op_ref.getOutputs())) + { + return OperationIndex{}; + } + + return _operations.set(index, std::move(operation)); +} + void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data) { assert(_operands.exist(ind)); _operands.at(ind).data(std::move(data)); } +void Graph::changeShape(const OperandIndex &ind, const ir::Shape &new_shape) +{ + assert(_operands.exist(ind)); + _operands.at(ind).info().shape(new_shape); +} + void Graph::addInput(const OperandIndex &ind, const std::string &name) { if (!name.empty()) @@ -123,7 +146,7 @@ IOIndex Graph::getOutputIndex(const std::string &name) const return (itr == _name_to_output.end()) ? IOIndex{} : itr->second; } -void Graph::verify(void) +void Graph::verify(void) const { // Call graph verifications for the MODEL phase { @@ -144,14 +167,14 @@ void Graph::verify(void) void Graph::initializeUseDef() { - operations().iterate([&](const OperationIndex &index, const Operation &node) -> void { + operations().iterate([&](const OperationIndex &index, const IOperation &node) -> void { auto outputs = node.getOutputs(); - for (auto output : outputs | ir::Remove::UNDEFINED) + for (auto &&output : outputs | ir::Remove::UNDEFINED) { operands().at(output).setDef(index); } - for (auto input : node.getInputs() | ir::Remove::UNDEFINED) + for (auto &&input : node.getInputs() | ir::Remove::UNDEFINED) { operands().at(input).insertUse(index); } @@ -163,15 +186,15 @@ std::vector<ir::OperationIndex> Graph::topolSortOperations() const std::vector<ir::OperationIndex> ret; util::Set<ir::OperationIndex> unvisited; operations().iterate( - [&](const ir::OperationIndex &index, const ir::Operation &) { unvisited.add(index); }); + [&](const ir::OperationIndex &index, const ir::IOperation &) { unvisited.add(index); }); - std::function<void(const ir::OperationIndex &, const ir::Operation &)> dfs = - [&](const ir::OperationIndex &index, const ir::Operation &op) -> void { + std::function<void(const ir::OperationIndex &, const ir::IOperation &)> dfs = + [&](const ir::OperationIndex &index, const ir::IOperation &op) -> void { if (!unvisited.contains(index)) return; unvisited.remove(index); - for (const auto output : op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto &output : op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { const auto &operand = operands().at(output); for (const auto &use : operand.getUses()) diff --git a/runtime/onert/core/src/ir/LayoutSet.cc b/runtime/onert/core/src/ir/LayoutSet.cc index bd3f438ad..732460aa2 100644 --- a/runtime/onert/core/src/ir/LayoutSet.cc +++ b/runtime/onert/core/src/ir/LayoutSet.cc @@ -23,7 +23,7 @@ namespace ir LayoutSet::LayoutSet(std::initializer_list<Layout> layouts) { - for (auto layout : layouts) + for (auto &&layout : layouts) { _set.insert(layout); } @@ -32,7 +32,7 @@ LayoutSet::LayoutSet(std::initializer_list<Layout> layouts) LayoutSet LayoutSet::operator|(const LayoutSet &other) const { auto ret = *this; - for (auto layout : other) + for (auto &&layout : other) { ret.add(layout); } @@ -42,7 +42,7 @@ LayoutSet LayoutSet::operator|(const LayoutSet &other) const LayoutSet LayoutSet::operator&(const LayoutSet &other) const { LayoutSet ret; - for (auto layout : other) + for (auto &&layout : other) { if (contains(layout)) { @@ -55,7 +55,7 @@ LayoutSet LayoutSet::operator&(const LayoutSet &other) const LayoutSet LayoutSet::operator-(const LayoutSet &other) const { auto ret = *this; - for (auto layout : other) + for (auto &&layout : other) { ret.remove(layout); } diff --git a/runtime/onert/core/src/ir/LayoutSet.h b/runtime/onert/core/src/ir/LayoutSet.h index 6ce4e38c6..be077f2f0 100644 --- a/runtime/onert/core/src/ir/LayoutSet.h +++ b/runtime/onert/core/src/ir/LayoutSet.h @@ -17,6 +17,7 @@ #ifndef __ONERT_IR_LAYOUT_SET_H__ #define __ONERT_IR_LAYOUT_SET_H__ +#include <cstdint> #include <initializer_list> #include <unordered_set> diff --git a/runtime/onert/core/src/ir/OperandIndexSequence.cc b/runtime/onert/core/src/ir/OperandIndexSequence.cc index b092f5cee..a15b6d0d6 100644 --- a/runtime/onert/core/src/ir/OperandIndexSequence.cc +++ b/runtime/onert/core/src/ir/OperandIndexSequence.cc @@ -31,7 +31,7 @@ OperandIndexSequence::OperandIndexSequence(std::initializer_list<OperandIndex> l OperandIndexSequence::OperandIndexSequence(std::initializer_list<int32_t> list) { - for (auto val : list) + for (auto &&val : list) { _vec.emplace_back(static_cast<uint32_t>(val)); } @@ -39,7 +39,7 @@ OperandIndexSequence::OperandIndexSequence(std::initializer_list<int32_t> list) OperandIndexSequence::OperandIndexSequence(std::initializer_list<uint32_t> list) { - for (auto val : list) + for (auto &&val : list) { _vec.emplace_back(val); } @@ -55,6 +55,11 @@ void OperandIndexSequence::replace(const OperandIndex &from, const OperandIndex std::replace(_vec.begin(), _vec.end(), from, to); } +bool OperandIndexSequence::operator==(const OperandIndexSequence &other) const +{ + return _vec == other._vec; +} + OperandIndexSequence OperandIndexSequence::operator+(const OperandIndexSequence &other) const { OperandIndexSequence ret = *this; diff --git a/runtime/onert/core/src/ir/OperationCloner.cc b/runtime/onert/core/src/ir/OperationCloner.cc index c06315814..64e1cc807 100644 --- a/runtime/onert/core/src/ir/OperationCloner.cc +++ b/runtime/onert/core/src/ir/OperationCloner.cc @@ -57,7 +57,7 @@ std::unique_ptr<Operation> OperationCloner::releaseClone() } // namespace -std::unique_ptr<Operation> clone(const Operation &operation) +std::unique_ptr<Operation> clone(const IOperation &operation) { OperationCloner cloner; operation.accept(cloner); diff --git a/runtime/onert/core/src/ir/OperationCloner.h b/runtime/onert/core/src/ir/OperationCloner.h index 6424549e9..49297a05c 100644 --- a/runtime/onert/core/src/ir/OperationCloner.h +++ b/runtime/onert/core/src/ir/OperationCloner.h @@ -26,7 +26,7 @@ namespace onert namespace ir { -std::unique_ptr<Operation> clone(const Operation &operation); +std::unique_ptr<Operation> clone(const IOperation &operation); } // namespace ir } // namespace onert diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc index 0b596ff13..5e6d700f3 100644 --- a/runtime/onert/core/src/ir/OperationDumper.cc +++ b/runtime/onert/core/src/ir/OperationDumper.cc @@ -202,6 +202,14 @@ void OperationDumper::visit(const L2Normalization &node) { dumpOpGeneric(node); void OperationDumper::visit(const LocalResponseNormalization &node) { dumpOpGeneric(node); } +void OperationDumper::visit(const Loss &node) +{ + VERBOSE(LIR) << "* " << node.name() << std::endl; + VERBOSE(LIR) << " - Inputs : Prediction(" << node.getInputs().at(Loss::Input::Y_PRED) << ") True(" + << node.getInputs().at(Loss::Input::Y_TRUE) << ")" << std::endl; + VERBOSE(LIR) << " - Outputs : Output(" << node.getOutputs().at(0) << ")" << std::endl; +} + void OperationDumper::visit(const LSTM &node) { VERBOSE(LIR) << "* " << node.name() << std::endl; diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h index fe18307b9..99bf869d5 100644 --- a/runtime/onert/core/src/ir/OperationDumper.h +++ b/runtime/onert/core/src/ir/OperationDumper.h @@ -55,6 +55,7 @@ public: void visit(const operation::InstanceNorm &) override; void visit(const operation::L2Normalization &) override; void visit(const operation::LocalResponseNormalization &) override; + void visit(const operation::Loss &node) override; void visit(const operation::LSTM &) override; void visit(const operation::Pack &) override; void visit(const operation::Pad &) override; diff --git a/runtime/onert/core/src/ir/OperationValidator.cc b/runtime/onert/core/src/ir/OperationValidator.cc index 094dbc0d5..cf7323d77 100644 --- a/runtime/onert/core/src/ir/OperationValidator.cc +++ b/runtime/onert/core/src/ir/OperationValidator.cc @@ -38,7 +38,7 @@ OperationValidator::OperationValidator(const Graph &graph) void OperationValidator::operator()() { - _operations.iterate([&](const OperationIndex &, const Operation &node) { node.accept(*this); }); + _operations.iterate([&](const OperationIndex &, const IOperation &node) { node.accept(*this); }); } DataType OperationValidator::operandType(const OperandIndex &idx) @@ -75,7 +75,7 @@ bool OperationValidator::isValidType(const OperandIndex &idx, const DataType &ty bool OperationValidator::isValidType(const OperandIndex &idx, std::initializer_list<DataType> valid_types) { - for (auto type_to_check : valid_types) + for (auto &&type_to_check : valid_types) { if (isValidType(idx, type_to_check)) { @@ -163,7 +163,7 @@ void OperationValidator::visit(const operation::Concat &node) { const auto output_index{node.getOutputs().at(0)}; - for (auto input_index : node.getInputs()) + for (auto &&input_index : node.getInputs()) { OP_REQUIRES(isSameType(input_index, output_index)); diff --git a/runtime/onert/core/src/ir/Operations.cc b/runtime/onert/core/src/ir/Operations.cc index e7e0c88cf..1b4691f58 100644 --- a/runtime/onert/core/src/ir/Operations.cc +++ b/runtime/onert/core/src/ir/Operations.cc @@ -26,7 +26,7 @@ namespace ir Operations::Operations(const Operations &obj) { obj.iterate( - [&](const OperationIndex &index, const Operation &op) { _objects.emplace(index, clone(op)); }); + [&](const OperationIndex &index, const IOperation &op) { _objects.emplace(index, clone(op)); }); _next_index = obj._next_index; } diff --git a/runtime/onert/core/src/ir/operation/Loss.cc b/runtime/onert/core/src/ir/operation/Loss.cc new file mode 100644 index 000000000..fa3520b2c --- /dev/null +++ b/runtime/onert/core/src/ir/operation/Loss.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/operation/Loss.h" +#include "ir/OperationVisitor.h" + +#include <unordered_map> + +namespace onert +{ +namespace ir +{ +namespace operation +{ + +void Loss::accept(OperationVisitor &v) const { v.visit(*this); } + +Loss::Loss(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createAtLeast(2u), inputs, outputs}, _param{param} +{ + if (param.op_type == Type::CATEGORICAL_CROSSENTROPY) + { + assert(inputs.size() == 2 && "CategoricalCrossentropy Loss has 2 inputs"); + } +} + +std::string Loss::name() const +{ + using LossType = onert::ir::operation::Loss::Type; + static const std::unordered_map<Type, std::string> name_map{ + {LossType::MEAN_SQUARED_ERROR, "MeanSquaredError Loss"}, + {LossType::CATEGORICAL_CROSSENTROPY, "CategoricalCrossentropy Loss"}}; + return name_map.at(_param.op_type); +} + +} // namespace operation +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/TrainableGraph.cc b/runtime/onert/core/src/ir/train/TrainableGraph.cc new file mode 100644 index 000000000..781f04956 --- /dev/null +++ b/runtime/onert/core/src/ir/train/TrainableGraph.cc @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/TrainableGraph.h" +#include "util/Utils.h" + +#include <algorithm> +#include <misc/polymorphic_downcast.h> + +namespace onert +{ +namespace ir +{ +namespace train +{ + +TrainableGraph::TrainableGraph() : _graph{} {} + +TrainableGraph::TrainableGraph(const TrainableGraph &tgraph) + : _graph{tgraph._graph}, _derivatives{tgraph._derivatives}, _losses{tgraph._losses} +{ + tgraph.operations().iterate( + [&](const onert::ir::OperationIndex &index, const onert::ir::IOperation &op) { + replaceOperation(index, dynamic_cast<const ITrainableOperation &>(op).clone()); + }); +} + +TrainableGraph::TrainableGraph(const Graph &graph) : _graph{graph} {} + +OperandIndex TrainableGraph::addOperand(const Shape &shape, const TypeInfo &type) +{ + return _graph.addOperand(shape, type); +} + +OperandIndex TrainableGraph::addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand) +{ + return _graph.addOperand(index, std::move(operand)); +} + +OperationIndex TrainableGraph::addOperation(std::unique_ptr<ITrainableOperation> &&operation) +{ + return _graph.addOperation(std::move(operation)); +} + +OperationIndex TrainableGraph::replaceOperation(OperationIndex index, + std::unique_ptr<ITrainableOperation> &&operation) +{ + return _graph.replaceOperation(index, std::move(operation)); +} + +OperandIndex TrainableGraph::addDerivative(OperandIndex index, + std::unique_ptr<Operand> &&derivative) +{ + return _derivatives.push(std::move(derivative), index); +} + +IOIndex TrainableGraph::getInputIndex(const std::string &name) const +{ + return _graph.getInputIndex(name); +} + +IOIndex TrainableGraph::getOutputIndex(const std::string &name) const +{ + return _graph.getOutputIndex(name); +} + +void TrainableGraph::changeShape(const OperandIndex &index, const ir::Shape &new_shape) +{ + _graph.changeShape(index, new_shape); +} + +void TrainableGraph::changeDerivativeShape(const OperandIndex &index, const ir::Shape &new_shape) +{ + assert(_derivatives.exist(index)); + _derivatives.at(index).info().shape(new_shape); +} + +void TrainableGraph::addInput(const OperandIndex &ind, const std::string &name) +{ + _graph.addInput(ind, name); +} + +void TrainableGraph::addOutput(const OperandIndex &ind, const std::string &name) +{ + _graph.addOutput(ind, name); +} + +void TrainableGraph::verify(void) const +{ + _graph.verify(); + + operations().iterate([](const onert::ir::OperationIndex &, const onert::ir::IOperation &op) { + try + { + UNUSED_RELEASE(dynamic_cast<const onert::ir::train::ITrainableOperation &>(op)); + } + catch (const std::bad_cast &) + { + std::runtime_error("TrainableGraph: " + op.name() + " is not a trainable operation"); + } + }); +} + +void TrainableGraph::removeOperand(const OperandIndex &ind) { _graph.removeOperand(ind); } + +void TrainableGraph::setLayout(Layout layout) { _graph.setLayout(layout); } + +const ITrainableOperation &TrainableGraph::operation(OperationIndex index) const +{ + // NOTE Virtual inherited objects cannot be static_casted. + return dynamic_cast<const ITrainableOperation &>(_graph.operations().at(index)); +} + +std::vector<ir::OperationIndex> TrainableGraph::topolSortOperations() const +{ + return _graph.topolSortOperations(); +} + +void TrainableGraph::addLoss(const OperandIndex &loss_ind, const IOIndex &pred_ioind) +{ + _losses.emplace(pred_ioind, loss_ind); +} + +OperandIndex TrainableGraph::getLossIndex(const IOIndex &pred_ioind) const +{ + auto itr = _losses.find(pred_ioind); + return (itr == _losses.end()) ? OperandIndex{} : itr->second; +} + +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Conv2D.cc b/runtime/onert/core/src/ir/train/operation/Conv2D.cc new file mode 100644 index 000000000..923861ae3 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Conv2D.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/Conv2D.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Conv2D::clone() const +{ + return std::make_unique<Conv2D>(*this); +} + +void Conv2D::accept(OperationVisitor &v) const { v.visit(*this); } + +void Conv2D::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Conv2D::Conv2D(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc new file mode 100644 index 000000000..1dae3f674 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/ElementwiseActivation.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> ElementwiseActivation::clone() const +{ + return std::make_unique<ElementwiseActivation>(*this); +} + +void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this); } + +void ElementwiseActivation::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +ElementwiseActivation::ElementwiseActivation(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/FullyConnected.cc b/runtime/onert/core/src/ir/train/operation/FullyConnected.cc new file mode 100644 index 000000000..a26f7c489 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/FullyConnected.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/FullyConnected.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> FullyConnected::clone() const +{ + return std::make_unique<FullyConnected>(*this); +} + +void FullyConnected::accept(OperationVisitor &v) const { v.visit(*this); } + +void FullyConnected::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +FullyConnected::FullyConnected(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Loss.cc b/runtime/onert/core/src/ir/train/operation/Loss.cc new file mode 100644 index 000000000..abd79929b --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Loss.cc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/Loss.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +#include <misc/polymorphic_downcast.h> + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Loss::clone() const { return std::make_unique<Loss>(*this); } + +void Loss::accept(OperationVisitor &v) const { v.visit(*this); } + +void Loss::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Loss::Loss(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Permute.cc b/runtime/onert/core/src/ir/train/operation/Permute.cc new file mode 100644 index 000000000..adc23aa49 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Permute.cc @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/Permute.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Permute::clone() const +{ + return std::make_unique<Permute>(*this); +} + +void Permute::accept(OperationVisitor &v) const { v.visit(*this); } + +void Permute::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Permute::Permute(const OperationType &operation) + : OperationType{operation.getInputs().at(0), operation.getOutputs().at(0), + operation.getPermuteType()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Pool2D.cc b/runtime/onert/core/src/ir/train/operation/Pool2D.cc new file mode 100644 index 000000000..021574f19 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Pool2D.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/Pool2D.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Pool2D::clone() const +{ + return std::make_unique<Pool2D>(*this); +} + +void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); } + +void Pool2D::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Pool2D::Pool2D(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Reshape.cc b/runtime/onert/core/src/ir/train/operation/Reshape.cc new file mode 100644 index 000000000..c76158607 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Reshape.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/Reshape.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Reshape::clone() const +{ + return std::make_unique<Reshape>(*this); +} + +void Reshape::accept(OperationVisitor &v) const { v.visit(*this); } + +void Reshape::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Reshape::Reshape(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Softmax.cc b/runtime/onert/core/src/ir/train/operation/Softmax.cc new file mode 100644 index 000000000..dbd403879 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Softmax.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/Softmax.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Softmax::clone() const +{ + return std::make_unique<Softmax>(*this); +} + +void Softmax::accept(OperationVisitor &v) const { v.visit(*this); } + +void Softmax::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Softmax::Softmax(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/verifier/Verifier.cc b/runtime/onert/core/src/ir/verifier/Verifier.cc index 25a82d5a2..6260d29ff 100644 --- a/runtime/onert/core/src/ir/verifier/Verifier.cc +++ b/runtime/onert/core/src/ir/verifier/Verifier.cc @@ -39,11 +39,11 @@ bool DAGChecker::verify(const Graph &graph) const noexcept OperationIndexMap<bool> visited; operations.iterate( - [&](const OperationIndex &index, const Operation &) { visited[index] = false; }); + [&](const OperationIndex &index, const IOperation &) { visited[index] = false; }); OperationIndexMap<bool> on_stack = visited; // Copy from visited - std::function<void(const OperationIndex &index, const Operation &)> dfs_recursive = - [&](const OperationIndex &index, const Operation &node) -> void { + std::function<void(const OperationIndex &index, const IOperation &)> dfs_recursive = + [&](const OperationIndex &index, const IOperation &node) -> void { if (on_stack[index]) cyclic = true; if (visited[index]) @@ -51,7 +51,7 @@ bool DAGChecker::verify(const Graph &graph) const noexcept visited[index] = true; on_stack[index] = true; - for (auto output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED) + for (auto &&output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED) { const auto &operand = graph.operands().at(output); for (const auto &use : operand.getUses()) @@ -76,8 +76,8 @@ bool EdgeChecker::verify(const Graph &graph) const noexcept { auto &operations = graph.operations(); uint32_t errors = 0; - operations.iterate([&](const OperationIndex &index, const Operation &node) { - for (auto operand_index : node.getInputs() | ir::Remove::UNDEFINED) + operations.iterate([&](const OperationIndex &index, const IOperation &node) { + for (auto &&operand_index : node.getInputs() | ir::Remove::UNDEFINED) { try { @@ -98,7 +98,7 @@ bool EdgeChecker::verify(const Graph &graph) const noexcept errors += 1; } } - for (auto operand_index : node.getOutputs() | ir::Remove::UNDEFINED) + for (auto &&operand_index : node.getOutputs() | ir::Remove::UNDEFINED) { try { @@ -127,7 +127,7 @@ bool EdgeChecker::verify(const Graph &graph) const noexcept bool InputOutputChecker::verify(const Graph &graph) const noexcept { - for (auto operand_ind : + for (auto &&operand_ind : (graph.getInputs() + graph.getOutputs()) | Remove::DUPLICATED | Remove::UNDEFINED) { if (!graph.operands().exist(operand_ind)) diff --git a/runtime/onert/core/src/odc/QuantizeManager.cc b/runtime/onert/core/src/odc/QuantizeManager.cc new file mode 100644 index 000000000..71572a7e0 --- /dev/null +++ b/runtime/onert/core/src/odc/QuantizeManager.cc @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "QuantizerLoader.h" +#include "odc/QuantizeManager.h" + +#include <iostream> +#include <mutex> + +namespace onert +{ +namespace odc +{ + +bool QuantizeManager::quantize() +{ + // Compile function is thread-unsafe + static std::mutex lock; + std::lock_guard<std::mutex> guard(lock); + + if (_export_model_path.empty()) + throw std::runtime_error("Export model path is not set"); + + auto &quantize_loader = QuantizerLoader::instance(); + if (quantize_loader.loadLibrary() != 0) + return false; + + auto quantizer = quantize_loader.get(); + auto result = quantizer->quantize(_model_path.c_str(), _export_model_path.c_str(), _is_q16); + + // TODO Unload quantize library to reduce memory usage + + return (result == 0); +} + +} // namespace odc +} // namespace onert diff --git a/runtime/onert/core/src/odc/QuantizeManager.test.cc b/runtime/onert/core/src/odc/QuantizeManager.test.cc new file mode 100644 index 000000000..4e155a6ef --- /dev/null +++ b/runtime/onert/core/src/odc/QuantizeManager.test.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "odc/QuantizeManager.h" + +#include <gtest/gtest.h> + +using namespace onert::odc; + +// Test export model path is not set +TEST(odc_QuantizeManager, neg_export_model_path) +{ + QuantizeManager manager("model_path"); + ASSERT_THROW(manager.quantize(), std::runtime_error); +} + +// Test invalid model path +TEST(odc_QuantizeManager, neg_invalid_model_path) +{ + QuantizeManager manager("invalid_model_path.circle"); + manager.exportModelPath("export_model_path.circle"); + ASSERT_EQ(manager.quantize(), false); +} diff --git a/runtime/onert/core/src/odc/QuantizerLoader.cc b/runtime/onert/core/src/odc/QuantizerLoader.cc new file mode 100644 index 000000000..8a972e97e --- /dev/null +++ b/runtime/onert/core/src/odc/QuantizerLoader.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "QuantizerLoader.h" + +#include <dlfcn.h> +#include <iostream> +#include <string> + +static const char *SHARED_LIB_EXT = +#if defined(__APPLE__) && defined(__MACH__) + ".dylib"; +#else + ".so"; +#endif + +namespace onert +{ +namespace odc +{ + +QuantizerLoader &QuantizerLoader::instance() +{ + static QuantizerLoader singleton; + return singleton; +} + +int32_t QuantizerLoader::loadLibrary() +{ + if (get() != nullptr) + return 0; + + const std::string quantize_so = std::string("libonert_odc") + SHARED_LIB_EXT; + void *handle = dlopen(quantize_so.c_str(), RTLD_LAZY | RTLD_LOCAL); + auto dlerror_msg = dlerror(); + + if (handle == nullptr) + { + std::cerr << "Failed to load " << quantize_so << std::endl; + std::cerr << dlerror_msg << std::endl; + return 1; + } + + { + const char *factory_name = "create_quantizer"; + auto factory = (factory_t)dlsym(handle, factory_name); + dlerror_msg = dlerror(); + + if (factory == nullptr) + { + std::cerr << "QuantizerLoader: unable to find function " << factory_name << dlerror_msg + << std::endl; + dlclose(handle); + return 1; + } + + auto destroyer = (quantizer_destory_t)dlsym(handle, "destroy_quantizer"); + _quantizer = std::unique_ptr<IQuantizer, quantizer_destory_t>(factory(), destroyer); + + if (_quantizer == nullptr) + { + std::cerr << "QuantizerLoader: unable to create quantizer" << std::endl; + dlclose(handle); + return 1; + } + } + + // Save quantize library handle (avoid warning by handle lost without dlclose()) + // clang-format off + _dlhandle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [filename = quantize_so](void *h) { + if (dlclose(h) != 0) + std::cerr << "Failed to unload backend " << filename << std::endl; + }}; + // clang-format on + + return 0; +} + +int32_t QuantizerLoader::unloadLibrary() +{ + if (get() == nullptr) + return 0; + + _quantizer.reset(nullptr); + _dlhandle.reset(nullptr); + + return 0; +} + +} // namespace odc +} // namespace onert diff --git a/runtime/onert/core/src/odc/QuantizerLoader.h b/runtime/onert/core/src/odc/QuantizerLoader.h new file mode 100644 index 000000000..36a9f2996 --- /dev/null +++ b/runtime/onert/core/src/odc/QuantizerLoader.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_ODC_QUANTIZER_LOADER_H__ +#define __ONERT_ODC_QUANTIZER_LOADER_H__ + +#include "odc/IQuantizer.h" + +#include <functional> +#include <memory> + +namespace onert +{ +namespace odc +{ + +/** + * @brief Class to manage loading and unloading of dynamic library containing + * implementation of IQuantizer interface + */ +class QuantizerLoader +{ +public: + /** + * @brief Typedef for function pointer to destroy loaded library handle + */ + using dlhandle_destroy_t = std::function<void(void *)>; + /** + * @brief Typedef for function pointer to create instance of IQuantizer + */ + using factory_t = IQuantizer *(*)(); + /** + * @brief Typedef for function pointer to destroy instance of IQuantizer + */ + using quantizer_destory_t = void (*)(IQuantizer *); + + /** + * @brief Get singleton instance of QuantizerLoader + * @return Reference to singleton instance of QuantizerLoader + */ + static QuantizerLoader &instance(); + +private: + // Cannot create instance of QuantizerLoader outside of this class + QuantizerLoader() = default; + QuantizerLoader(QuantizerLoader const &) = delete; + QuantizerLoader &operator=(QuantizerLoader const &) = delete; + ~QuantizerLoader() = default; + +public: + /** + * @brief Load dynamic library containing implementation of IQuantizer + * @return 0 if success, otherwise errno value + */ + int32_t loadLibrary(); + /** + * @brief Unload dynamic library containing implementation of IQuantizer + * @return 0 if success, otherwise errno value + */ + int32_t unloadLibrary(); + /** + * @brief Get instance of IQuantizer created through factory method + * @return Pointer to instance of IQuantizer + */ + IQuantizer *get() const { return _quantizer.get(); } + +private: + // Note: Keep handle to avoid svace warning of "handle lost without dlclose()" + std::unique_ptr<void, dlhandle_destroy_t> _dlhandle; + std::unique_ptr<IQuantizer, quantizer_destory_t> _quantizer{nullptr, nullptr}; +}; + +} // namespace odc +} // namespace onert + +#endif // __ONERT_ODC_QUANTIZER_LOADER_H__ diff --git a/runtime/onert/core/src/odc/QuantizerLoader.test.cc b/runtime/onert/core/src/odc/QuantizerLoader.test.cc new file mode 100644 index 000000000..112e65b27 --- /dev/null +++ b/runtime/onert/core/src/odc/QuantizerLoader.test.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "QuantizerLoader.h" + +#include <gtest/gtest.h> + +using namespace onert::odc; + +// Test QuantizerLoader singleton +TEST(odc_QuantizerLoader, singleton) +{ + QuantizerLoader &loader1 = QuantizerLoader::instance(); + QuantizerLoader &loader2 = QuantizerLoader::instance(); + ASSERT_EQ(&loader1, &loader2); +} + +// Test load quantizer library +TEST(odc_QuantizerLoader, load) +{ + QuantizerLoader &loader = QuantizerLoader::instance(); + // Unload because it may be loaded on previous tests + ASSERT_EQ(loader.unloadLibrary(), 0); + + if (loader.loadLibrary() == 0) + { + // Load twice to check if it is thread-safe + ASSERT_EQ(loader.loadLibrary(), 0); + } +} + +// Get quantizer function without loading quantizer library +TEST(odc_QuantizerLoader, neg_get) +{ + QuantizerLoader &loader = QuantizerLoader::instance(); + // Unload because it may be loaded on previous tests + ASSERT_EQ(loader.unloadLibrary(), 0); + ASSERT_EQ(loader.get(), nullptr); +} + +// Check quantizer function pointer when QuantizerLoader is unloaded +TEST(odc_QuantizerLoader, neg_unload) +{ + QuantizerLoader &loader = QuantizerLoader::instance(); + if (loader.loadLibrary() == 0) + ASSERT_NE(loader.get(), nullptr); + + ASSERT_EQ(loader.unloadLibrary(), 0); + ASSERT_EQ(loader.get(), nullptr); +} diff --git a/runtime/onert/core/src/util/MDTableEventWriter.cc b/runtime/onert/core/src/util/MDTableEventWriter.cc index 13dab5b77..e7d90eec4 100644 --- a/runtime/onert/core/src/util/MDTableEventWriter.cc +++ b/runtime/onert/core/src/util/MDTableEventWriter.cc @@ -124,7 +124,7 @@ struct Graph : public MDContent void setOperations(const std::map<std::string, Operation> &name_to_op) { uint64_t graph_latency = end_ts - begin_ts; - for (auto it : name_to_op) + for (auto &&it : name_to_op) { auto op = it.second; op.graph_latency = graph_latency; @@ -172,7 +172,7 @@ struct Graph : public MDContent writeMDTableRow(os, op_headers_line); // Operation's contents - for (auto op : ops) + for (auto &&op : ops) { op.write(os); } |