Diffstat (limited to 'runtime/onert/core/src')
353 files changed, 20776 insertions, 12212 deletions
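The backend/basic/MemoryPlanner.cc hunk further down moves BumpPlanner, FirstFitPlanner and WICPlanner from backend::cpu_common into backend::basic. FirstFitPlanner::claim() keeps claimed blocks in a map ordered by base offset and places each new block at the first gap wide enough to hold it, growing the planned capacity only when the block lands past the current end. A minimal, self-contained sketch of that first-fit strategy follows; the names (FirstFitSketch, Block) are illustrative only, not onert APIs.

#include <cstdint>
#include <iostream>
#include <map>

// Stand-in for the first-fit placement done by FirstFitPlanner::claim():
// claims are kept ordered by base offset, and a new claim goes into the
// first gap large enough to hold it.
class FirstFitSketch
{
public:
  // Returns the base offset chosen for a block of `size` bytes.
  uint32_t claim(int id, uint32_t size)
  {
    uint32_t next_offset = 0;
    for (const auto &entry : _claims) // ordered by base offset
    {
      const uint32_t claimed_offset = entry.first;
      const uint32_t claimed_size = entry.second.size;
      if (next_offset + size <= claimed_offset)
        break; // the gap before this claim is large enough
      next_offset = claimed_offset + claimed_size;
    }
    _claims[next_offset] = {id, size};
    if (_capacity < next_offset + size)
      _capacity = next_offset + size;
    return next_offset;
  }

  void release(int id)
  {
    for (auto it = _claims.begin(); it != _claims.end(); ++it)
    {
      if (it->second.id == id)
      {
        _claims.erase(it);
        return;
      }
    }
  }

  uint32_t capacity() const { return _capacity; }

private:
  struct Block
  {
    int id;
    uint32_t size;
  };
  std::map<uint32_t, Block> _claims; // base offset -> block
  uint32_t _capacity = 0;
};

int main()
{
  FirstFitSketch planner;
  std::cout << planner.claim(0, 64) << '\n'; // 0
  planner.claim(1, 32);                      // placed at 64
  planner.release(0);
  std::cout << planner.claim(2, 16) << '\n'; // reuses the freed gap at 0
  std::cout << planner.capacity() << '\n';   // 96
  return 0;
}

Releasing an early block lets a later, smaller claim reuse its offset instead of growing the arena, which is the point of preferring first-fit over the simpler BumpPlanner.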
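Further down, backend/basic/Tensor.cc reimplements Tensor::calcOffset() on top of getShape(): the coordinates are folded into a flat row-major index and then scaled by the element size (with a special case that treats rank-0 scalars as rank 1). A standalone sketch of that folding, using illustrative names rather than the onert API:

#include <cstddef>
#include <iostream>
#include <vector>

// Row-major byte offset: fold each coordinate into the running index,
// then scale by the element size, mirroring Tensor::calcOffset() in the
// diff below (the rank-0 scalar case is omitted here for brevity).
size_t rowMajorOffsetBytes(const std::vector<size_t> &dims,
                           const std::vector<size_t> &coords, size_t elem_size)
{
  size_t offset = 0;
  for (size_t i = 0; i < dims.size(); ++i)
    offset = offset * dims[i] + coords[i];
  return offset * elem_size;
}

int main()
{
  // For a 2x3x4 float tensor, element (1, 2, 3) sits at ((1*3 + 2)*4 + 3) * 4 = 92 bytes.
  std::cout << rowMajorOffsetBytes({2, 3, 4}, {1, 2, 3}, sizeof(float)) << '\n';
  return 0;
}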
diff --git a/runtime/onert/core/src/backend/BackendContext.cc b/runtime/onert/core/src/backend/BackendContext.cc index bafa36d28..7b36f106d 100644 --- a/runtime/onert/core/src/backend/BackendContext.cc +++ b/runtime/onert/core/src/backend/BackendContext.cc @@ -16,40 +16,10 @@ #include "backend/BackendContext.h" -#include "ir/Operation.h" -#include "backend/IConstantInitializer.h" - namespace onert { namespace backend { -void BackendContext::initialize(const std::vector<OperationInfo> &operation_list, - const std::vector<ir::OperandIndex> &operand_list) -{ - _operation_list = operation_list; - _operand_list = operand_list; -} - -void BackendContext::initConsts() -{ - for (auto &op : _operation_list) - { - constant_initializer->setLayout(op.layout); - _graph->operations().at(op.index).accept(*constant_initializer); - } - - for (auto ind : _operand_list) - { - const auto &obj = _graph->operands().at(ind); - if (obj.isConstant() && !constant_initializer->exist(ind)) - { - constant_initializer->registerDefaultInitializer(ind, obj); - } - } - - constant_initializer->run(); -} - } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/IConstantInitializer.cc b/runtime/onert/core/src/backend/IConstantInitializer.cc deleted file mode 100644 index 934a42753..000000000 --- a/runtime/onert/core/src/backend/IConstantInitializer.cc +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "backend/IConstantInitializer.h" - -#include <Half.h> - -using float16 = Half; - -namespace onert -{ -namespace backend -{ - -void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) -{ - // For only CONSTANTS - // TODO Add to check if tensor has been allocated - if (!obj.isConstant()) - return; - - const auto type = obj.typeInfo().type(); - using ir::DataType; - - switch (type) - { - case DataType::FLOAT32: - _init_map[index] = copyInit<float>; - break; - case DataType::INT32: - _init_map[index] = copyInit<int32_t>; - break; - case DataType::UINT32: - _init_map[index] = copyInit<uint32_t>; - break; - case DataType::BOOL8: - case DataType::QUANT_UINT8_ASYMM: - _init_map[index] = copyInit<uint8_t>; - break; - case DataType::QUANT_INT8_SYMM: - _init_map[index] = copyInit<int8_t>; - break; - case DataType::FLOAT16: - _init_map[index] = copyInit<float16>; - break; - case DataType::INT64: - _init_map[index] = copyInit<int64_t>; - break; - default: - throw std::runtime_error("Not supported, yet"); - break; - } -} - -void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) -{ - // For only CONSTANTS - // TODO Add to check if tensor has been allocated - if (!obj.isConstant()) - return; - - const auto type = obj.typeInfo().type(); - using ir::DataType; - using namespace std::placeholders; - - switch (type) - { - case DataType::FLOAT32: - _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_op_seq_layout); - break; - case DataType::INT32: - _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_op_seq_layout); - break; - case DataType::UINT32: - _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_op_seq_layout); - break; - case DataType::BOOL8: - case DataType::QUANT_UINT8_ASYMM: - _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_op_seq_layout); - break; - case DataType::QUANT_INT8_SYMM: - _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_op_seq_layout); - break; - case DataType::FLOAT16: - _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_op_seq_layout); - break; - case DataType::INT64: - _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_op_seq_layout); - break; - default: - throw std::runtime_error("Not supported, yet"); - break; - } -} - -} // namespace backend -} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/Tensor.h b/runtime/onert/core/src/backend/IPortableTensor.cc index ba5bafd75..cec34e780 100644 --- a/runtime/onert/core/src/backend/controlflow/Tensor.h +++ b/runtime/onert/core/src/backend/IPortableTensor.cc @@ -14,22 +14,16 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__ -#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__ - -#include <backend/cpu_common/Tensor.h> +#include "backend/IPortableTensor.h" namespace onert { namespace backend { -namespace controlflow -{ -using Tensor = cpu_common::Tensor; +// `dynamic_cast` not working across library boundaries on NDK +// With this as a key function, `dynamic_cast` works across dl +IPortableTensor::~IPortableTensor() {} -} // namespace controlflow } // namespace backend } // namespace onert - -#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__ diff --git a/runtime/onert/core/src/backend/ITensor.cc b/runtime/onert/core/src/backend/ITensor.cc index 7127ed93d..1339cb409 100644 --- a/runtime/onert/core/src/backend/ITensor.cc +++ b/runtime/onert/core/src/backend/ITensor.cc @@ -21,14 +21,9 @@ namespace onert namespace backend { -ir::Shape ITensor::getShape() const -{ - onert::ir::Shape shape(num_dimensions()); - for (uint32_t d = 0; d < num_dimensions(); d++) - shape.dim(d) = dimension(d); - - return shape; -} +// `dynamic_cast` not working across library boundaries on NDK +// With this as a key function, `dynamic_cast` works across dl +ITensor::~ITensor() {} } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/cpu_common/Allocator.cc b/runtime/onert/core/src/backend/basic/Allocator.cc index 0ba444ee6..61214dfad 100644 --- a/runtime/onert/core/src/backend/cpu_common/Allocator.cc +++ b/runtime/onert/core/src/backend/basic/Allocator.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "backend/cpu_common/Allocator.h" +#include "backend/basic/Allocator.h" #include "util/logging.h" @@ -22,7 +22,7 @@ namespace onert { namespace backend { -namespace cpu_common +namespace basic { Allocator::Allocator(uint32_t capacity) @@ -33,6 +33,6 @@ Allocator::Allocator(uint32_t capacity) VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base.get()) << std::endl; } -} // namespace cpu_common +} // namespace basic } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/basic/BackendContextHelpers.cc b/runtime/onert/core/src/backend/basic/BackendContextHelpers.cc new file mode 100644 index 000000000..c02cc0cf2 --- /dev/null +++ b/runtime/onert/core/src/backend/basic/BackendContextHelpers.cc @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/basic/BackendContextHelpers.h" diff --git a/runtime/onert/core/src/backend/basic/DynamicTensorManager.cc b/runtime/onert/core/src/backend/basic/DynamicTensorManager.cc new file mode 100644 index 000000000..07bcb09ee --- /dev/null +++ b/runtime/onert/core/src/backend/basic/DynamicTensorManager.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/basic/DynamicTensorManager.h" + +#include "util/logging.h" +#include "misc/polymorphic_downcast.h" + +namespace onert +{ +namespace backend +{ +namespace basic +{ + +DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> ®) + : _dynamic_mem_mgr{new DynamicMemoryManager()}, _tensors{reg} +{ + // DO NOTHING +} + +void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind, + const ir::OperandInfo &tensor_info, + ir::Layout backend_layout) +{ + assert(_tensors->getNativeTensor(ind) == nullptr); + auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr.get()); + _tensors->setNativeTensor(ind, std::move(tensor)); +} + +const ITensor *DynamicTensorManager::getRawITensor(ir::OperandIndex ind) +{ + auto ptr = _tensors->getITensor(ind); + assert(ptr); + return ptr; +} + +} // namespace basic +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryManager.cc b/runtime/onert/core/src/backend/basic/MemoryManager.cc index 8cb9c22ca..05fd9cc77 100644 --- a/runtime/onert/core/src/backend/cpu_common/MemoryManager.cc +++ b/runtime/onert/core/src/backend/basic/MemoryManager.cc @@ -14,18 +14,19 @@ * limitations under the License. */ -#include <backend/cpu_common/MemoryManager.h> +#include <backend/basic/MemoryManager.h> #include <cassert> #include "MemoryPlannerFactory.h" #include "util/ConfigSource.h" +#include "util/logging.h" namespace onert { namespace backend { -namespace cpu_common +namespace basic { MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()} @@ -34,20 +35,20 @@ MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()} } MemoryManager::MemoryManager(const std::string planner_id) - : _mem_planner{createMemoryPlanner(planner_id)} + : _mem_planner{createMemoryPlanner(planner_id)} { // DO NOTHING } -cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner() +basic::IMemoryPlanner *MemoryManager::createMemoryPlanner() { auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER); - return cpu_common::MemoryPlannerFactory::get().create(planner_id); + return basic::MemoryPlannerFactory::get().create(planner_id); } -cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id) +basic::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id) { - return cpu_common::MemoryPlannerFactory::get().create(planner_id); + return basic::MemoryPlannerFactory::get().create(planner_id); } void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) @@ -59,7 +60,7 @@ void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->rel void MemoryManager::allocate(void) { - _mem_alloc = std::make_shared<cpu_common::Allocator>(_mem_planner->capacity()); + _mem_alloc = std::make_shared<basic::Allocator>(_mem_planner->capacity()); assert(_mem_alloc->base()); } @@ -70,20 +71,20 @@ uint8_t *MemoryManager::getBuffer(const ir::OperandIndex &ind) const return _mem_alloc->base() + mem_blk.offset; } -std::shared_ptr<cpu_common::Allocator> 
DynamicMemoryManager::allocate(const ir::OperandIndex &ind, - uint32_t capacity) +std::shared_ptr<basic::Allocator> DynamicMemoryManager::allocate(const ITensor *tensor, + uint32_t capacity) { - auto find = _mem_alloc_map.find(ind); + auto find = _mem_alloc_map.find(tensor); if (find != _mem_alloc_map.end()) throw std::runtime_error("Cannot allocate memory for a tensor. It was already allocated."); - _mem_alloc_map[ind] = std::make_shared<cpu_common::Allocator>(capacity); - return _mem_alloc_map[ind]; + _mem_alloc_map[tensor] = std::make_shared<basic::Allocator>(capacity); + return _mem_alloc_map[tensor]; } -void DynamicMemoryManager::deallocate(const ir::OperandIndex &ind) +void DynamicMemoryManager::deallocate(const ITensor *tensor) { - auto find = _mem_alloc_map.find(ind); + auto find = _mem_alloc_map.find(tensor); if (find == _mem_alloc_map.end()) throw std::runtime_error("Cannot find Allocator for the requested index"); @@ -93,7 +94,7 @@ void DynamicMemoryManager::deallocate(const ir::OperandIndex &ind) void DynamicMemoryManager::deallocate(void) { - for (auto &mem_alloc : _mem_alloc_map) + for (auto &&mem_alloc : _mem_alloc_map) { // Release memory buffer of mem_alloc mem_alloc.second->release(); @@ -102,6 +103,6 @@ void DynamicMemoryManager::deallocate(void) _mem_alloc_map.clear(); } -} // namespace cpu_common +} // namespace basic } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.cc b/runtime/onert/core/src/backend/basic/MemoryPlanner.cc index 75c2da7d2..1c048043c 100644 --- a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.cc +++ b/runtime/onert/core/src/backend/basic/MemoryPlanner.cc @@ -22,24 +22,21 @@ namespace onert { namespace backend { -namespace cpu_common +namespace basic { void BumpPlanner::claim(const ir::OperandIndex &ind, size_t size) { - assert(size != 0); - Block blk{_capacity, size}; _mem_plans[ind] = blk; _capacity += size; - VERBOSE(BP_PLANNER) << "CLAIM(#" << ind.value() << "): " << blk.offset << ", " << blk.size - << std::endl; + VERBOSE(BP_PLANNER) << "CLAIM(" << ind << "): " << blk.offset << ", " << blk.size << std::endl; } void BumpPlanner::release(const ir::OperandIndex &ind) { - VERBOSE(BP_PLANNER) << "RELEASE(#" << ind.value() << "): " + VERBOSE(BP_PLANNER) << "RELEASE(" << ind << "): " << "NOTHING does" << std::endl; } @@ -59,11 +56,9 @@ void BumpPlanner::release(const ir::OperandIndex &ind) // the previous claim_base_offset. 
void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size) { - assert(size != 0); - // Find the right position for claiming uint32_t next_offset = 0; - for (auto &mem_claim : _claim_table) + for (const auto &mem_claim : _claim_table) { auto claimed_base_offset = mem_claim.first; auto claimed_size = _mem_plans[mem_claim.second].size; @@ -81,7 +76,7 @@ void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size) _claim_table[next_offset] = ind; _mem_plans[ind] = {next_offset, size}; - VERBOSE(FF_PLANNER) << "claim(#" << ind.value() << "): [+" << next_offset << ", " << size << "sz]" + VERBOSE(FF_PLANNER) << "claim(" << ind << "): [+" << next_offset << ", " << size << "sz]" << std::endl; if (_capacity < next_offset + size) @@ -102,7 +97,7 @@ void FirstFitPlanner::release(const ir::OperandIndex &ind) _claim_table.erase(it); - VERBOSE(FF_PLANNER) << "release(#" << index << "): [+" << offset << ", " << size << "sz]" + VERBOSE(FF_PLANNER) << "release(" << index << "): [+" << offset << ", " << size << "sz]" << std::endl; return; } @@ -111,16 +106,14 @@ void FirstFitPlanner::release(const ir::OperandIndex &ind) } WICPlanner::WICPlanner() - : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(), - _operands() + : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(), + _operands() { // DO NOTHING } void WICPlanner::claim(const ir::OperandIndex &ind, size_t size) { - assert(size != 0); - _operands.emplace(size, ind); _interference_graph[ind].insert(_interference_graph[ind].end(), _live_operands.cbegin(), _live_operands.cend()); @@ -130,13 +123,13 @@ void WICPlanner::claim(const ir::OperandIndex &ind, size_t size) } _live_operands.emplace(ind); - VERBOSE(WIC_PLANNER) << "claim(#" << ind.value() << "): [" << size << "sz]" << std::endl; + VERBOSE(WIC_PLANNER) << "claim(" << ind << "): [" << size << "sz]" << std::endl; } void WICPlanner::release(const ir::OperandIndex &ind) { _live_operands.erase(ind); - VERBOSE(WIC_PLANNER) << "release(#" << ind.value() << ")" << std::endl; + VERBOSE(WIC_PLANNER) << "release(" << ind << ")" << std::endl; } /* @@ -154,7 +147,7 @@ void WICPlanner::buildMemoryPlans() { uint32_t size = operand.first; const ir::OperandIndex &ind = operand.second; - VERBOSE(WIC_PLANNER) << "build_plan(#" << ind.value() << "): [" << size << "sz]" << std::endl; + VERBOSE(WIC_PLANNER) << "build_plan(" << ind << "): [" << size << "sz]" << std::endl; uint32_t next_offset = 0; if (_interference_graph.count(ind)) @@ -190,8 +183,8 @@ void WICPlanner::buildMemoryPlans() } _mem_plans[ind] = {next_offset, size}; - VERBOSE(WIC_PLANNER) << "alloc(#" << ind.value() << "): [+" << next_offset << ", " << size - << "sz]" << std::endl; + VERBOSE(WIC_PLANNER) << "alloc(" << ind << "): [+" << next_offset << ", " << size << "sz]" + << std::endl; if (_capacity < next_offset + size) { @@ -210,6 +203,6 @@ WICPlanner::MemoryPlans &WICPlanner::memory_plans() return _mem_plans; } -} // namespace cpu_common +} // namespace basic } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.h b/runtime/onert/core/src/backend/basic/MemoryPlanner.h index 7c387e542..661d0b5d9 100644 --- a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.h +++ b/runtime/onert/core/src/backend/basic/MemoryPlanner.h @@ -19,23 +19,23 @@ * @brief      This file contains Memory Planning related classes */ -#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__ -#define 
__ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__ +#ifndef __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__ +#define __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__ #include <map> #include <vector> #include <unordered_set> #include <memory> -#include "backend/cpu_common/Allocator.h" -#include "backend/cpu_common/IMemoryPlanner.h" +#include "backend/basic/Allocator.h" +#include "backend/basic/IMemoryPlanner.h" #include "ir/OperandIndexMap.h" namespace onert { namespace backend { -namespace cpu_common +namespace basic { /** @@ -153,8 +153,8 @@ private: std::multimap<uint32_t, ir::OperandIndex, std::greater<uint32_t>> _operands; }; -} // namespace cpu_common +} // namespace basic } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__ +#endif // __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__ diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.test.cc b/runtime/onert/core/src/backend/basic/MemoryPlanner.test.cc index 5208a94d4..a32228cbe 100644 --- a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.test.cc +++ b/runtime/onert/core/src/backend/basic/MemoryPlanner.test.cc @@ -21,13 +21,13 @@ TEST(Allocator, allocate_test) { - ::onert::backend::cpu_common::Allocator allocator(1024); + ::onert::backend::basic::Allocator allocator(1024); ASSERT_NE(allocator.base(), nullptr); } TEST(BumpPlanner, claim_test) { - ::onert::backend::cpu_common::BumpPlanner planner; + ::onert::backend::basic::BumpPlanner planner; auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) { onert::ir::OperandIndex mem_idx(index); @@ -44,7 +44,7 @@ TEST(BumpPlanner, claim_test) TEST(FirstFitPlanner, claim_release_test) { - ::onert::backend::cpu_common::FirstFitPlanner planner; + ::onert::backend::basic::FirstFitPlanner planner; auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) { onert::ir::OperandIndex mem_idx(index); @@ -128,7 +128,7 @@ TEST(FirstFitPlanner, claim_release_test) TEST(WICPlanner, claim_release_test) { - ::onert::backend::cpu_common::WICPlanner planner; + ::onert::backend::basic::WICPlanner planner; auto claim = [&planner](uint32_t index, size_t size) { onert::ir::OperandIndex mem_idx(index); diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.cc b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.cc index ead4f3294..e12635359 100644 --- a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.cc +++ b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.cc @@ -22,7 +22,7 @@ namespace onert { namespace backend { -namespace cpu_common +namespace basic { MemoryPlannerFactory &MemoryPlannerFactory::get() @@ -48,6 +48,6 @@ IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key) return new FirstFitPlanner; // Default Planner } -} // namespace cpu_common +} // namespace basic } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.h b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.h index d14ec13ca..fe32f4c99 100644 --- a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.h +++ b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.h @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__ -#define __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__ +#ifndef __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__ +#define __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__ -#include "backend/cpu_common/IMemoryPlanner.h" +#include "backend/basic/IMemoryPlanner.h" #include <string> @@ -25,7 +25,7 @@ namespace onert { namespace backend { -namespace cpu_common +namespace basic { class MemoryPlannerFactory @@ -40,8 +40,8 @@ public: IMemoryPlanner *create(const std::string &key); }; -} // namespace cpu_common +} // namespace basic } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__ +#endif // __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__ diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc index 440f70c93..71cde4cde 100644 --- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc @@ -14,65 +14,55 @@ * limitations under the License. */ -#include "backend/cpu_common/StaticTensorManager.h" +#include "backend/basic/StaticTensorManager.h" -#include "backend/cpu_common/DynamicTensorManager.h" +#include "backend/basic/DynamicTensorManager.h" +#include "backend/basic/Tensor.h" #include <util/logging.h> namespace onert { namespace backend { -namespace cpu_common +namespace basic { StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> ®, - IDynamicTensorManager *dynamic_tensor_manager) - : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg}, - _dynamic_tensor_manager{dynamic_tensor_manager} + DynamicTensorManager *dynamic_tensor_manager) + : _nonconst_mgr{new MemoryManager()}, _tensors{reg}, _dynamic_tensor_manager{ + dynamic_tensor_manager} { // DO NOTHING } -void StaticTensorManager::allocateConsts(void) +StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> ®, + const std::string planner_id, + DynamicTensorManager *dynamic_tensor_manager) + : _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg}, _dynamic_tensor_manager{ + dynamic_tensor_manager} { - for (auto &pair : _tensors->native_tensors()) - { - const auto &ind = pair.first; - auto tensor = pair.second; - if (_as_constants[ind]) - { - auto mem_alloc = _const_mgr->allocate(ind, tensor->total_size()); - tensor->setBuffer(mem_alloc); - auto buffer = mem_alloc->base(); - VERBOSE(CPU_COMMON_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value() - << "): " << static_cast<void *>(buffer) - << "size : " << tensor->total_size() << std::endl; - } - } + // DO NOTHING } void StaticTensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); - for (auto &pair : _tensors->native_tensors()) + for (auto &&pair : _tensors->native_tensors()) { const auto &ind = pair.first; - auto tensor = pair.second; + auto tensor = pair.second.get(); if (!_as_constants[ind] && !tensor->is_dynamic()) { auto *buffer = _nonconst_mgr->getBuffer(ind); tensor->setBuffer(buffer); - VERBOSE(CPU_COMMON_StaticTensorManager) << "TENSOR(#" << ind.value() - << "): " << static_cast<void *>(buffer) << std::endl; + VERBOSE(CPU_StaticTensorManager) + << "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl; } } } -void StaticTensorManager::deallocateConsts(void) { _const_mgr->deallocate(); } - void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } void 
StaticTensorManager::buildTensor(const ir::OperandIndex &ind, @@ -80,8 +70,17 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, bool as_const) { assert(!_tensors->getNativeTensor(ind)); - auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager); - _tensors->setNativeTensor(ind, tensor); + if (as_const) + { + auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout); + _tensors->setNativeTensor(ind, std::move(tensor)); + } + else + { + auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, + _dynamic_tensor_manager->dynamic_mem_mgr().get()); + _tensors->setNativeTensor(ind, std::move(tensor)); + } _as_constants[ind] = as_const; } @@ -113,6 +112,6 @@ void StaticTensorManager::iterate(const std::function<void(const ir::OperandInde fn(it.first); } -} // namespace cpu_common +} // namespace basic } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/basic/Tensor.cc b/runtime/onert/core/src/backend/basic/Tensor.cc new file mode 100644 index 000000000..de1cff4f4 --- /dev/null +++ b/runtime/onert/core/src/backend/basic/Tensor.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/basic/Tensor.h" + +#include "ir/DataType.h" +#include "backend/basic/MemoryManager.h" + +namespace onert +{ +namespace backend +{ +namespace basic +{ + +Tensor::~Tensor() {} + +size_t Tensor::calcOffset(const ir::Coordinates &coords) const +{ + auto shape = getShape(); + size_t rank = shape.rank(); + rank = rank == 0 ? 1 : rank; + size_t offset = 0; + for (size_t i = 0; i < rank; ++i) + { + auto dim = shape.rank() == 0 ? 
1 : shape.dim(i); + offset = offset * dim + coords[i]; + } + offset *= sizeOfDataType(data_type()); + return offset; +} + +void Tensor::setShape(const ir::Shape &new_shape) { _info.shape(new_shape); } + +bool Tensor::applyShape(const ir::Shape &new_shape) +{ + bool previously_dynamic = is_dynamic(); + + auto allocTensorMem = [&]() { + auto capacity = total_size(); + assert(_dynamic_mem_mgr); + auto alloc = _dynamic_mem_mgr->allocate(this, capacity); + setBuffer(alloc); + }; + + if (!previously_dynamic || buffer() == nullptr) + { + // Always set shape - when buffer with same size was already allocated, shape could differ + setShape(new_shape); + set_dynamic(); + allocTensorMem(); + } + else + { + auto previous_size = total_size(); + auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type()); + if (previous_size != new_size) + { + assert(_dynamic_mem_mgr); + _dynamic_mem_mgr->deallocate(this); + + setShape(new_shape); + set_dynamic(); + allocTensorMem(); + } + else + { // when buffer with same size was already allocated, shape could differ + setShape(new_shape); + } + } + return true; +} + +ir::Shape Tensor::getShape() const { return _info.shape(); } + +void Tensor::deallocBuffer() +{ + if (_allocator) + { + _buffer = nullptr; + _allocator.reset(); + if (_dynamic_mem_mgr) + { + _dynamic_mem_mgr->deallocate(this); + } + } +} + +} // namespace basic +} // namespace backend +} // namespace onert + +// ExternalTensor + +namespace onert +{ +namespace backend +{ +namespace basic +{ + +// `dynamic_cast` not working across library boundaries on NDK +// With this as a key function, `dynamic_cast` works across dl +ExternalTensor::~ExternalTensor() {} + +} // namespace basic +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/basic/TensorBuilder.cc b/runtime/onert/core/src/backend/basic/TensorBuilder.cc new file mode 100644 index 000000000..4912af1f5 --- /dev/null +++ b/runtime/onert/core/src/backend/basic/TensorBuilder.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <backend/basic/TensorBuilder.h> + +#include <util/logging.h> + +#include <cassert> + +namespace onert +{ +namespace backend +{ +namespace basic +{ + +TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg) + : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} +{ + /* empty */ +} + +TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, + const std::string planner_id) + : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get())} +{ + /* empty */ +} + +void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout layout) +{ + _tensor_info_map.emplace(ind, info); + + // CPU backend supports only one layout as NHWC + assert(layout == ir::Layout::NHWC); + if (info.isDynamic()) + { + _dynamic_tensor_mgr->buildTensor(ind, info, layout); + } + else + { + _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant()); + } +} + +void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) +{ + assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + const auto &tensor_info = _tensor_info_map.at(ind); + + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + const auto size = tensor_info.total_size(); + _static_tensor_mgr->claimPlan(ind, size); + } +} + +void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) +{ + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + _static_tensor_mgr->releasePlan(ind); + } +} + +bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const +{ + return _tensor_info_map.find(ind) != _tensor_info_map.end(); +} + +void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); } + +} // namespace basic +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/cpu_common/Tensor.cc b/runtime/onert/core/src/backend/basic/train/TrainableTensor.cc index f34564dd9..d09604224 100644 --- a/runtime/onert/core/src/backend/cpu_common/Tensor.cc +++ b/runtime/onert/core/src/backend/basic/train/TrainableTensor.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,30 +14,36 @@ * limitations under the License. */ -#include "backend/cpu_common/Tensor.h" +#include <backend/basic/train/TrainableTensor.h> namespace onert { namespace backend { -namespace cpu_common +namespace basic +{ +namespace train { -size_t Tensor::calcOffset(const ir::Coordinates &coords) const +std::vector<ITensor *> TrainableTensor::optVars() { - size_t rank = num_dimensions(); - rank = rank == 0 ? 
1 : rank; - size_t offset = 0; - for (size_t i = 0; i < rank; ++i) + std::vector<ITensor *> ret; + for (auto &&e : _opt_vars) { - offset = offset * dimension(i) + coords[i]; + ret.emplace_back(e.get()); } - offset *= sizeOfDataType(data_type()); - return offset; + return ret; } -void Tensor::setShape(const ir::Shape &new_shape) { _info.shape(new_shape); } +void TrainableTensor::fillBuffer(const std::shared_ptr<ir::Data> &data) +{ + auto *buffer = _tensor.buffer(); + assert(buffer); + assert(total_size() == data->size()); + std::memcpy(buffer, data->base(), data->size()); +} -} // namespace cpu_common +} // namespace train +} // namespace basic } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/Backend.h b/runtime/onert/core/src/backend/builtin/Backend.h index 670f7750f..c05494a6a 100644 --- a/runtime/onert/core/src/backend/controlflow/Backend.h +++ b/runtime/onert/core/src/backend/builtin/Backend.h @@ -14,16 +14,24 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__ -#define __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__ +#ifndef __ONERT_BACKEND_BUILTIN_BACKEND_H__ +#define __ONERT_BACKEND_BUILTIN_BACKEND_H__ +#include "BackendContext.h" #include "Config.h" -#include "ConstantInitializer.h" #include "KernelGenerator.h" #include "TensorBuilder.h" #include "Tensor.h" +#ifdef ONERT_TRAIN +#include "train/BackendContext.h" +#include "train/KernelGenerator.h" +#include "train/TensorRegistry.h" +#endif // ONERT_TRAIN #include <backend/Backend.h> +#ifdef ONERT_TRAIN +#include <backend/train/ITrainableBackend.h> +#endif // ONERT_TRAIN #include <memory> @@ -31,22 +39,23 @@ namespace onert { namespace backend { -namespace controlflow +namespace builtin { class Backend : public ::onert::backend::Backend +#ifdef ONERT_TRAIN + , + public backend::train::ITrainableBackend +#endif // ONERT_TRAIN { public: Backend() : _config{std::make_shared<Config>()} {} std::shared_ptr<IConfig> config() const override { return _config; } - std::unique_ptr<BackendContext> newContext(const ir::Graph &graph, - const std::shared_ptr<custom::IKernelBuilder> &, - bool) const override + std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override { - const auto &operands = graph.operands(); - auto context = std::make_unique<BackendContext>(this, &graph); + auto context = std::make_unique<BackendContext>(this, std::move(data)); // ControlFlow backend may not build tensors for itself because the backend's operation uses // tensors of other baceknd instead // But the backend builds tensors in case of that the controlflow operation may have constant @@ -68,19 +77,33 @@ public: auto tb = std::make_shared<TensorBuilder>(tr); context->tensor_registry = tr; context->tensor_builder = tb; - context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); - context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr); - context->tensor_register = nullptr; - context->optimizer = nullptr; + context->kernel_gen = std::make_shared<KernelGenerator>( + *context->graph(), tb->dynamicTensorManager(), tr, context->external_context()); return context; } +#ifdef ONERT_TRAIN + std::unique_ptr<backend::train::TrainableBackendContext> + newContext(backend::train::TrainableContextData &&tdata) const override + { + const auto &tgraph = *tdata.tgraph; + auto tr = std::make_shared<train::TensorRegistry>(); + // TODO Create TensorBuilder if necessary + auto tdata_ptr = 
std::make_unique<backend::train::TrainableContextData>(std::move(tdata)); + auto context = std::make_unique<train::BackendContext>(this, std::move(tdata_ptr), tr); + + context->kernel_gen = + std::make_shared<train::KernelGenerator>(tgraph, tr, context->external_context()); + return context; + } +#endif // ONERT_TRAIN + private: std::shared_ptr<IConfig> _config; }; -} // namespace controlflow +} // namespace builtin } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__ +#endif // __ONERT_BACKEND_BUILTIN_BACKEND_H__ diff --git a/runtime/onert/core/src/backend/builtin/BackendContext.cc b/runtime/onert/core/src/backend/builtin/BackendContext.cc new file mode 100644 index 000000000..573617e28 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/BackendContext.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "KernelGenerator.h" +#include "backend/basic/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ + +ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); } + +FunctionMap BackendContext::genKernels() +{ + FunctionMap ret; + + for (auto &&op_ind : _data.op_order) + { + auto fn_seq = kernel_gen->generate(op_ind); + ret.emplace_back(op_ind, std::move(fn_seq)); + } + + basic::initConsts(*this); + + // NOTE For memory optimization, we want to free some operand data + const_cast<ir::Graph *>(graph())->operands().iterate( + [&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); + + for (auto &&it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/builtin/BackendContext.h b/runtime/onert/core/src/backend/builtin/BackendContext.h new file mode 100644 index 000000000..93e825239 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/BackendContext.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "ExternalContext.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, ContextData &&data, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, std::move(data), tensor_registry), + tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, + _external_context(std::make_shared<ExternalContext>()) + { + } + + ITensorRegistry *genTensors() override; + + FunctionMap genKernels() override; + + std::shared_ptr<ExternalContext> external_context() { return _external_context; } + +private: + void planTensors(const std::vector<onert::ir::OperationIndex> &order, + const compiler::GraphLowerInfo &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: + // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, + // the thread pool is also created in duplicate + // TODO Create one ruy context for session + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/core/src/backend/controlflow/Config.cc b/runtime/onert/core/src/backend/builtin/Config.cc index 5ec01fe11..e5f6d4c21 100644 --- a/runtime/onert/core/src/backend/controlflow/Config.cc +++ b/runtime/onert/core/src/backend/builtin/Config.cc @@ -20,18 +20,18 @@ namespace onert { namespace backend { -namespace controlflow +namespace builtin { -std::string Config::ID = "controlflow"; +std::string Config::ID = "builtin"; bool Config::initialize() { return true; } -ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout frontend_layout) +ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout frontend_layout) { return frontend_layout; } -} // namespace controlflow +} // namespace builtin } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/Config.h b/runtime/onert/core/src/backend/builtin/Config.h index 6645ed59d..196b299d3 100644 --- a/runtime/onert/core/src/backend/controlflow/Config.h +++ b/runtime/onert/core/src/backend/builtin/Config.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__ -#define __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__ +#ifndef __ONERT_BACKEND_BUILTIN_CONFIG_H__ +#define __ONERT_BACKEND_BUILTIN_CONFIG_H__ #include <backend/IConfig.h> #include <memory> @@ -25,7 +25,7 @@ namespace onert { namespace backend { -namespace controlflow +namespace builtin { class Config : public IConfig @@ -34,7 +34,7 @@ public: static std::string ID; std::string id() override { return ID; } bool initialize() override; - ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override; bool supportPermutation() override { return false; } bool supportDynamicTensor() override { @@ -46,8 +46,8 @@ public: std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); } }; -} // namespace controlflow +} // namespace builtin } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__ +#endif // __ONERT_BACKEND_BUILTIN_CONFIG_H__ diff --git a/runtime/onert/core/src/backend/controlflow/UserTensor.cc b/runtime/onert/core/src/backend/builtin/ConstantInitializer.h index c8e2ebade..6b8eb3e9d 100644 --- a/runtime/onert/core/src/backend/controlflow/UserTensor.cc +++ b/runtime/onert/core/src/backend/builtin/ConstantInitializer.h @@ -14,27 +14,22 @@ * limitations under the License. */ -#include "UserTensor.h" +#ifndef __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__ +#define __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__ + +#include <backend/basic/ConstantInitializer.h> namespace onert { namespace backend { -namespace controlflow +namespace builtin { -size_t UserTensor::calcOffset(const ir::Coordinates &coords) const -{ - size_t rank = num_dimensions(); - size_t offset = 0; - for (size_t i = 0; i < rank; ++i) - { - offset = offset * dimension(i) + coords[i]; - } - offset *= sizeOfDataType(data_type()); - return offset; -} +using ConstantInitializer = basic::ConstantInitializer; -} // namespace controlflow +} // namespace builtin } // namespace backend } // namespace onert + +#endif // __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/core/src/backend/builtin/DynamicTensorManager.h b/runtime/onert/core/src/backend/builtin/DynamicTensorManager.h new file mode 100644 index 000000000..148948a9c --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/DynamicTensorManager.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__ + +#include "TensorRegistry.h" +#include "Tensor.h" + +#include <backend/basic/DynamicTensorManager.h> + +namespace onert +{ +namespace backend +{ +namespace builtin +{ + +using DynamicTensorManager = basic::DynamicTensorManager; + +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__ diff --git a/runtime/onert/core/src/backend/builtin/ExternalContext.h b/runtime/onert/core/src/backend/builtin/ExternalContext.h new file mode 100644 index 000000000..390dbb579 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/ExternalContext.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__ +#define __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__ + +#include <util/ConfigSource.h> + +#include <ruy/context.h> +#include <ruy/context_get_ctx.h> +#include <ruy/ctx.h> +#include <ruy/tune.h> + +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace builtin +{ + +// TODO Unify this with cpu::ExternalContext +class ExternalContext +{ +private: + static const int kDefaultNumThreadpoolThreads = 1; + +public: + ExternalContext() : _ruy_context(std::make_unique<ruy::Context>()) + { + setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS)); + initPerThreadState(); + } + + void setMaxNumThreads(int max_num_threads) + { + const int target_num_threads = + max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads; + _ruy_context->set_max_num_threads(target_num_threads); + } + + ruy::Context *ruy_context() const { return _ruy_context.get(); } + +private: + void initPerThreadState() + { + // Initialize per-thread state. + const int thread_count = _ruy_context->max_num_threads(); + auto ctx = ruy::get_ctx(_ruy_context.get()); + ctx->EnsureThreadSpecificResources(thread_count); + for (int i = 0; i < thread_count; i++) + { + ctx->GetThreadSpecificTuningResolver(i)->SetTuning(ctx->explicit_tuning()); + } + } + +private: + const std::unique_ptr<ruy::Context> _ruy_context; +}; + +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__ diff --git a/runtime/onert/core/src/backend/builtin/IOTensor.cc b/runtime/onert/core/src/backend/builtin/IOTensor.cc new file mode 100644 index 000000000..f7f4a6977 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/IOTensor.cc @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "IOTensor.h" + +#include <assert.h> + +namespace onert +{ +namespace backend +{ +namespace builtin +{ + +// `dynamic_cast` not working across library boundaries on NDK +// With this as a key function, `dynamic_cast` works across dl +IOTensor::~IOTensor() {} + +IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout) + : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout} +{ + setUserTensor(nullptr, 0); +} + +void IOTensor::setTensor(IPortableTensor *tensor) +{ + assert(tensor); + assert(tensor != this); + // TODO Handle when layout was changed + assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet + _user_tensor.reset(); + _tensor = tensor; +} + +void IOTensor::setUserTensor(uint8_t *buffer, size_t size) +{ + _user_tensor = std::make_unique<UserTensor>(_orig_info, _orig_layout, buffer, size); + _tensor = _user_tensor.get(); +} + +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/builtin/IOTensor.h b/runtime/onert/core/src/backend/builtin/IOTensor.h new file mode 100644 index 000000000..d94ed0bca --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/IOTensor.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__ +#define __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__ + +#include "backend/IPortableTensor.h" +#include "UserTensor.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ + +/** + * @brief Tensor object that indirects to the tensor it is pointing to. + * + * A model I/O tensor could be two types. + * + * 1. @c UserTensor, if it is the primary graph + * 2. Any other derivative of @c IPortableTensor from another backend, otherwise + * + * To support these, this object indirects everything to the actual tensor pointer. + * Exceptionally if it is UserTensor, this class creates and manages it. 
+ */ +class IOTensor : public IPortableTensor +{ +public: + IOTensor(const ir::OperandInfo &info, ir::Layout layout); + ~IOTensor(); + +public: + void setTensor(IPortableTensor *tensor); + void setUserTensor(uint8_t *buffer, size_t size); + const ir::OperandInfo &orig_info() const { return _orig_info; } + ir::Layout orig_layout() const { return _orig_layout; } + +public: + uint8_t *buffer() const override { return _tensor->buffer(); } + size_t total_size() const override { return _tensor->total_size(); } + size_t calcOffset(const ir::Coordinates &coords) const override + { + return _tensor->calcOffset(coords); + } + ir::Layout layout() const override { return _tensor->layout(); } + ir::DataType data_type() const override { return _tensor->data_type(); } + bool is_dynamic() const override + { + return _is_dynamic || _orig_info.isDynamic() || (_tensor && _tensor->is_dynamic()); + } + void set_dynamic() override { _is_dynamic = true; } + ir::Shape getShape() const override { return _tensor->getShape(); } + void setShape(const ir::Shape &shape) override + { + // Workaround for IPortableTensor holds _info as its member + _info.shape(shape); + _tensor->setShape(shape); + } + bool is_constant() const override { return _tensor->is_constant(); } + bool applyShape(const ir::Shape &shape) override + { + // Workaround for IPortableTensor holds _info as its member + _info.shape(shape); + return _tensor->applyShape(shape); + } + +public: + void setShapeOfIPortableTensor(const ir::Shape &shape) { _info.shape(shape); } + +private: + const ir::OperandInfo _orig_info; + const ir::Layout _orig_layout; + bool _is_dynamic{false}; + IPortableTensor *_tensor{nullptr}; //< The actual tensor that is indirected + std::unique_ptr<UserTensor> _user_tensor; //< If it is a user tensor, it is managed by this object +}; + +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__ diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc new file mode 100644 index 000000000..00c200a92 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "KernelGenerator.h" + +#include "kernel/IfLayer.h" +#include "kernel/PermuteLayer.h" +#include "kernel/WhileLayer.h" + +#include "exec/FunctionSequence.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ + +KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager, + const std::shared_ptr<TensorRegistry> &tensor_reg, + const std::shared_ptr<ExternalContext> &external_context) + : basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager}, + _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _model_index{}, + _external_context{external_context} +{ + UNUSED_RELEASE(_graph); + UNUSED_RELEASE(_tensor_registries); + UNUSED_RELEASE(_executors); +} + +std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind) +{ + assert(_dyn_tensor_manager); + assert(_tensor_reg); + + auto ret = std::make_unique<exec::FunctionSequence>(); + + // Prepare to handle dynamic tensors later + auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); + { + dyn_ctx->op = &_graph.operations().at(ind); + dyn_ctx->dynamic_shape_inferer = + std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg); + } + ret->dynamic_tensor_ctx(dyn_ctx); + + auto &op = _graph.operations().at(ind); + op.accept(*this); + assert(_return_fn); // _return_fn must have been generated + ret->append(std::move(_return_fn)); + + return ret; +} + +void KernelGenerator::visit(const ir::operation::If &node) +{ + const auto then_subg_index = node.param().then_subg_index; + const auto else_subg_index = node.param().else_subg_index; + + std::vector<backend::IPortableTensor *> input_tensors; + for (const auto &input_index : node.getInputs()) + { + auto input_tensor = getPortableTensor(input_index); + input_tensors.emplace_back(input_tensor); + } + + std::vector<backend::IPortableTensor *> output_tensors; + for (const auto &output_index : node.getOutputs()) + { + auto output_tensor = getPortableTensor(output_index); + output_tensors.emplace_back(output_tensor); + } + + // IfLayer just set Executors instead of then and else executor to avoid complexity of + // creating executor recusively + const auto cond_tensor = input_tensors.front(); + input_tensors.erase(input_tensors.begin()); + auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>( + cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executors, + _model_index, _external_context); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::Permute &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + // Add PermuteLayer + std::vector<ITensor *> output_tensors{getTensor(output_index)}; + std::vector<ITensor *> input_tensors{getTensor(input_index)}; + + auto fn = + std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, _external_context); + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::While &node) +{ + const auto cond_subg_index = node.param().cond_subg_index; + const auto body_subg_index = node.param().body_subg_index; + + // This op does not support input as a constant, because builtin backend does not have + // TensorBuilder + std::vector<backend::IPortableTensor *> input_tensors; + for (const auto &input_index : node.getInputs()) + { + auto input_tensor = getPortableTensor(input_index); + input_tensors.emplace_back(input_tensor); + } + + 
std::vector<backend::IPortableTensor *> output_tensors; + for (const auto &output_index : node.getOutputs()) + { + auto output_tensor = getPortableTensor(output_index); + output_tensors.emplace_back(output_tensor); + } + + // WhileLayer just set Executors instead of cond and body executor to avoid complexity of + // creating executor recusively + auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>( + input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors, _model_index, + _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context); + + _return_fn = std::move(fn); +} + +backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index) +{ + // get Tensor from all tensor registries (for Permute op) + auto ret = _tensor_registries.getITensor(index); + assert(ret != nullptr); + return ret; +} + +backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index) +{ + auto ret = _tensor_reg->getPortableTensor(index); + assert(ret != nullptr); + return ret; +} + +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/KernelGenerator.h index b84a810e4..3c86fe306 100644 --- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h +++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.h @@ -14,60 +14,66 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__ -#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__ +#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__ -#include <backend/IKernelGenerator.h> -#include <backend/ITensorBuilder.h> -#include <exec/IExecutor.h> -#include <ir/Graph.h> -#include "TensorBuilder.h" -#include "compiler/TensorRegistries.h" +#include "DynamicTensorManager.h" +#include "ExternalContext.h" #include "TensorRegistry.h" +#include "../../compiler/TensorRegistries.h" + +#include "backend/basic/KernelGeneratorBase.h" +#include "exec/IExecutors.h" +#include "ir/Graph.h" namespace onert { namespace backend { -namespace controlflow +namespace builtin { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public basic::KernelGeneratorBase { public: - KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager, - const std::shared_ptr<TensorRegistry> &tensor_reg); + KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager, + const std::shared_ptr<TensorRegistry> &tensor_reg, + const std::shared_ptr<ExternalContext> &external_context); void setTensorRegistries(const compiler::TensorRegistries &tensor_registries) { _tensor_registries = tensor_registries; } - void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map) + void setExecutors(const std::shared_ptr<exec::IExecutors> &executors) { // FIXME Using shared_ptr's raw pointer! 
- _executor_map = executor_map.get(); + _executors = executors.get(); } - using IKernelGenerator::visit; + void setModelIndex(const ir::ModelIndex &index) { _model_index = index; } + + std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override; - void visit(const ir::OpSequence &) override; +private: void visit(const ir::operation::If &) override; void visit(const ir::operation::Permute &) override; void visit(const ir::operation::While &) override; private: - std::shared_ptr<backend::ITensor> getTensor(const ir::OperandIndex &index); + backend::ITensor *getTensor(const ir::OperandIndex &index); + backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index); private: - const ir::Graph &_graph; - IDynamicTensorManager *_dyn_tensor_manager; + DynamicTensorManager *_dyn_tensor_manager; std::shared_ptr<TensorRegistry> _tensor_reg; compiler::TensorRegistries _tensor_registries; - exec::ExecutorMap *_executor_map; + exec::IExecutors *_executors; + ir::ModelIndex _model_index; + const std::shared_ptr<ExternalContext> _external_context; }; -} // namespace controlflow +} // namespace builtin } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__ +#endif // __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h b/runtime/onert/core/src/backend/builtin/Tensor.h index fa2a2d54c..d55e64161 100644 --- a/runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h +++ b/runtime/onert/core/src/backend/builtin/Tensor.h @@ -14,23 +14,23 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__ -#define __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__ +#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_H__ +#define __ONERT_BACKEND_BUILTIN_TENSOR_H__ -#include "backend/ITensorRegistry.h" -#include "UserTensor.h" +#include <backend/basic/Tensor.h> namespace onert { namespace backend { -namespace controlflow +namespace builtin { -using UserTensorRegistry = PortableTensorRegistryTemplate<UserTensor>; +using Tensor = basic::Tensor; +using ExternalTensor = basic::ExternalTensor; -} // namespace controlflow +} // namespace builtin } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__ +#endif // __ONERT_BACKEND_BUILTIN_TENSOR_H__ diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/builtin/TensorBuilder.cc index e5c3f5fd5..a2f7af3ea 100644 --- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc +++ b/runtime/onert/core/src/backend/builtin/TensorBuilder.cc @@ -24,13 +24,13 @@ namespace onert { namespace backend { -namespace controlflow +namespace builtin { TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg) - : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)}, - _static_tensor_mgr{ - new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())} + : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())}, + _static_tensor_mgr{ + new basic::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())} { /* empty */ } @@ -40,15 +40,14 @@ void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::Op { _tensor_info_map.emplace(ind, info); - _tensor_layout_map.insert({ind, backend_layout}); - + VERBOSE_F() << "cpucommon REGISTER!! 
" << ind << std::endl; if (info.isDynamic()) { - _dynamic_tensor_mgr->buildTensor(ind, info, _tensor_layout_map[ind]); + _dynamic_tensor_mgr->buildTensor(ind, info, backend_layout); } else { - _static_tensor_mgr->buildTensor(ind, info, _tensor_layout_map[ind], info.isConstant()); + _static_tensor_mgr->buildTensor(ind, info, backend_layout, info.isConstant()); } } @@ -58,7 +57,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors return; - const auto tensor_info = _tensor_info_map.at(ind); + const auto &tensor_info = _tensor_info_map.at(ind); if (!nativeOwnTensorAt(ind)->is_dynamic()) { @@ -89,39 +88,18 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const return _tensor_info_map.find(ind) != _tensor_info_map.end(); } -void TensorBuilder::prepare(void) -{ - _static_tensor_mgr->allocateConsts(); - _static_tensor_mgr->allocateNonconsts(); -} +void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); } -void TensorBuilder::allocate() +DynamicTensorManager *TensorBuilder::dynamicTensorManager(void) { - // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate - // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. + return _dynamic_tensor_mgr.get(); } -std::shared_ptr<cpu_common::Tensor> TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind) +basic::Tensor *TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind) { return _tensor_reg->getNativeOwnTensor(ind); } -std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void) -{ - return std::move(_static_tensor_mgr); -} - -std::unique_ptr<ITensorManager> TensorBuilder::releaseDynamicTensorManager(void) -{ - return std::move(_dynamic_tensor_mgr); -} - -void TensorBuilder::setNativeUserTensor(const ir::OperandIndex &ind, - const std::shared_ptr<UserTensor> &tensor) -{ - _tensor_reg->setNativeUserTensor(ind, tensor); -} - -} // namespace controlflow +} // namespace builtin } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/builtin/TensorBuilder.h index 2f2a2c47e..1e364c927 100644 --- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h +++ b/runtime/onert/core/src/backend/builtin/TensorBuilder.h @@ -14,29 +14,27 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__ -#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__ +#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__ +#define __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__ -#include <backend/cpu_common/StaticTensorManager.h> -#include <backend/cpu_common/TensorRegistry.h> -#include <backend/cpu_common/Tensor.h> +#include <backend/basic/StaticTensorManager.h> +#include <backend/basic/TensorRegistry.h> +#include <backend/basic/Tensor.h> -#include <backend/ITensorBuilder.h> #include <ir/OperandIndexMap.h> #include <unordered_map> #include "DynamicTensorManager.h" -#include "UserTensorRegistry.h" namespace onert { namespace backend { -namespace controlflow +namespace builtin { -class TensorBuilder : public ITensorBuilder +class TensorBuilder { public: TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg); @@ -48,42 +46,34 @@ public: * @param[in] layout Operand data layout */ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) override; + ir::Layout backend_layout); - void notifyFirstUse(const ir::OperandIndex &) override; - void notifyLastUse(const ir::OperandIndex &) override; + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); - bool isRegistered(const ir::OperandIndex &) const override; + bool isRegistered(const ir::OperandIndex &) const; - void prepare(void) override; - void allocate() override; - void postFunctionPrepare() override { /* DO NOTHING */} + void allocate(void); - std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override; - - IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } - - std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override; + DynamicTensorManager *dynamicTensorManager(void); /** * @brief Get tensor with a specific OperandIndex. * @param ind OperandIndex for the tensor. There must exist a tensor with this ind. * If not, program will crash with assert or exception. - * @return shared_ptr<operand::Tensor> + * @return operand::Tensor * */ - std::shared_ptr<cpu_common::Tensor> nativeOwnTensorAt(const ir::OperandIndex &ind); - void setNativeUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor); + basic::Tensor *nativeOwnTensorAt(const ir::OperandIndex &ind); private: const std::shared_ptr<TensorRegistry> _tensor_reg; std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr; - std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr; + std::unique_ptr<basic::StaticTensorManager> _static_tensor_mgr; ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; - ir::OperandIndexMap<ir::Layout> _tensor_layout_map; }; -} // namespace controlflow +} // namespace builtin } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__ +#endif // __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__ diff --git a/runtime/onert/core/src/backend/builtin/TensorRegistry.h b/runtime/onert/core/src/backend/builtin/TensorRegistry.h new file mode 100644 index 000000000..ae68b1318 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/TensorRegistry.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__ +#define __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__ + +#include "backend/basic/TensorRegistry.h" +#include "backend/ITensorRegistry.h" +#include "Tensor.h" +#include "IOTensor.h" +#include <assert.h> + +namespace onert +{ +namespace backend +{ +namespace builtin +{ + +/** + * @brief Tensor registry class for builtin backend + * + * This class contains three types of tensors. Two native tensors(tensors that are managed by this + * backend) and the other is migrant tensor. + * + * - NativeIOTensor - @c IOTensor managed by this backend ( in @c _base_reg ) + * - NOTE The tensor it actually points to can be from another backend + * - NativeOwnTensor - @c basic::Tensor managed by this backend ( in @c _base_reg ) + * - MigrantTensor - @c IPortableTensor managed by other backends + * + * @note @c _base_reg is used in implementation to reuse @c basic::StaticTensorManager + * + */ +class TensorRegistry : public ITensorRegistry +{ +public: + TensorRegistry() : _base_reg{new basic::TensorRegistry} {} + + ITensor *getITensor(const ir::OperandIndex &ind) override + { + auto base_tensor = _base_reg->getITensor(ind); + if (base_tensor) + return base_tensor; + return getNativeIOTensor(ind); + } + + ITensor *getNativeITensor(const ir::OperandIndex &ind) override + { + auto base_tensor = _base_reg->getNativeITensor(ind); + if (base_tensor) + return base_tensor; + return getNativeIOTensor(ind); + } + + IPortableTensor *getPortableTensor(const ir::OperandIndex &ind) + { + auto base_tensor = _base_reg->getPortableTensor(ind); + if (base_tensor) + return base_tensor; + return getNativeIOTensor(ind); + } + + IPortableTensor *getNativeTensor(const ir::OperandIndex &ind) + { + auto base_tensor = _base_reg->getNativeTensor(ind); + if (base_tensor) + return base_tensor; + return getNativeIOTensor(ind); + } + + Tensor *getNativeOwnTensor(const ir::OperandIndex &ind) + { + return _base_reg->getNativeTensor(ind); + } + + IOTensor *getNativeIOTensor(const ir::OperandIndex &ind) + { + auto tensor = _native_io_tensors.find(ind); + if (tensor != _native_io_tensors.end()) + return tensor->second.get(); + return nullptr; + } + + bool setMigrantTensor(const ir::OperandIndex &ind, IPortableTensor *tensor) override + { + assert(tensor); + assert(!getITensor(ind)); // For the ind, tensor is not registered yet + _base_reg->setMigrantTensor(ind, tensor); + return true; + } + + void setNativeOwnTensor(ir::OperandIndex ind, std::unique_ptr<Tensor> &&tensor) + { + assert(tensor); + assert(!getITensor(ind)); // For the ind, tensor is not registered yet + _base_reg->setNativeTensor(ind, std::move(tensor)); + } + + void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr<IOTensor> &&tensor) + { + assert(tensor); + assert(!getITensor(ind)); // For the ind, tensor is not registered yet + _native_io_tensors[ind] = std::move(tensor); + } + + const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors() + { + return _native_io_tensors; + } + std::shared_ptr<basic::TensorRegistry> base_reg() { return _base_reg; } + +private: + 
std::shared_ptr<basic::TensorRegistry> _base_reg; + ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors; +}; + +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // ifndef __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__ diff --git a/runtime/onert/core/src/backend/builtin/UserTensor.cc b/runtime/onert/core/src/backend/builtin/UserTensor.cc new file mode 100644 index 000000000..f0b00b928 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/UserTensor.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "UserTensor.h" + +#include "util/Exceptions.h" +#include "ir/DataType.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ + +size_t UserTensor::calcOffset(const ir::Coordinates &coords) const +{ + size_t rank = getShape().rank(); + size_t offset = 0; + for (size_t i = 0; i < rank; ++i) + { + offset = offset * getShape().dim(i) + coords[i]; + } + offset *= sizeOfDataType(data_type()); + return offset; +} + +bool UserTensor::applyShape(const ir::Shape &new_shape) +{ + // User tensors cannot be reallocated. + auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type()); + if (total_size() < new_size) + throw InsufficientBufferSizeException{"User given buffer size is too small."}; + setShape(new_shape); + return true; +} + +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/UserTensor.h b/runtime/onert/core/src/backend/builtin/UserTensor.h index 9be33595d..0d0ed73c5 100644 --- a/runtime/onert/core/src/backend/controlflow/UserTensor.h +++ b/runtime/onert/core/src/backend/builtin/UserTensor.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__ -#define __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__ +#ifndef __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__ +#define __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__ #include "ir/OperandInfo.h" #include "backend/IPortableTensor.h" @@ -24,7 +24,7 @@ namespace onert { namespace backend { -namespace controlflow +namespace builtin { /** @@ -38,16 +38,12 @@ namespace controlflow class UserTensor : public IPortableTensor { public: - UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size, - IDynamicTensorManager *dynamic_tensor_manager) - : _info{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false}, - _dynamic_tensor_manager{dynamic_tensor_manager} + UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size) + : IPortableTensor{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false} { } - UserTensor(const ir::OperandInfo &info, ir::Layout layout, - IDynamicTensorManager *dynamic_tensor_manager) - : UserTensor{info, layout, nullptr, 0, dynamic_tensor_manager} + UserTensor(const ir::OperandInfo &info, ir::Layout layout) : UserTensor{info, layout, nullptr, 0} { } @@ -61,31 +57,25 @@ public: public: uint8_t *buffer() const override { return _buffer; } size_t total_size() const override { return _size; } - size_t dimension(size_t index) const override { return _info.shape().dim(index); } - size_t num_dimensions() const override { return _info.shape().rank(); } size_t calcOffset(const ir::Coordinates &coords) const override; ir::Layout layout() const override { return _layout; } ir::DataType data_type() const override { return _info.typeInfo().type(); } - float data_scale() const override { return _info.typeInfo().scale(); } - int32_t data_offset() const override { return _info.typeInfo().offset(); } bool is_dynamic() const override { return _dynamic; } void set_dynamic() override { _dynamic = true; } ir::Shape getShape() const override { return _info.shape(); } void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); } bool is_constant() const override { return false; } - IDynamicTensorManager *dynamic_tensor_manager() override { return _dynamic_tensor_manager; } + bool applyShape(const ir::Shape &) override; private: - ir::OperandInfo _info; ir::Layout _layout; uint8_t *_buffer; size_t _size; bool _dynamic; - IDynamicTensorManager *_dynamic_tensor_manager; }; -} // namespace controlflow +} // namespace builtin } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__ +#endif // __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__ diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc new file mode 100644 index 000000000..51bc5a8f2 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "IfLayer.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace kernel +{ + +IfLayer::IfLayer(backend::IPortableTensor *cond_tensor, + const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, + const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, + exec::IExecutors *executors, const ir::ModelIndex &model_index, + const std::shared_ptr<ExternalContext> &external_context) + : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors}, + _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, _executors{executors}, + _model_index{model_index}, _external_context{external_context} +{ + // At this point, executors may not have executors of then subg and else subg +} + +void IfLayer::run() +{ + // Check condition + // // If true + // // // Set _input_tensors -> then-subg's inputs + // // // Set outputs of then-subg -> _output_tensors + // // // Run then-subg + // // Else + // // // Set _input_tensors -> else-subg's inputs + // // // Set outputs of else-subg -> _output_tensors + // // // Run else-subg + + auto getResultCond = [](backend::IPortableTensor *tensor) -> bool { + bool ret = false; + tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); }); + return ret; + }; + + exec::IExecutor *subg_exec = nullptr; + bool cond_result = getResultCond(_cond_tensor); + if (cond_result) + { + VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl; + subg_exec = _executors->at(_model_index, _then_subg_index); + } + else + { + VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl; + subg_exec = _executors->at(_model_index, _else_subg_index); + } + + subg_exec->execute(_input_tensors, _output_tensors); + VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index) + << std::endl; +} + +} // namespace kernel +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h index ef3a6e6f6..8f639ced9 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h +++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h @@ -14,17 +14,18 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__ -#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__ +#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__ +#define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__ -#include <backend/ITensor.h> -#include <exec/IExecutor.h> +#include <backend/IPortableTensor.h> +#include <exec/IExecutors.h> +#include "../ExternalContext.h" namespace onert { namespace backend { -namespace controlflow +namespace builtin { namespace kernel { @@ -32,32 +33,30 @@ namespace kernel class IfLayer : public ::onert::exec::IFunction { public: - IfLayer(const std::shared_ptr<backend::ITensor> &cond_tensor, - const std::vector<std::shared_ptr<backend::ITensor>> input_tensors, - const std::vector<std::shared_ptr<backend::ITensor>> output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, - const exec::DynAllocInfoMap &outputs_dyn_alloc_info, + IfLayer(backend::IPortableTensor *cond_tensor, + const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, - exec::ExecutorMap *executor_map); + exec::IExecutors *executors, const ir::ModelIndex &model_index, + const std::shared_ptr<ExternalContext> &external_context); public: void run() override; private: - const std::shared_ptr<backend::ITensor> _cond_tensor; - const std::vector<std::shared_ptr<backend::ITensor>> _input_tensors; - const std::vector<std::shared_ptr<backend::ITensor>> _output_tensors; - const ir::OperandIndexSequence &_output_indices; - const ir::Graph &_graph; - const exec::DynAllocInfoMap _outputs_dyn_alloc_info; + backend::IPortableTensor *_cond_tensor; + const std::vector<backend::IPortableTensor *> _input_tensors; + const std::vector<backend::IPortableTensor *> _output_tensors; const ir::SubgraphIndex _then_subg_index; const ir::SubgraphIndex _else_subg_index; - exec::ExecutorMap *_executor_map; + exec::IExecutors *_executors; + ir::ModelIndex _model_index; + const std::shared_ptr<ExternalContext> _external_context; }; } // namespace kernel -} // namespace controlflow +} // namespace builtin } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__ +#endif // __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__ diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc new file mode 100644 index 000000000..600180077 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc @@ -0,0 +1,316 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "PermuteLayer.h" + +#include "../../../exec/ShapeConverter.h" + +#include <ruy/context.h> // from @ruy + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace kernel +{ + +PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors, + const std::vector<ITensor *> &dst_tensors, + const std::shared_ptr<ExternalContext> &external_context) + : _external_context{external_context}, _tasks_map{} +{ + assert(src_tensors.size() == dst_tensors.size()); + _src_tensors = src_tensors; + _dst_tensors = dst_tensors; + _src_tensors_offsets.resize(src_tensors.size()); + _dst_tensors_offsets.resize(dst_tensors.size()); +} + +void PermuteLayer::optimize() +{ + // Remove copying of tensor as nullptr + auto src_it = _src_tensors.begin(); + auto dst_it = _dst_tensors.begin(); + auto src_offsets_it = _src_tensors_offsets.begin(); + auto dst_offsets_it = _dst_tensors_offsets.begin(); + while (src_it != _src_tensors.end()) + { + if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr)) + { + src_it = _src_tensors.erase(src_it); + dst_it = _dst_tensors.erase(dst_it); + src_offsets_it = _src_tensors_offsets.erase(src_offsets_it); + dst_offsets_it = _dst_tensors_offsets.erase(dst_offsets_it); + } + else + { + auto src = *src_it; + auto dst = *dst_it; + src_offsets_it->resize(0); + dst_offsets_it->resize(0); + if (underlying_type(src->data_type()) != underlying_type(dst->data_type())) + continue; + const auto permute_type = [&]() -> PermuteType { + if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NHWC && + dst->layout() == ir::Layout::NCHW) + { + return PermuteType::NHWC_TO_NCHW; + } + else if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NCHW && + dst->layout() == ir::Layout::NHWC) + { + return PermuteType::NCHW_TO_NHWC; + } + else + { + return PermuteType::COPY; + } + }(); + + // TODO Support different types + auto fn = [&](backend::ITensor &src_tensor) { + dst->access([&](backend::ITensor &dst_tensor) { + // NOTE The buffer of both tensor can be nullptr in this step + const auto data_size = ir::sizeOfDataType(src_tensor.data_type()); + + if (permute_type == PermuteType::COPY) + { + if ((!src_tensor.has_padding() && !dst_tensor.has_padding())) + { + const auto num_elements = src_tensor.getShape().num_elements(); + const int thread_count = + _external_context->ruy_context()->max_num_threads() < static_cast<int>(num_elements) + ? _external_context->ruy_context()->max_num_threads() + : num_elements; + + std::vector<PermuteWorkerTask> tasks; + auto start = 0; + for (auto i = 0; i < thread_count; ++i) + { + int end = start + (num_elements - start) / (thread_count - i); + tasks.emplace_back(src_tensor.buffer(), dst_tensor.buffer(), start * data_size, + start * data_size, (end - start) * data_size); + start = end; + } + assert(tasks.size() >= 1); + _tasks_map[src] = std::move(tasks); + } + else + { + auto loop_shape = src_tensor.getShape(); + + auto copy_axis = loop_shape.rank() - 1; + copy_axis = copy_axis < 0 ? 
1 : copy_axis; + const auto copy_len = loop_shape.dim(copy_axis) * data_size; + loop_shape.dim(copy_axis) = 1; + + appendPermuteTasks(src, dst, loop_shape, copy_len); + } + } + else + { + assert(src_tensor.getShape().rank() == 4 && + (permute_type == PermuteType::NHWC_TO_NCHW || + permute_type == PermuteType::NCHW_TO_NHWC)); + const auto loop_shape = src_tensor.getShape(); + const auto copy_len = data_size; + + appendPermuteTasks(src, dst, loop_shape, copy_len); + } + }); + }; + src->access(fn); + src_it++; + dst_it++; + src_offsets_it++; + dst_offsets_it++; + } + } +} + +void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor, + const ir::Shape &loop_shape, size_t size) +{ + size_t distributed_dim = 0; + auto src_shape = src_tensor->getShape(); + if (src_tensor->layout() == dst_tensor->layout()) + { + for (int i = 1; i < src_shape.rank() - 1; ++i) + { + distributed_dim = src_shape.dim(distributed_dim) < src_shape.dim(i) ? i : distributed_dim; + } + } + const auto distributed_dim_val = src_shape.dim(distributed_dim); + const int thread_count = + _external_context->ruy_context()->max_num_threads() < static_cast<int>(distributed_dim_val) + ? _external_context->ruy_context()->max_num_threads() + : distributed_dim_val; + // NOTE Do not remove this assertion. It would cause performance degradation by new threads to be + // created in the context's thread pool + assert(thread_count <= _external_context->ruy_context()->max_num_threads()); + + std::vector<PermuteWorkerTask> tasks; + int start = 0; + auto one_thread_loop_shape = loop_shape; + for (auto i = 0; i < thread_count; ++i) + { + ir::Coordinates start_coords(one_thread_loop_shape.rank()); + start_coords.set(distributed_dim, start); + int end = start + (distributed_dim_val - start) / (thread_count - i); + one_thread_loop_shape.dim(distributed_dim) = end - start; + tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size); + start = end; + } + assert(tasks.size() >= 1); + _tasks_map[src_tensor] = std::move(tasks); +} + +void PermuteLayer::runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer) +{ + assert(src->getShape().num_elements() * ir::sizeOfDataType(src->data_type()) <= + src->total_size()); + std::vector<PermuteWorkerTask> &tasks = _tasks_map.at(src); + for (size_t i = 0; i < tasks.size(); ++i) + { + tasks.at(i).setBuffers(src->buffer(), dst_buffer); + } + assert(tasks.size() >= 1); + _external_context->ruy_context()->mutable_thread_pool()->Execute(tasks.size(), tasks.data()); +} + +void PermuteLayer::run() +{ + assert(_src_tensors.size() == _dst_tensors.size()); + // PermuteLayer infers dynamic shape inside itself whenever run is called for the following + // reasons: + // 1. PermuteLayer has to access dynamic tensor manager for input/output tensors of other backends + // 2. Other controlflow operation(If/While) uses this layout for copying tensors of other + // subgraphs(with other backends) + // 3. 
This inferring code is placed here to avoid duplicated code that can be caused by the above 2
+ // reasons
+
+ // check if output is not dynamic
+ for (size_t i = 0; i < _src_tensors.size(); ++i)
+ {
+ auto dst_tensor = _dst_tensors.at(i);
+ auto src_tensor = _src_tensors.at(i);
+ if (src_tensor->is_dynamic() || dst_tensor->is_dynamic())
+ {
+ // getting output shape
+ auto src_shape = src_tensor->getShape();
+
+ // set output shape and output buffer
+ ir::Shape new_shape =
+ exec::convertShape(src_shape, src_tensor->layout(), dst_tensor->layout());
+
+ try
+ {
+ if (!dst_tensor->applyShape(new_shape))
+ throw std::runtime_error{
+ "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
+ assert(dst_tensor->buffer() != nullptr);
+ }
+ catch (const std::out_of_range &e)
+ {
+ std::cerr << "Error: out_of_range in PermuteLayer: output's TensorManager does not support "
+ "dynamic tensor"
+ << '\n';
+ throw;
+ }
+ }
+ assert(exec::convertShape(src_tensor->getShape(), src_tensor->layout(), dst_tensor->layout()) ==
+ dst_tensor->getShape());
+ }
+ assert(_src_tensors.size() == _dst_tensors.size());
+ assert(_src_tensors.size() == _src_tensors_offsets.size());
+ assert(_dst_tensors.size() == _dst_tensors_offsets.size());
+ auto src_it = _src_tensors.begin();
+ auto dst_it = _dst_tensors.begin();
+ auto src_offsets_it = _src_tensors_offsets.begin();
+ auto dst_offsets_it = _dst_tensors_offsets.begin();
+ while (src_it != _src_tensors.end())
+ {
+ auto src = *src_it;
+ auto dst = *dst_it;
+ auto &src_offsets = *src_offsets_it;
+ auto &dst_offsets = *dst_offsets_it;
+
+ if (src->total_size() == 0)
+ {
+ assert(dst->total_size() == 0);
+ }
+ else
+ {
+ if (src != dst)
+ {
+ // Conditions to run permutation with multithreading
+ // 1. The tasks for multithreading were created
+ // 2. The tasks' size > 1
+ // 3. Both tensors are not dynamic
+ // 4. Data types of both tensors are the same
+ if (_tasks_map.find(src) == _tasks_map.end() || _tasks_map.at(src).size() == 1 ||
+ src->is_dynamic() || dst->is_dynamic() ||
+ underlying_type(src->data_type()) != underlying_type(dst->data_type()))
+ {
+ permute(src, dst, src->getShape().rank(), src_offsets, dst_offsets);
+ }
+ // If dst is a subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
+ else if (dst->needMemoryMap() && !dst->is_subtensor())
+ {
+ if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
+ {
+ // This is more effective than multi-threading
+ src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
+ }
+ else
+ {
+ // TODO Optimize this block in case the padding size of dst is big.
+ _buffers_map[dst].reserve(dst->total_size()); + auto dst_buffer = _buffers_map[dst].data(); + + src->access([&](backend::ITensor &) { runPermuteTasks(src, dst_buffer); }); + dst->enqueueWriteBuffer(dst_buffer, false); + } + } + else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() && + !dst->has_padding() && src->layout() == dst->layout()) + { + // This is more effective than multi-threading + assert(!dst->needMemoryMap()); + dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); }); + } + else + { + auto fn = [&](backend::ITensor &) { + dst->access([&](backend::ITensor &) { runPermuteTasks(src, dst->buffer()); }); + }; + src->access(fn); + } + } + } + src_it++; + dst_it++; + src_offsets_it++; + dst_offsets_it++; + } +} + +} // namespace kernel +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h new file mode 100644 index 000000000..227e32434 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__ +#define __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__ + +#include "../ExternalContext.h" +#include "../../../exec/IPermuteFunction.h" + +#include <ruy/thread_pool.h> // from @ruy + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace kernel +{ + +class PermuteLayer : public onert::exec::IPermuteFunction +{ +public: + PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors, + const std::shared_ptr<ExternalContext> &external_context); + + void optimize() override; + + void run() override; + +private: + std::shared_ptr<ExternalContext> _external_context; + +private: + void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor, + const ir::Shape &loop_shape, size_t size); + + void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer); + + struct PermuteWorkerTask : ruy::Task + { + using Strides = ir::Coordinates; + + PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor, + const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size) + : _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()}, + _src_start_offset{src_tensor.calcOffset(start_coords)}, + _dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{}, + _loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()}, + _dst_layout{dst_tensor.layout()}, _is_permutation{true} + { + // Set strides + setStrides(src_tensor, &_src_strides); + setStrides(dst_tensor, &_dst_strides); + + _is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4); + } + // Constructor for a copy + PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset, + uint32_t dst_start_offset, size_t size) + : _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset}, + _dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0}, + _loop_shape{1}, _size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false} + { + // DO NOTHING + } + void setBuffers(const uint8_t *src_buffer, uint8_t *dst_buffer) + { + _src_buffer = src_buffer; + _dst_buffer = dst_buffer; + } + void Run() override + { + ShapeLoop(_loop_shape, [&](const onert::ir::Coordinates &coords) { + size_t src_offset = _src_start_offset; + size_t dst_offset = _dst_start_offset; + assert(static_cast<size_t>(_loop_shape.rank()) == coords.size()); + ir::Coordinates dst_coords = coords; + if (_is_permutation) + { + dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout); + } + for (auto i = 0; i < _loop_shape.rank(); ++i) + { + assert(coords[i] >= 0 && dst_coords[i] >= 0); + src_offset += coords[i] * _src_strides[i]; + dst_offset += dst_coords[i] * _dst_strides[i]; + } + memcpy(_dst_buffer + dst_offset, _src_buffer + src_offset, _size); + }); + } + + private: + void setStrides(const ITensor &tensor, Strides *strides) + { + auto shape = tensor.getShape(); + const size_t rank = shape.rank(); + for (size_t i = 0; i < rank; ++i) + { + ir::Coordinates no_step(rank), one_step(rank); + one_step.set(i, 1); + if (shape.dim(i) > 1) + { + strides->set(i, tensor.calcOffset(one_step) - tensor.calcOffset(no_step)); + } + else + { + // If dimension value is 0 or 1, the stride of the dimension will be not used + // Do not call calcOffset() with coordinate value that is greater than dimension value + strides->set(i, 0); + } + assert((*strides)[i] >= 0); + } + } + + private: + const uint8_t *_src_buffer; + uint8_t 
*_dst_buffer; + size_t _src_start_offset; + size_t _dst_start_offset; + Strides _src_strides; + Strides _dst_strides; + const ir::Shape _loop_shape; + const size_t _size; + const ir::Layout _src_layout; + const ir::Layout _dst_layout; + bool _is_permutation; + }; + std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map; +}; + +} // namespace kernel +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__ diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc new file mode 100644 index 000000000..8b00db468 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "WhileLayer.h" + +#include "PermuteLayer.h" +#include "../../../exec/ExecutorBase.h" + +#include <misc/polymorphic_downcast.h> + +#include <algorithm> + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace kernel +{ + +WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, + const ir::SubgraphIndex &cond_subg_index, + const ir::SubgraphIndex &body_subg_index, exec::IExecutors *executors, + const ir::ModelIndex &model_index, + basic::DynamicMemoryManager *dyn_memory_manager, + const std::shared_ptr<ExternalContext> &external_context) + : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index}, + _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executors{executors}, + _model_index{model_index}, _dyn_memory_manager{dyn_memory_manager}, _external_context{ + external_context} +{ + // At this point, executors may not have executors of cond subg and body subg +} + +void WhileLayer::run() +{ + // Copy "_input_tensors" -> "cond subg inputs" + // Run cond subg + // Start loop while output of cond subg is ture + // // Copy "_input_tensors" -> "body subg inputs" in the first iteration, then copy "body subg + // outputs" -> "body subg inputs" in the second or more iterations + // // Run body subg + // // Copy "body subg outputs" -> "cond subg inputs" + // // Run cond subg + // If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" -> + // "_dst_tensors" + auto cond_exec = _executors->at(_model_index, _cond_subg_index); + auto body_exec = _executors->at(_model_index, _body_subg_index); + + // Need a temp tensor to hold the cond subgraph output + assert(cond_exec->getOutputTensors().size() == 1); + auto cond_output_tensor = [&]() { + auto cond_output = cond_exec->getOutputTensors().at(0); + auto tensor = std::make_unique<Tensor>(cond_output->orig_info(), cond_output->orig_layout(), + _dyn_memory_manager); + tensor->set_dynamic(); + tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), 
tensor->total_size())); + return tensor; + }(); + + VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl; + cond_exec->execute(_input_tensors, {cond_output_tensor.get()}); + VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl; + + auto getResultCond = [](backend::ITensor *tensor) -> bool { + bool ret = false; + tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); }); + return ret; + }; + + std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end()); + std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end()); + // Copying body inputs to outputs when the loop body is never executed + if (!getResultCond(cond_output_tensor.get())) + { + PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context}; + copy_body_inputs_to_op_outputs.run(); + return; + } + + // Need some temp tensors to hold the body subgraph output + std::vector<std::unique_ptr<Tensor>> temp_outputs_o; + std::vector<IPortableTensor *> temp_outputs; + for (auto &&io_tensor : body_exec->getOutputTensors()) + { + auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(), + _dyn_memory_manager); + tensor->set_dynamic(); + tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size())); + temp_outputs.push_back(tensor.get()); + temp_outputs_o.push_back(std::move(tensor)); + } + + std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end()); + PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context}; + + const auto body_execute_with_op_inputs = [&]() { + VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl; + body_exec->execute(_input_tensors, temp_outputs); + VERBOSE(While) << "Return from $" << _body_subg_index << std::endl; + }; + + const auto body_execute_with_body_outputs = [&]() { + VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl; + body_exec->execute(_output_tensors, temp_outputs); + VERBOSE(While) << "Return from $" << _body_subg_index << std::endl; + }; + + std::function<void()> body_execute = body_execute_with_op_inputs; + const auto cond_execute = [&]() { + VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl; + cond_exec->execute(_output_tensors, {cond_output_tensor.get()}); + VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl; + }; + + // Loop while Cond subgraph's output is true + while (getResultCond(cond_output_tensor.get())) + { + body_execute(); + copy_body_outputs_to_op_outputs.run(); + cond_execute(); + body_execute = body_execute_with_body_outputs; + } + + // Clean-up the temp tensors + _dyn_memory_manager->deallocate(cond_output_tensor.get()); + for (auto &&tensor : temp_outputs) + { + _dyn_memory_manager->deallocate(tensor); + } +} + +} // namespace kernel +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h index ebca8acdc..40ca4fe23 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h +++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h @@ -14,20 +14,23 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__ -#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__ +#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__ +#define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__ -#include <backend/ITensor.h> -#include <exec/IExecutor.h> +#include <backend/IPortableTensor.h> +#include <exec/IExecutors.h> #include <exec/IFunction.h> #include <ir/OperandIndexSequence.h> #include <ir/Graph.h> +#include "../ExternalContext.h" + +#include "backend/basic/MemoryManager.h" namespace onert { namespace backend { -namespace controlflow +namespace builtin { namespace kernel { @@ -35,12 +38,12 @@ namespace kernel class WhileLayer : public ::onert::exec::IFunction { public: - WhileLayer(const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, - const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, - const exec::DynAllocInfoMap &outputs_dyn_alloc_info, + WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index, - exec::ExecutorMap *executor_map); + exec::IExecutors *executors, const ir::ModelIndex &model_index, + basic::DynamicMemoryManager *dyn_memory_manager, + const std::shared_ptr<ExternalContext> &external_context); public: void run() override; @@ -48,17 +51,17 @@ public: private: const ir::SubgraphIndex _cond_subg_index; const ir::SubgraphIndex _body_subg_index; - const ir::OperandIndexSequence &_output_indices; - const ir::Graph &_graph; - const std::vector<std::shared_ptr<backend::ITensor>> _input_tensors; - const std::vector<std::shared_ptr<backend::ITensor>> _output_tensors; - const exec::DynAllocInfoMap _outputs_dyn_alloc_info; - exec::ExecutorMap *_executor_map; + const std::vector<backend::IPortableTensor *> _input_tensors; + const std::vector<backend::IPortableTensor *> _output_tensors; + exec::IExecutors *_executors; + const ir::ModelIndex _model_index; + basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors + const std::shared_ptr<ExternalContext> _external_context; }; } // namespace kernel -} // namespace controlflow +} // namespace builtin } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__ +#endif // __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__ diff --git a/runtime/onert/core/src/backend/builtin/train/BackendContext.cc b/runtime/onert/core/src/backend/builtin/train/BackendContext.cc new file mode 100644 index 000000000..fa9131f4d --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/BackendContext.cc @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendContext.h" + +#include "backend/basic/train/TrainableBackendContextHelpers.h" +#include "exec/FunctionSequence.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +backend::ITensorRegistry *BackendContext::genTensors() +{ + // For now, there is no need to generate tensors for forwarding. + // builtin train backend handles 3 operators: `Permute`, `IF`, `WHILE`. + // `Permute`: Tensor generation is not required. + // `IF`, `WHILE`: Not supported yet + return tensor_registry().get(); +} + +backend::train::ITensorRegistry *BackendContext::genTrainingTensors() +{ + // For now, there is no need to generate tensors for backwarding. + return tensor_registry().get(); +} + +backend::train::FunctionMap BackendContext::genKernels() +{ + backend::train::FunctionMap ret; + + for (auto &&op_ind : _tdata->op_order) + { + auto tn_seq = kernel_gen->generate(op_ind); + ret.emplace_back(op_ind, std::move(tn_seq)); + } + + trainable_graph()->operands().iterate( + [&](const ir::OperandIndex &ind, const ir::Operand &operand) { + if (!external_operands().contains(ind) && operand.isConstant()) + { + throw std::runtime_error( + "BackendContext: builtin backend does not support updatable weights yet"); + } + }); + + // TODO Enable prepare() + // for (auto &&it : ret) + // { + // auto &fn_seq = it.second; + // fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + // } + + return ret; +} + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/builtin/train/BackendContext.h b/runtime/onert/core/src/backend/builtin/train/BackendContext.h new file mode 100644 index 000000000..6f8ce4cae --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/BackendContext.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__ + +#include <backend/train/TrainableBackendContext.h> + +#include "KernelGenerator.h" +#include "../ExternalContext.h" +#include "../TensorBuilder.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +class BackendContext : public backend::train::TrainableBackendContext +{ +public: + BackendContext(const backend::train::ITrainableBackend *backend, + std::unique_ptr<backend::train::TrainableContextData> &&data, + std::shared_ptr<backend::train::ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : backend::train::TrainableBackendContext(backend, std::move(data), tensor_registry), + kernel_gen{kernel_gen}, + _external_context(new ExternalContext), _tensor_builder{tensor_builder} + { + } + + backend::ITensorRegistry *genTensors() override; + backend::train::ITensorRegistry *genTrainingTensors() override; + +public: + backend::train::FunctionMap genKernels() override; + + std::shared_ptr<ExternalContext> external_context() { return _external_context; } + +public: + // TODO Make it private + std::shared_ptr<KernelGenerator> kernel_gen; + +private: + // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, + // the thread pool is also created in duplicate + // TODO Create one ruy context for session + std::shared_ptr<ExternalContext> _external_context; + +private: + std::shared_ptr<TensorBuilder> _tensor_builder; +}; + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc new file mode 100644 index 000000000..6f2c0a3b9 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "KernelGenerator.h" + +#include "kernel/PermuteLayer.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +KernelGenerator::KernelGenerator(const ir::train::TrainableGraph &tgraph, + const std::shared_ptr<TensorRegistry> &tensor_reg, + const std::shared_ptr<ExternalContext> &external_context) + : KernelGeneratorBase{tgraph}, _tensor_reg{tensor_reg}, _external_context(external_context) +{ +} + +std::unique_ptr<exec::train::TrainableFnSequence> KernelGenerator::generate(ir::OperationIndex ind) +{ + auto ret = std::make_unique<exec::train::TrainableFnSequence>(); + const auto &op = _tgraph.operation(ind); + op.accept(*this); + // _return_fn must have been generated + if (_return_fn == nullptr) + { + throw std::runtime_error(op.name() + " op does not supported trainable kernel yet"); + } + + ret->_functions.emplace_back(std::move(_return_fn)); + + return ret; +} + +void KernelGenerator::visit(const ir::train::operation::Permute &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + // Add PermuteLayer + std::vector<ITensor *> output_tensors{getTensor(output_index)}; + std::vector<ITensor *> input_tensors{getTensor(input_index)}; + + std::vector<ITensor *> output_deriv_tensors; + std::vector<ITensor *> input_deriv_tensors; + + auto input_deriv_tensor = getDerivativeTensor(input_index); + auto output_deriv_tensor = getDerivativeTensor(output_index); + output_deriv_tensors.emplace_back(output_deriv_tensor); + input_deriv_tensors.emplace_back(input_deriv_tensor); + + // NOTE IOTensors of graph outputs for passing data to users must be ignored in training + // because the buffers of those IOTensors are unnecessary and nullptr + bool ignore_forward_in_training = _whole_graph_outputs.contains(output_index); + auto fn = std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, + input_deriv_tensors, output_deriv_tensors, + ignore_forward_in_training, _external_context); + + _return_fn = std::move(fn); +} + +backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index) +{ + // Get Tensor from all tensor registries (for Permute op) + auto ret = _tensor_registries.getITensor(index); + assert(ret != nullptr); + return ret; +} + +backend::ITensor *KernelGenerator::getDerivativeTensor(const ir::OperandIndex &index) +{ + // Get derivative Tensor from all tensor registries (for Permute op) + auto ret = _tensor_registries.getDerivativeITensor(index); + return ret; +} + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h new file mode 100644 index 000000000..d8781c0d0 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_GENERATOR_H__ + +#include "../ExternalContext.h" +#include "../train/TensorRegistry.h" +#include "../../../compiler/train/TensorRegistries.h" + +#include <backend/train/KernelGeneratorBase.h> +#include <exec/train/TrainableFnSequence.h> +#include <ir/train/TrainableGraph.h> + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +class KernelGenerator : public backend::train::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::train::TrainableGraph &tgraph, + const std::shared_ptr<TensorRegistry> &tensor_reg, + const std::shared_ptr<ExternalContext> &external_context); + + std::unique_ptr<exec::train::TrainableFnSequence> generate(ir::OperationIndex ind) override; + + void setTensorRegistries(const compiler::train::TensorRegistries &tensor_registries) + { + _tensor_registries = tensor_registries; + } + + void setWholeGraphOutputs(const ir::OperandIndexSequence &outputs) + { + _whole_graph_outputs = outputs; + } + +private: + void visit(const ir::train::operation::Permute &) override; + +private: + backend::ITensor *getTensor(const ir::OperandIndex &index); + backend::ITensor *getDerivativeTensor(const ir::OperandIndex &index); + +private: + std::shared_ptr<TensorRegistry> _tensor_reg; + compiler::train::TensorRegistries _tensor_registries; + const std::shared_ptr<ExternalContext> _external_context; + ir::OperandIndexSequence _whole_graph_outputs; +}; + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/core/src/backend/builtin/train/Tensor.h b/runtime/onert/core/src/backend/builtin/train/Tensor.h new file mode 100644 index 000000000..611407bd2 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/Tensor.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__ +#define __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__ + +#include <backend/basic/train/TrainableTensor.h> + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +using TrainableTensor = basic::train::TrainableTensor; +using DerivativeTensor = basic::Tensor; +using GradientTensor = basic::Tensor; + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__ diff --git a/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h b/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h new file mode 100644 index 000000000..c48e5fe93 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__ +#define __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__ + +#include <backend/train/ITensorRegistry.h> + +#include "../IOTensor.h" +#include "../Tensor.h" +#include "Tensor.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ + +using BaseTensorRegistry = + backend::train::PortableTensorRegistryTemplate<Tensor, TrainableTensor, DerivativeTensor, + GradientTensor>; + +class TensorRegistry : public backend::train::ITensorRegistry +{ +public: + TensorRegistry() : _base_reg{new BaseTensorRegistry} {} + + ITensor *getITensor(const ir::OperandIndex &index) override + { + auto base_tensor = _base_reg->getITensor(index); + if (base_tensor) + return base_tensor; + return getNativeIOTensor(index); + } + + ITensor *getNativeITensor(const ir::OperandIndex &index) override + { + auto base_tensor = _base_reg->getNativeITensor(index); + if (base_tensor) + return base_tensor; + return getNativeIOTensor(index); + } + + IPortableTensor *getPortableTensor(const ir::OperandIndex &index) + { + auto base_tensor = _base_reg->getPortableTensor(index); + if (base_tensor) + return base_tensor; + return getNativeIOTensor(index); + } + + IOTensor *getNativeIOTensor(const ir::OperandIndex &index) + { + auto tensor = _native_io_tensors.find(index); + if (tensor != _native_io_tensors.end()) + return tensor->second.get(); + return nullptr; + } + + ITensor *getDerivativeITensor(const ir::OperandIndex &index) override + { + return _base_reg->getDerivativeTensor(index); + } + + ITensor *getGradientITensor(const ir::OperandIndex &index) override + { + return _base_reg->getGradientTensor(index); + } + + DerivativeTensor *getDerivativeTensor(const ir::OperandIndex &index) + { + return _base_reg->getDerivativeTensor(index); + } + + bool setMigrantTensor(const ir::OperandIndex &index, IPortableTensor *tensor) override + { + assert(tensor); + assert(!getITensor(index)); // For the index, tensor is not registered yet + _base_reg->setMigrantTensor(index, tensor); + 
return true; + } + + void setDerivativeTensor(const ir::OperandIndex &index, std::unique_ptr<DerivativeTensor> tensor) + { + _base_reg->setDerivativeTensor(index, std::move(tensor)); + } + + void setGradientTensor(const ir::OperandIndex &index, std::unique_ptr<GradientTensor> tensor) + { + _base_reg->setGradientTensor(index, std::move(tensor)); + } + + void setNativeIOTensor(ir::OperandIndex index, std::unique_ptr<IOTensor> &&tensor) + { + assert(tensor); + assert(!getITensor(index)); // For the index, tensor is not registered yet + _native_io_tensors[index] = std::move(tensor); + } + + const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors() + { + return _native_io_tensors; + } + std::shared_ptr<BaseTensorRegistry> base_reg() { return _base_reg; } + +private: + std::shared_ptr<BaseTensorRegistry> _base_reg; + ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors; +}; + +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__ diff --git a/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc new file mode 100644 index 000000000..929092dde --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc @@ -0,0 +1,85 @@ + + +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "PermuteLayer.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ +namespace kernel +{ + +PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors, + const std::vector<ITensor *> &dst_tensors, + const std::vector<ITensor *> &input_deriv_tensors, + const std::vector<ITensor *> &output_deriv_tensors, + bool ignore_forward_in_training, + const std::shared_ptr<ExternalContext> &external_context) + : builtin::kernel::PermuteLayer{src_tensors, dst_tensors, external_context}, + _input_deriv_tensors{input_deriv_tensors}, _output_deriv_tensors{output_deriv_tensors}, + _ignore_forward_in_training{ignore_forward_in_training} +{ + assert(input_deriv_tensors.size() == output_deriv_tensors.size()); + assert(src_tensors.size() == dst_tensors.size()); +} + +void PermuteLayer::optimize() +{ + builtin::kernel::PermuteLayer::optimize(); + + // TODO Calculate offsets of derivative tensors if necessary +} + +void PermuteLayer::forward(bool training) +{ + if (training && _ignore_forward_in_training) + return; + + builtin::kernel::PermuteLayer::run(); +} + +void PermuteLayer::backward() +{ + for (uint32_t i = 0; i < _output_deriv_tensors.size(); ++i) + { + auto src_deriv = _output_deriv_tensors.at(i); + auto dst_deriv = _input_deriv_tensors.at(i); + + // NOTE The derivative tensors corresponding to inputs/outputs of model are nullptr + // because permuting those tensors is meaningless + if (src_deriv && dst_deriv) + { + const auto rank = src_deriv->getShape().rank(); + auto output_offsets = _dst_tensors_offsets.at(i); + auto input_offsets = _src_tensors_offsets.at(i); + + exec::IPermuteFunction::permute(src_deriv, dst_deriv, rank, output_offsets, input_offsets); + } + } +} + +} // namespace kernel +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h new file mode 100644 index 000000000..de8063a21 --- /dev/null +++ b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__ +#define __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__ + +#include "../../kernel/PermuteLayer.h" + +#include "exec/train/ITrainableFunction.h" + +namespace onert +{ +namespace backend +{ +namespace builtin +{ +namespace train +{ +namespace kernel +{ + +class PermuteLayer : public builtin::kernel::PermuteLayer, public exec::train::ITrainableFunction +{ +public: + PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors, + const std::vector<ITensor *> &input_deriv_tensors, + const std::vector<ITensor *> &output_deriv_tensors, bool ignore_forward_in_training, + const std::shared_ptr<ExternalContext> &external_context); + + void optimize() override; + + void forward(bool training) override; + void backward() override; + +private: + std::vector<ITensor *> _input_deriv_tensors; + std::vector<ITensor *> _output_deriv_tensors; + bool _ignore_forward_in_training; +}; + +} // namespace kernel +} // namespace train +} // namespace builtin +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__ diff --git a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h deleted file mode 100644 index e21a8f357..000000000 --- a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ - -#include "TensorRegistry.h" - -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> - -namespace onert -{ -namespace backend -{ -namespace controlflow -{ - -class ConstantInitializer : public IConstantInitializer -{ -public: - ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<ITensorRegistry> &tensor_reg) - : IConstantInitializer{operands}, _tensor_reg{tensor_reg} - { - } - -private: - std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; } - -private: - std::shared_ptr<ITensorRegistry> _tensor_reg; -}; - -} // namespace controlflow -} // namespace backend -} // namespace onert - -#endif // __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc deleted file mode 100644 index 1288e4c96..000000000 --- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "DynamicTensorManager.h" - -#include "util/logging.h" -#include "util/Exceptions.h" -#include "ir/DataType.h" - -namespace onert -{ -namespace backend -{ -namespace controlflow -{ - -DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors) - : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{tensors} -{ - // DO NOTHING -} - -void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) -{ - // NOTE Handle user tensors first - auto user_tensor = _tensors->getNativeUserTensor(ind); - if (user_tensor) - { - // User tensors cannot be reallocated. - auto buffer_size = user_tensor->total_size(); - auto new_size = new_shape.num_elements() * sizeOfDataType(user_tensor->data_type()); - if (buffer_size < new_size) - throw InsufficientBufferSizeException{"Output buffer size is less than output tensor size"}; - user_tensor->setShape(new_shape); - return; - } - - // NOTE Then handle own tensors - auto tensor = _tensors->getNativeOwnTensor(ind); - assert(tensor); - - bool previously_dynamic = tensor->is_dynamic(); - - auto allocTensorMem = [&](bool overwrite = false) { - auto capacity = tensor->total_size(); - auto alloc = _dynamic_mem_mgr->allocate(ind, capacity); - - if (overwrite) - tensor->overwriteBuffer(alloc); - else - tensor->setBuffer(alloc); - }; - - if (!previously_dynamic) - { - // TODO deallocate tensor->buffer() - // issue is that staticTensorManager might have allocate this memory - tensor->setShape(new_shape); - tensor->set_dynamic(); - allocTensorMem(true); - } - else if (tensor->buffer() == nullptr) - { - tensor->setShape(new_shape); - tensor->set_dynamic(); - allocTensorMem(); - } - // when buffer was already allocated and new_shape requires different size - else - { - auto previous_size = tensor->total_size(); - auto new_size = new_shape.num_elements() * sizeOfDataType(tensor->data_type()); - if (previous_size != new_size) - { - _dynamic_mem_mgr->deallocate(ind); - - tensor->setShape(new_shape); - tensor->set_dynamic(); - allocTensorMem(true); - } - else - { // when buffer with same size was already allocated, shape could differ - tensor->setShape(new_shape); - } - } -} - -void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind, - const ir::OperandInfo &tensor_info, - ir::Layout backend_layout) -{ - auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this); - _tensors->setNativeOwnTensor(ind, tensor); -} - -void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) -{ - _dealloc_tensor_map[op_ind].emplace(operand_ind); -} - -void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind) -{ - auto find = _dealloc_tensor_map.find(op_ind); - if (find == _dealloc_tensor_map.end()) - return; - - auto &input_set = find->second; - for (auto input_ind : input_set) - { - if (!_tensors->getNativeTensor(input_ind)->is_dynamic()) - continue; - - _dynamic_mem_mgr->deallocate(input_ind); - VERBOSE(DynamicTensorManager) << "Deallocating #" << input_ind.value() - << " (input of op_ind: " << 
op_ind.value() << ")" << std::endl; - } -} - -void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind) -{ - if (!_tensors->getNativeTensor(output_ind)->is_dynamic()) - return; - - _dynamic_mem_mgr->deallocate(output_ind); - VERBOSE(DynamicTensorManager) << "Deallocating #" << output_ind.value() - << " (output of a subgraph)" << std::endl; -} - -} // namespace controlflow -} // namespace backend -} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h deleted file mode 100644 index dbe388ba2..000000000 --- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__ -#define __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__ - -#include "TensorRegistry.h" -#include "Tensor.h" - -#include <backend/IDynamicTensorManager.h> -#include <backend/cpu_common/MemoryManager.h> -#include <ir/OperandInfo.h> -#include <ir/Operation.h> -#include <ir/Index.h> - -namespace onert -{ -namespace backend -{ -namespace controlflow -{ - -/** - * @brief Class to manage dynamic tensor and its memory - */ -class DynamicTensorManager : public backend::IDynamicTensorManager -{ -public: - DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors); - - virtual ~DynamicTensorManager() = default; - - void applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) override; - - void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, - ir::Layout backend_layout); - - void planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) override; - void deallocInput(ir::OperationIndex op_ind) override; - void deallocSubgraphOutput(ir::OperandIndex ind) override; - -private: - /** - * @brief Memory manager for dynamic tensor. - * @todo DynamicMemoryManager is not optimized. Optimized one is needed - */ - std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr; - const std::shared_ptr<TensorRegistry> _tensors; - - // contains list of dynamic tensor index, which can be deallocated after running operation - // note: this map could contain static tensor index too. Careful use is required. - std::unordered_map<ir::OperationIndex, std::unordered_set<ir::OperandIndex>> _dealloc_tensor_map; -}; - -} // namespace controlflow -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__ diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc deleted file mode 100644 index de5a6a5f6..000000000 --- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "KernelGenerator.h" - -#include <backend/BackendContext.h> -#include <util/Utils.h> -#include "kernel/IfLayer.h" -#include "kernel/WhileLayer.h" -#include "kernel/PermuteLayer.h" -#include "exec/ExecutorBase.h" -#include "exec/FunctionSequence.h" - -namespace onert -{ -namespace backend -{ -namespace controlflow -{ - -KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager, - const std::shared_ptr<TensorRegistry> &tensor_reg) - : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg}, - _tensor_registries{}, _executor_map{nullptr} -{ - UNUSED_RELEASE(_graph); - UNUSED_RELEASE(_tensor_registries); - UNUSED_RELEASE(_executor_map); -} - -void KernelGenerator::visit(const ir::OpSequence &op_seq) -{ - assert(!_return_fn_seq); - assert(_dyn_tensor_manager); - assert(_tensor_reg); - - auto dyn_shape_inferer = - std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg); - - _return_fn_seq = std::make_unique<exec::FunctionSequence>(); - - // Prepare to handle dynamic tensors later - auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); - { - dyn_ctx->op_seq = &op_seq; - dyn_ctx->operations = &_graph.operations(); - dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); - dyn_ctx->tensor_registry = _tensor_reg; - dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager; - - _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); - } - _return_fn_seq->enableDynamicShapeInferer(true); - - for (const auto &op_idx : op_seq.operations()) - { - const auto &node = _graph.operations().at(op_idx); - node.accept(*this); - _return_fn_seq->append(releaseFunction()); - } -} - -void KernelGenerator::visit(const ir::operation::If &node) -{ - const auto then_subg_index = node.param().then_subg_index; - const auto else_subg_index = node.param().else_subg_index; - - std::vector<std::shared_ptr<backend::ITensor>> input_tensors; - for (const auto input_index : node.getInputs()) - { - auto input_tensor = getTensor(input_index); - - input_tensors.emplace_back(input_tensor); - } - - std::vector<std::shared_ptr<backend::ITensor>> output_tensors; - exec::DynAllocInfoMap outputs_dyn_alloc_info; - for (const auto output_index : node.getOutputs()) - { - auto output_tensor = getTensor(output_index); - - output_tensors.emplace_back(output_tensor); - outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index}; - } - - // IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of - // creating executor recusively - const auto cond_tensor = input_tensors.front(); - input_tensors.erase(input_tensors.begin()); - auto fn = std::make_unique<::onert::backend::controlflow::kernel::IfLayer>( - cond_tensor, input_tensors, output_tensors, node.getOutputs(), _graph, outputs_dyn_alloc_info, - then_subg_index, else_subg_index, _executor_map); - - _return_fn = std::move(fn); -} - -void 
KernelGenerator::visit(const ir::operation::Permute &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(0)}; - - // Add PermuteLayer - std::vector<std::shared_ptr<ITensor>> output_tensors{getTensor(output_index)}; - std::vector<std::shared_ptr<ITensor>> input_tensors{getTensor(input_index)}; - std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info; - outputs_dyn_alloc_info[output_tensors.at(0)] = exec::DynAllocInfo{output_index}; - - auto fn = - std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, outputs_dyn_alloc_info); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::While &node) -{ - const auto cond_subg_index = node.param().cond_subg_index; - const auto body_subg_index = node.param().body_subg_index; - - // This op does not support input as a constant, because controlflow backend does not have - // TensorBuilder - std::vector<std::shared_ptr<backend::ITensor>> input_tensors; - for (const auto input_index : node.getInputs()) - { - auto input_tensor = getTensor(input_index); - - input_tensors.emplace_back(input_tensor); - } - - std::vector<std::shared_ptr<backend::ITensor>> output_tensors; - std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info; - for (const auto output_index : node.getOutputs()) - { - auto output_tensor = getTensor(output_index); - - output_tensors.emplace_back(output_tensor); - - outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index}; - } - - // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of - // creating executor recusively - auto fn = std::make_unique<::onert::backend::controlflow::kernel::WhileLayer>( - input_tensors, output_tensors, node.getOutputs(), _graph, outputs_dyn_alloc_info, - cond_subg_index, body_subg_index, _executor_map); - - _return_fn = std::move(fn); -} - -std::shared_ptr<backend::ITensor> KernelGenerator::getTensor(const ir::OperandIndex &index) -{ - std::shared_ptr<backend::ITensor> ret = _tensor_registries.getITensor(index); - assert(ret != nullptr); - return ret; -} - -} // namespace controlflow -} // namespace backend -} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h deleted file mode 100644 index 678c5b73b..000000000 --- a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__ -#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__ - -#include "backend/cpu_common/TensorRegistry.h" -#include "backend/ITensorRegistry.h" -#include "Tensor.h" -#include "UserTensor.h" -#include <assert.h> - -namespace onert -{ -namespace backend -{ -namespace controlflow -{ - -/** - * @brief Tensor registry class for controlflow backend - * - * This class contains three types of tensors. Two native tensors(tensors that are managed by this - * backend) and the other is migrant tensor. - * - * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given - * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg ) - * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg ) - * - * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager - * - */ -class TensorRegistry : public ITensorRegistry -{ -public: - TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {} - - std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override - { - auto base_tensor = _base_reg->getITensor(ind); - if (base_tensor) - return base_tensor; - return getNativeUserTensor(ind); - } - - std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override - { - auto base_tensor = _base_reg->getNativeITensor(ind); - if (base_tensor) - return base_tensor; - return getNativeUserTensor(ind); - } - - std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind) - { - auto base_tensor = _base_reg->getPortableTensor(ind); - if (base_tensor) - return base_tensor; - return getNativeUserTensor(ind); - } - - std::shared_ptr<IPortableTensor> getNativeTensor(const ir::OperandIndex &ind) - { - auto base_tensor = _base_reg->getNativeTensor(ind); - if (base_tensor) - return base_tensor; - return getNativeUserTensor(ind); - } - - std::shared_ptr<Tensor> getNativeOwnTensor(const ir::OperandIndex &ind) - { - return _base_reg->getNativeTensor(ind); - } - - std::shared_ptr<UserTensor> getNativeUserTensor(const ir::OperandIndex &ind) - { - auto tensor = _native_user_tensors.find(ind); - if (tensor != _native_user_tensors.end()) - return tensor->second; - return nullptr; - } - - bool setMigrantTensor(const ir::OperandIndex &ind, - const std::shared_ptr<IPortableTensor> &tensor) override - { - assert(tensor); - assert(!getITensor(ind)); // For the ind, tensor is not registered yet - _base_reg->setMigrantTensor(ind, tensor); - return true; - } - - void setNativeOwnTensor(ir::OperandIndex ind, const std::shared_ptr<Tensor> &tensor) - { - assert(tensor); - assert(!getITensor(ind)); // For the ind, tensor is not registered yet - _base_reg->setNativeTensor(ind, tensor); - } - - void setNativeUserTensor(ir::OperandIndex ind, const std::shared_ptr<UserTensor> &tensor) - { - assert(tensor); - assert(!getITensor(ind)); // For the ind, tensor is not registered yet - _native_user_tensors[ind] = tensor; - } - - const ir::OperandIndexMap<std::shared_ptr<UserTensor>> &native_user_tensors() - { - return _native_user_tensors; - } - std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; } - -private: - std::shared_ptr<cpu_common::TensorRegistry> _base_reg; - ir::OperandIndexMap<std::shared_ptr<UserTensor>> _native_user_tensors; -}; - -} // namespace controlflow -} // namespace backend -} // namespace onert - -#endif // ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__ diff --git 
a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc deleted file mode 100644 index 8377c7183..000000000 --- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "IfLayer.h" - -#include <backend/ITensor.h> -#include "exec/ExecutorBase.h" -#include <misc/polymorphic_downcast.h> -#include "PermuteLayer.h" - -namespace onert -{ -namespace backend -{ -namespace controlflow -{ -namespace kernel -{ - -IfLayer::IfLayer(const std::shared_ptr<backend::ITensor> &cond_tensor, - const std::vector<std::shared_ptr<backend::ITensor>> input_tensors, - const std::vector<std::shared_ptr<backend::ITensor>> output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, - const exec::DynAllocInfoMap &outputs_dyn_alloc_info, - const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, - exec::ExecutorMap *executor_map) - : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors}, - _output_indices{output_indices}, _graph{graph}, - _outputs_dyn_alloc_info{outputs_dyn_alloc_info}, _then_subg_index{then_subg_index}, - _else_subg_index{else_subg_index}, _executor_map{executor_map} -{ - // At this point, executor_map may not have executors of then subg and else subg -} - -void IfLayer::run() -{ - // Check condition - // // If true - // // // Copy _input_tensors -> then subg's inputs - // // // Run then subg - // // // Copy outputs of then subg -> _output_tensors - // // Else - // // // Copy _input_tensors -> else subg's inputs if false - // // // Run else subg - // // // Copy outputs of else subg -> _output_tensors - auto getResultCond = [](backend::ITensor *tensor) -> bool { - bool ret = false; - tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); }); - return ret; - }; - - exec::ExecutorBase *subg_exec = nullptr; - if (getResultCond(_cond_tensor.get())) - { - subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_then_subg_index).get()); - } - else - { - subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_else_subg_index).get()); - } - - const auto &subg_graph = subg_exec->graph(); - - std::vector<std::shared_ptr<backend::ITensor>> src_tensors; - std::vector<std::shared_ptr<backend::ITensor>> dst_tensors; - // Add tensors used in subgraph or contained in outputs of subgraph - assert(subg_graph.getInputs().size() == _input_tensors.size()); - assert(subg_graph.getInputs().size() == subg_exec->getInputTensors().size()); - for (uint32_t i = 0; i < subg_graph.getInputs().size(); ++i) - { - const auto &subg_input_index = subg_graph.getInputs().at(i); - const auto &subg_input = subg_graph.operands().at(subg_input_index); - if (subg_input.getUses().size() > 0 || 
subg_graph.getOutputs().contains(subg_input_index)) - { - src_tensors.emplace_back(_input_tensors.at(i)); - dst_tensors.emplace_back(subg_exec->getInputTensors().at(i)); - } - } - const auto &subg_inputs_dyn_alloc_info = subg_exec->getInputsDynamicAllocInfo(); - const auto permute_op_input_to_subg_input = - std::make_shared<PermuteLayer>(src_tensors, dst_tensors, subg_inputs_dyn_alloc_info); - - // Add tensors used as output of operation or contained in outputs of operation - src_tensors.clear(); - dst_tensors.clear(); - assert(_output_indices.size() == subg_exec->getOutputTensors().size()); - assert(_output_indices.size() == _output_tensors.size()); - for (uint32_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = _graph.operands().at(output_index); - if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index)) - { - src_tensors.emplace_back(subg_exec->getOutputTensors().at(i)); - dst_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_subg_output_to_op_output = - std::make_shared<PermuteLayer>(src_tensors, dst_tensors, _outputs_dyn_alloc_info); - - // Remove copying of unused tensor - permute_op_input_to_subg_input->prepare(); - permute_subg_output_to_op_output->prepare(); - - // Copy & run - subg_exec->execute(_input_tensors, permute_op_input_to_subg_input); - permute_subg_output_to_op_output->run(); -} - -} // namespace kernel -} // namespace controlflow -} // namespace backend -} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc deleted file mode 100644 index e8f1ea679..000000000 --- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "PermuteLayer.h" - -#include "exec/ShapeConverter.h" - -namespace onert -{ -namespace backend -{ -namespace controlflow -{ -namespace kernel -{ - -void PermuteLayer::run() -{ - assert(_src_tensors.size() == _dst_tensors.size()); - // PermuteLayer infers dynamic shape inside itself whenever run is called for the following - // reasons: - // 1. PermuteLayer has to access dynamic tensor manager for input/output tensors of other backends - // 2. Other controlflow operation(If/While) uses this layout for copying tensors of other - // subgraphs(with other backends) - // 3. 
This infering code is placed here to avoid duplicated code that can be caused by above 2 - // reasons - - // check if output is not dynamic - for (size_t i = 0; i < _src_tensors.size(); ++i) - { - auto dst_tensor = _dst_tensors.at(i); - auto src_tensor = _src_tensors.at(i); - if (src_tensor->is_dynamic() || dst_tensor->is_dynamic()) - { - // getting output shape - auto src_shape = src_tensor->getShape(); - - // set output shape and output buffer - ir::Shape new_shape = - exec::convertShape(src_shape, src_tensor->layout(), dst_tensor->layout()); - - try - { - const auto dst_index = _dst_dyn_alloc_info_map.at(dst_tensor).ind; - auto dyn_tensor_manager = dst_tensor->dynamic_tensor_manager(); - if (!dyn_tensor_manager) - throw std::runtime_error{ - "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"}; - dyn_tensor_manager->applyShape(dst_index, new_shape); - assert(dst_tensor->buffer() != nullptr); - } - catch (const std::out_of_range &e) - { - std::cerr << "Error: out_of_range in PermuteLayer: output's TensorManager does not support " - "dynamic tensor" - << '\n'; - throw; - } - } - assert(exec::convertShape(src_tensor->getShape(), src_tensor->layout(), dst_tensor->layout()) == - dst_tensor->getShape()); - } - IPermuteFunction::run(); -} - -} // namespace kernel -} // namespace controlflow -} // namespace backend -} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h deleted file mode 100644 index 403ac770d..000000000 --- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__ -#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__ - -#include "backend/ITensorBuilder.h" -#include "exec/IPermuteFunction.h" -#include "exec/IExecutor.h" - -namespace onert -{ -namespace backend -{ -namespace controlflow -{ -namespace kernel -{ - -class PermuteLayer : public onert::exec::IPermuteFunction -{ -public: - PermuteLayer(const std::vector<std::shared_ptr<ITensor>> &src_tensors, - const std::vector<std::shared_ptr<ITensor>> &dst_tensors, - const exec::DynAllocInfoMap &dst_dyn_alloc_info_map) - : _dst_dyn_alloc_info_map{dst_dyn_alloc_info_map} - { - assert(src_tensors.size() == dst_tensors.size()); - _src_tensors = src_tensors; - _dst_tensors = dst_tensors; - } - - void optimize() override - { - // Remove copying of tensor as nullptr - auto src_it = _src_tensors.begin(); - auto dst_it = _dst_tensors.begin(); - while (src_it != _src_tensors.end()) - { - if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr)) - { - src_it = _src_tensors.erase(src_it); - dst_it = _dst_tensors.erase(dst_it); - } - else - { - ++src_it; - ++dst_it; - } - } - } - - void run() override; - -private: - const exec::DynAllocInfoMap _dst_dyn_alloc_info_map; -}; - -} // namespace kernel -} // namespace controlflow -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__ diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc deleted file mode 100644 index 50936e5f6..000000000 --- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "WhileLayer.h" - -#include <backend/ITensor.h> -#include "exec/ExecutorBase.h" -#include <misc/polymorphic_downcast.h> -#include "PermuteLayer.h" - -namespace onert -{ -namespace backend -{ -namespace controlflow -{ -namespace kernel -{ - -WhileLayer::WhileLayer(const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, - const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, - const exec::DynAllocInfoMap &outputs_dyn_alloc_info, - const ir::SubgraphIndex &cond_subg_index, - const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map) - : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index}, - _output_indices{output_indices}, _graph{graph}, _input_tensors{input_tensors}, - _output_tensors{output_tensors}, _outputs_dyn_alloc_info{outputs_dyn_alloc_info}, - _executor_map{executor_map} -{ - // At this point, executor_map may not have executors of cond subg and body subg -} - -void WhileLayer::run() -{ - // Copy "_input_tensors" -> "cond subg inputs" - // Run cond subg - // Start loop while output of cond subg is ture - // // Copy "_input_tensors" -> "body subg inputs" in the first iteration, then copy "body subg - // outputs" -> "body subg inputs" in the second or more iterations - // // Run body subg - // // Copy "body subg outputs" -> "cond subg inputs" - // // Run cond subg - // If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" -> - // "_dst_tensors" - auto cond_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_cond_subg_index).get()); - auto body_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_body_subg_index).get()); - - const auto &cond_graph = cond_exec->graph(); - const auto &cond_inputs_dyn_alloc = cond_exec->getInputsDynamicAllocInfo(); - const auto &body_graph = body_exec->graph(); - const auto &body_inputs_dyn_alloc = body_exec->getInputsDynamicAllocInfo(); - - std::vector<std::shared_ptr<backend::ITensor>> input_tensors; - std::vector<std::shared_ptr<backend::ITensor>> cond_input_tensors; - std::vector<std::shared_ptr<backend::ITensor>> body_input_tensors; - std::vector<std::shared_ptr<backend::ITensor>> body_output_tensors; - std::vector<std::shared_ptr<backend::ITensor>> output_tensors; - - // Add only used tensors in cond subgraph - assert(cond_graph.getInputs().size() == _input_tensors.size()); - assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size()); - for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i)); - if (cond_input.getUses().size() > 0) - { - input_tensors.emplace_back(_input_tensors.at(i)); - cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i)); - } - } - const auto permute_op_input_to_cond_input = - std::make_shared<PermuteLayer>(input_tensors, cond_input_tensors, cond_inputs_dyn_alloc); - - // Add only used tensors among outputs of while operation - assert(_output_indices.size() == _input_tensors.size()); - assert(_output_indices.size() == _output_tensors.size()); - input_tensors.clear(); - output_tensors.clear(); - for (size_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = _graph.operands().at(output_index); - if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index)) - { - 
input_tensors.emplace_back(_input_tensors.at(i)); - output_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_op_input_to_op_output = - std::make_shared<PermuteLayer>(input_tensors, output_tensors, _outputs_dyn_alloc_info); - - // Add all tensors with unused tensors in body subgraph because unused input tensors will be - // copied output tensors in body subgraph - assert(_input_tensors.size() == body_exec->getInputTensors().size()); - input_tensors = _input_tensors; - body_input_tensors = body_exec->getInputTensors(); - const auto permute_op_input_to_body_input = - std::make_shared<PermuteLayer>(input_tensors, body_input_tensors, body_inputs_dyn_alloc); - - // Add only used tensors in cond subgraph - assert(cond_graph.getInputs().size() == body_exec->getOutputTensors().size()); - assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size()); - body_output_tensors.clear(); - cond_input_tensors.clear(); - for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i)); - if (cond_input.getUses().size() > 0) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i)); - } - } - const auto permute_body_output_to_cond_input = std::make_shared<PermuteLayer>( - body_output_tensors, cond_input_tensors, cond_inputs_dyn_alloc); - - // Add only used tensors in body subgraph - assert(body_graph.getInputs().size() == body_exec->getOutputTensors().size()); - assert(body_graph.getInputs().size() == body_exec->getInputTensors().size()); - body_output_tensors.clear(); - body_input_tensors.clear(); - for (uint32_t i = 0; i < body_graph.getInputs().size(); ++i) - { - const auto &body_input_index = body_graph.getInputs().at(i); - const auto &body_input = body_graph.operands().at(body_input_index); - if (body_input.getUses().size() > 0 && - !body_exec->graph().getOutputs().contains(body_input_index)) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - body_input_tensors.emplace_back(body_exec->getInputTensors().at(i)); - } - } - const auto permute_body_output_to_body_input = std::make_shared<PermuteLayer>( - body_output_tensors, body_input_tensors, body_inputs_dyn_alloc); - - // Add only used tensors among outputs of while operation - assert(_output_indices.size() == body_exec->getOutputTensors().size()); - assert(_output_indices.size() == _output_tensors.size()); - body_output_tensors.clear(); - output_tensors.clear(); - for (size_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = _graph.operands().at(output_index); - if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index)) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - output_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_body_output_to_op_output = - std::make_shared<PermuteLayer>(body_output_tensors, output_tensors, _outputs_dyn_alloc_info); - - // Remove copying of unused tensor - permute_op_input_to_cond_input->prepare(); - permute_op_input_to_op_output->prepare(); - permute_op_input_to_body_input->prepare(); - permute_body_output_to_cond_input->prepare(); - permute_body_output_to_body_input->prepare(); - permute_body_output_to_op_output->prepare(); - - cond_exec->execute(_input_tensors, permute_op_input_to_cond_input); - - assert(cond_exec->getOutputTensors().size() == 1); - 
auto &cond_output_tensor = cond_exec->getOutputTensors().at(0); - auto getResultCond = [](backend::ITensor *tensor) -> bool { - bool ret = false; - tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); }); - return ret; - }; - - const auto body_execute_with_op_inputs = [&]() { - body_exec->execute(_input_tensors, permute_op_input_to_body_input); - }; - - const auto body_execute_with_body_outputs = [&]() { - body_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_body_input); - }; - - std::function<void()> body_execute = body_execute_with_op_inputs; - const auto cond_execute = [&]() { - cond_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_cond_input); - }; - auto permute_to_outputs_fn = permute_op_input_to_op_output; - - // Loop while Cond subgraph's output is true - while (getResultCond(cond_output_tensor.get())) - { - body_execute(); - cond_execute(); - body_execute = body_execute_with_body_outputs; - permute_to_outputs_fn = permute_body_output_to_op_output; - } - permute_to_outputs_fn->run(); -} - -} // namespace kernel -} // namespace controlflow -} // namespace backend -} // namespace onert diff --git a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc deleted file mode 100644 index f7ce3d011..000000000 --- a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "backend/cpu_common/DynamicTensorManager.h" - -#include "util/logging.h" - -namespace onert -{ -namespace backend -{ -namespace cpu_common -{ - -DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> ®) - : _dynamic_mem_mgr{new DynamicMemoryManager()}, _tensors{reg} -{ - // DO NOTHING -} - -void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) -{ - VERBOSE_F() << ind << std::endl; - - auto tensor = _tensors->getNativeTensor(ind); - assert(tensor); - - bool previously_dynamic = tensor->is_dynamic(); - - auto allocTensorMem = [&](bool overwrite = false) { - auto capacity = tensor->total_size(); - auto alloc = _dynamic_mem_mgr->allocate(ind, capacity); - - if (overwrite) - tensor->overwriteBuffer(alloc); - else - tensor->setBuffer(alloc); - }; - - if (!previously_dynamic) - { - // TODO deallocate tensor->buffer() - // issue is that staticTensorManager might have allocate this memory - tensor->setShape(new_shape); - tensor->set_dynamic(); - allocTensorMem(true); - } - else if (tensor->buffer() == nullptr) - { - tensor->setShape(new_shape); - tensor->set_dynamic(); - allocTensorMem(); - } - // when buffer was already allocated and new_shape requires different size - else - { - auto previous_size = tensor->total_size(); - auto new_size = new_shape.num_elements() * sizeOfDataType(tensor->data_type()); - if (previous_size != new_size) - { - _dynamic_mem_mgr->deallocate(ind); - - tensor->setShape(new_shape); - tensor->set_dynamic(); - allocTensorMem(true); - } - else - { // when buffer with same size was already allocated, shape could differ - tensor->setShape(new_shape); - } - } -} - -void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind, - const ir::OperandInfo &tensor_info, - ir::Layout backend_layout) -{ - assert(_tensors->getNativeTensor(ind) == nullptr); - auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, this); - _tensors->setNativeTensor(ind, tensor); -} - -void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) -{ - _dealloc_tensor_map[op_ind].emplace(operand_ind); -} - -void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind) -{ - auto find = _dealloc_tensor_map.find(op_ind); - if (find == _dealloc_tensor_map.end()) - return; - - auto &input_set = find->second; - for (auto input_ind : input_set) - { - auto *tensor = _tensors->getNativeTensor(input_ind).get(); - if (!tensor->is_dynamic()) - continue; - - _dynamic_mem_mgr->deallocate(input_ind); - tensor->resetBuffer(); - - VERBOSE(DynamicTensorManager) << "Deallocating #" << input_ind.value() - << " (input of op_ind: " << op_ind.value() << ")" << std::endl; - } -} - -void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind) -{ - auto *tensor = _tensors->getNativeTensor(output_ind).get(); - if (!tensor->is_dynamic()) - return; - - _dynamic_mem_mgr->deallocate(output_ind); - tensor->resetBuffer(); - - VERBOSE(DynamicTensorManager) << "Deallocating #" << output_ind.value() - << " (output of a subgraph)" << std::endl; -} - -} // namespace cpu_common -} // namespace backend -} // namespace onert diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc index db7a14a96..44442c065 100644 --- a/runtime/onert/core/src/compiler/BackendManager.cc +++ b/runtime/onert/core/src/compiler/BackendManager.cc @@ -16,22 +16,17 @@ #include "compiler/BackendManager.h" -#include <memory> -#include <dlfcn.h> +#include 
"../backend/builtin/Backend.h" +#include "../backend/builtin/Config.h" -#include "backend/Backend.h" -#include "backend/controlflow/Backend.h" -#include "backend/controlflow/Config.h" -#include "backend/IConfig.h" -#include "util/logging.h" -#include "util/ConfigSource.h" -#include "misc/string_helpers.h" +#include <dlfcn.h> +#include <memory> static const char *SHARED_LIB_EXT = #if defined(__APPLE__) && defined(__MACH__) - ".dylib"; + ".dylib"; #else - ".so"; + ".so"; #endif namespace onert @@ -45,20 +40,20 @@ BackendManager &BackendManager::get() return object; } -BackendManager::BackendManager() { loadControlflowBackend(); } +BackendManager::BackendManager() { loadBuiltinBackend(); } -void BackendManager::loadControlflowBackend() +void BackendManager::loadBuiltinBackend() { - auto backend_object = std::unique_ptr<backend::controlflow::Backend, backend_destroy_t>( - new backend::controlflow::Backend, [](backend::Backend *backend) { delete backend; }); + auto backend_object = std::unique_ptr<backend::builtin::Backend, backend_destroy_t>( + new backend::builtin::Backend, [](backend::Backend *backend) { delete backend; }); bool initialized = backend_object->config()->initialize(); // Call initialize here? if (!initialized) { - throw std::runtime_error(backend::controlflow::Config::ID + " backend initialization failed"); + throw std::runtime_error(backend::builtin::Config::ID + " backend initialization failed"); } - _controlflow = backend_object.get(); // Save the controlflow backend implementation pointer - assert(_controlflow); + _builtin = backend_object.get(); // Save the builtin backend implementation pointer + assert(_builtin); _gen_map.emplace(backend_object->config()->id(), std::move(backend_object)); } @@ -69,68 +64,67 @@ void BackendManager::loadBackend(const std::string &backend) return; } - // TODO Remove indentation - // Workaround If backend have dynamic library with "-boost" suffix naming, - // BackendManager load library with "-boost" suffix instead of library without suffix - // This feature is used for custom backend extension to support additional operations - { - const std::string backend_boost_so = "libbackend_" + backend + "-boost" + SHARED_LIB_EXT; - const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT; + const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT; + void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL); - void *handle = dlopen(backend_boost_so.c_str(), RTLD_LAZY | RTLD_LOCAL); - if (handle == nullptr) - { - handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL); + if (handle == nullptr) + { + VERBOSE(BackendManager) << "Failed to load backend '" << backend << "' - " << dlerror() << "\n"; + return; + } - if (handle == nullptr) - { - VERBOSE_F() << "Failed to load backend '" << backend << "' - " << dlerror() << std::endl; - return; - } + VERBOSE(BackendManager) << "Successfully loaded '" << backend << "'(" << backend_so << ")\n"; - VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_so << "\n"; + { + // load object creator function + auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create"); + if (backend_create == nullptr) + { + // TODO replace `fprintf` with `VERBOSE` + fprintf(stderr, "BackendManager: unable to find function `onert_backend_create` : %s\n", + dlerror()); + dlclose(handle); + return; } - else + + // load object creator function + auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy"); + if (backend_destroy == nullptr) { - 
VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_boost_so << "\n"; + // TODO replace `fprintf` with `VERBOSE` + fprintf(stderr, "BackendManager: unable to find `function onert_backend_destroy` : %s\n", + dlerror()); + dlclose(handle); + return; } + auto backend_object = + std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy); + bool initialized = backend_object->config()->initialize(); // Call initialize here? + if (!initialized) { - // load object creator function - auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create"); - if (backend_create == nullptr) - { - fprintf(stderr, "BackendManager: unable to open function onert_backend_create : %s\n", - dlerror()); - abort(); - } + VERBOSE(BackendManager) << backend.c_str() + << " backend initialization failed. Don't use this backend" + << std::endl; + dlclose(handle); + return; + } + _gen_map.emplace(backend_object->config()->id(), std::move(backend_object)); + } - // load object creator function - auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy"); - if (backend_destroy == nullptr) + // Save backend handle (avoid warning by handle lost without dlclose()) + auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{ + handle, [id = backend, filename = backend_so](void *h) { + if (dlclose(h) == 0) { - fprintf(stderr, "BackendManager: unable to open function onert_backend_destroy : %s\n", - dlerror()); - abort(); + VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n"; } - - auto backend_object = - std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy); - bool initialized = backend_object->config()->initialize(); // Call initialize here? - if (!initialized) + else { - VERBOSE_F() << backend.c_str() << " backend initialization failed. 
Don't use this backend" - << std::endl; - dlclose(handle); - return; + VERBOSE(BackendManager) << "Failed to unload backend '" << id << "'- " << dlerror() << "\n"; } - _gen_map.emplace(backend_object->config()->id(), std::move(backend_object)); - } - - // Save backend handle (avoid warning by handle lost without dlclose()) - auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [](void *h) { dlclose(h); }}; - _handle_map.emplace(backend, std::move(u_handle)); - } + }}; + _handle_map.emplace(backend, std::move(u_handle)); } backend::Backend *BackendManager::get(const std::string &key) @@ -153,7 +147,7 @@ const backend::Backend *BackendManager::get(const std::string &key) const return nullptr; } -const backend::controlflow::Backend *BackendManager::getControlflow() const { return _controlflow; } +const backend::Backend *BackendManager::getBuiltin() const { return _builtin; } } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc index 93dbbc3b5..ba621bb4f 100644 --- a/runtime/onert/core/src/compiler/Compiler.cc +++ b/runtime/onert/core/src/compiler/Compiler.cc @@ -16,284 +16,185 @@ #include "compiler/Compiler.h" -#include "ParamChecker.h" +#include "CompilerHelpers.h" #include "ExecutorFactory.h" -#include "OperationValidator.h" -#include "Fp32ToFp16Converter.h" - -#include <backend/controlflow/Config.h> -#include "compiler/BackendManager.h" -#include "compiler/IScheduler.h" -#include "compiler/ManualScheduler.h" -#include "compiler/HEScheduler.h" -#include "compiler/StaticShapeInference.h" -#include "exec/ExecTime.h" -#include "ir/operation/LowerInfo.h" -#include "dumper/dot/DotDumper.h" -#include "compiler/Linear.h" -#include "interp/InterpExecutor.h" -#include "util/ConfigSource.h" -#include "util/logging.h" -#include "ir/OperationDumper.h" -#include "misc/string_helpers.h" +#include "ShapeValidator.h" +#include "pass/ConstantOutputPass.h" +#include "pass/OddOutputPass.h" +#include "pass/PassRunner.h" +#include "pass/UnusedOperandEliminationPass.h" +#include "../dumper/dot/DotDumper.h" +#include "../exec/SingleModelExecutors.h" +#include "../ir/OperationDumper.h" +#include "../ir/verifier/Verifier.h" + +#include "compiler/StaticShapeInferer.h" + +#include <misc/string_helpers.h> +#include <misc/polymorphic_downcast.h> namespace onert { - namespace compiler { -CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) +Compiler::Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt) + : _model{model}, _options{&copt} { - CompilerOptions options; - options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';'); - options.is_primary_subgraph = false; - options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH); - options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP); - options.op_seq_max_node = util::getConfigInt(util::config::OP_SEQ_MAX_NODE); - options.executor = util::getConfigString(util::config::EXECUTOR); - options.he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER); - options.he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE); - options.disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE); - options.fp16_enable = util::getConfigBool(util::config::FP16_ENABLE); -#ifdef RUY_PROFILER - options.op_seq_max_node = 1; -#endif - - { - // Backend for all - auto &ms_options = options.manual_scheduler_options; - - // Default value for 
op_backend_all is first element in the backend list - ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS); - -// Opcode to Backend -#define OP(OpName) \ - { \ - const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \ - if (!backend_str.empty()) \ - { \ - ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str; \ - } \ - } -#include "ir/Operations.lst" -#undef OP - - // Index to Backend - // TODO Support multiple subgraphs for manual scheduling - auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP); - auto key_val_list = nnfw::misc::split(map_str, ';'); - for (const auto &key_val_str : key_val_list) - { - if (key_val_str.empty()) - { - continue; - } - - auto key_val = nnfw::misc::split(key_val_str, '='); - const auto &key_str = key_val.at(0); - const auto &val = key_val.at(1); - auto key = static_cast<uint32_t>(std::stoi(key_str)); - - subgs.at(ir::SubgraphIndex{0}) - ->operations() - .at(ir::OperationIndex{key}); // Check if exist, or this wil throw - ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val); - } - } - return options; + // DO NOTHING } -Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs) - : _subgraphs{subgs}, _state{State::CREATED} +Compiler::Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg, + std::vector<std::unique_ptr<CompilerOptions>> &copts) + : _model{nnpkg->primary_model()}, _options{copts[0].get()} { - // Set default values for CompilerOptions - // All these default values should not be fetched from Env, when we stop supporting Android NN - // API. - _options = fetchCompilerOptionsFromGlobalConfig(*subgs); + // Use for single model only + assert(nnpkg->model_count() == 1); } -void Compiler::enableToFp16() { _options.fp16_enable = true; } - -void Compiler::checkProfilerConditions() +std::shared_ptr<CompilerArtifact> Compiler::compile(void) { - if (!_options.he_scheduler) - throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling."); - - if (_options.executor != "Dataflow") - throw std::runtime_error("Profiling mode works only with 'Dataflow' executor"); -} + /*************************************************** + * Prepare compilation phase + ***************************************************/ + if (!_options) + throw std::runtime_error{"Empty compile option"}; -std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) -{ - // Set control flow backend for control flow operators + // Mode check + // TODO handle option for each model + if (_options->he_profiling_mode) { - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = - backend::controlflow::Config::ID; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = - backend::controlflow::Config::ID; + if (!_options->he_scheduler) + throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling."); + + if (_options->executor != "Dataflow") + throw std::runtime_error("Profiling mode works only with 'Dataflow' executor"); } - // FIXME This is a workaround for bcq operations, should remove it + if (!_options->minmax_filepath.empty()) { - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; + if (_options->executor != "Linear") + throw std::runtime_error("Recording minmax works only with Linear executor"); } + if (!_model->hasOnly<ir::Graph>()) { - VERBOSE(Compiler) << std::boolalpha; - VERBOSE(Compiler) << "==== 
Compiler Options ====" << std::endl; - VERBOSE(Compiler) << "backend_list : " - << nnfw::misc::join(_options.backend_list.begin(), - _options.backend_list.end(), "/") - << std::endl; - VERBOSE(Compiler) << "trace_filepath : " << _options.trace_filepath << std::endl; - VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl; - VERBOSE(Compiler) << "op_seq_max_node : " << _options.op_seq_max_node << std::endl; - VERBOSE(Compiler) << "executor : " << _options.executor << std::endl; - VERBOSE(Compiler) << "manual_scheduler_options : (Too many things to print)" << std::endl; - VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl; - VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl; - VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl; - VERBOSE(Compiler) << "fp16_enable : " << _options.fp16_enable << std::endl; - VERBOSE(Compiler) << std::noboolalpha; + throw std::runtime_error("Compiler can only compile models for inference."); } - /*************************************************** - * Prepare compilation phase - ***************************************************/ + _options->forceInternalOptions(); + _options->verboseOptions(); - auto executors = std::make_shared<exec::ExecutorMap>(); + auto custom_kernel_builder = _model->getKernelBuilder(); - // Compilable check - // TODO: Support hybrid execution - - // execution between interpreter and compiled executor (including control flow) - if (!checkCompilable()) - { - _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { - executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg)); - }); - _state = State::COMPILED; - return executors; - } + _model->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) { + auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph); - // Mode check - if (_options.he_profiling_mode) - checkProfilerConditions(); + // Mandatory passes + pass::PassRunner{} + .append(std::make_unique<pass::ConstantOutputPass>(subg)) + .append(std::make_unique<pass::OddOutputPass>(subg)) + .run(); + + // Optimizations + pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run(); + }); /*************************************************** * Backend independent analysis & optimization phase ***************************************************/ - auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level); + // TODO Handle dump level for each model + auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options->graph_dump_level); + onert::dumper::dot::DotDumper dot_dumper(dump_level); + + // Tracing context + auto tracing_ctx = std::make_unique<util::TracingCtx>(); // Lower: Assign backend std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs; - _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { - _options.is_primary_subgraph = (index == ir::SubgraphIndex{0}); - onert::dumper::dot::DotDumper dot_dumper(subg, dump_level); - dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value())); - - // Lower: Assign backend - lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options); - - // Check backend(s) for subgraph support FP16 - bool backends_support_fp16 = true; - auto &contexts = (*lowered_subgs[index]).backend_contexts(); - for (auto it = contexts.begin(); it != contexts.end(); it++) - { - // Controlflow backend is 
not for actual computaion of operations so it is an exception - if (it->first->config()->id() != backend::controlflow::Config::ID) - backends_support_fp16 &= it->first->config()->supportFP16(); - } - - if (_options.fp16_enable && backends_support_fp16) - { - // NOTE: the only acl_cl backend enables fp16 mode - Fp32ToFp16Converter(*lowered_subgs[index]).run(); - } + { + _model->iterate([&](const ir::SubgraphIndex &subg_index, ir::IGraph &graph) { + auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph); + + // Lower: Assign backend + lowered_subgs[subg_index] = std::make_unique<compiler::LoweredGraph>(subg, *_options); + // Set tracing_ctx for copied graph + if (tracing_ctx != nullptr) + tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value()); + }); + } - subg.setSubgraphs(nullptr); - }); + _model.reset(); - _subgraphs.reset(); + for (const auto &pair : lowered_subgs) + { + const auto &subg_index = pair.first; + const auto &lowered_subg = pair.second; + dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value())); + } // Shape inference. { + // Run the StaticShapeInfer of primary subg. All child StaticShapeInferers are called + // recursively + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers = + createStaticShapeInferers(lowered_subgs); + const auto primary_subg_idx = ir::SubgraphIndex{0}; - StaticShapeInferer inferer(primary_subg_idx, lowered_subgs); - lowered_subgs.at(primary_subg_idx) - ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) { - auto has_dynamic_tensor = inferer.infer(op_seq); - op_seq.has_dynamic_tensor(has_dynamic_tensor); - }); - inferer.dump(); - } + inferers.at(primary_subg_idx)->infer(); - /************************************************************* - * Backend independent analysis & optimization phase finished - *************************************************************/ + for (const auto &pair_inferer : inferers) + { + const auto inferer = pair_inferer.second.get(); + inferer->dump(); + } + } - // operation validation - for (auto &pair : lowered_subgs) + // Shape validation + // TODO Move shape independent feature check from ShapeValidator to OperationValidator + // TODO Move ShapeValidator into shape inference + // - Check input tensor shape validation + // - Check parameter value validation which valid value is depend on input tensor shape + // - Output tensor shape validation check is needless because + // static/dynamic shape inferer will make valid output shape + for (const auto &pair : lowered_subgs) { auto &lowered_subg = pair.second; - compiler::OperationValidator{lowered_subg->graph()}(); + compiler::ShapeValidator{lowered_subg->graph()}(); } - executors = std::make_shared<exec::ExecutorMap>(); - for (auto &pair : lowered_subgs) + /************************************************************* + * Backend independent analysis & optimization phase finished + *************************************************************/ + auto executors = std::make_shared<exec::SingleModelExecutors>(); + for (auto &&pair : lowered_subgs) { - const auto &subg_index = pair.first; + auto const model_index = ir::ModelIndex{0}; + auto const subg_index = pair.first; auto &lowered_subg = pair.second; - auto indexed_ranks = lowered_subg->indexed_ranks(); - - _options.is_primary_subgraph = (subg_index == ir::SubgraphIndex{0}); - - onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level); - 
dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value())); + auto const indexed_ranks = lowered_subg->indexed_ranks(); - ir::OperationDumper dumper("START SUBGRAPH " + std::to_string(subg_index.value())); + ir::OperationDumper dumper("Executor generation of Subgraph " + + std::to_string(subg_index.value())); lowered_subg->graph().operations().iterate( - [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); }); + [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); }); + + ExecutorFactoryArgs args; + args.tracing_ctx = tracing_ctx.get(); + args.options = _options; + args.model_index = model_index; + args.custom_kernel_builder = custom_kernel_builder; auto executor = std::unique_ptr<exec::IExecutor>{ - ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)}; + ExecutorFactory::get().create(std::move(lowered_subg), executors, args)}; executor->setIndexedRanks(indexed_ranks); - executors->insert(std::make_pair(subg_index, std::move(executor))); + executors->emplace(model_index, subg_index, std::move(executor)); } /******************************** * Code generation phase finished ********************************/ - _state = State::COMPILED; - return executors; -} - -bool Compiler::checkCompilable() -{ - // Disable compile phase - // When ready to use interpreter backend, remove this config and use backend setting - if (_options.disable_compile) - { - return false; - } - - // TODO check unspecified operand shape - - // Check compilable parameter - for (uint32_t i = 0; i < _subgraphs->count(); ++i) - { - auto graph = _subgraphs->at(ir::SubgraphIndex{i}); - ParamChecker paramChecker{graph}; - paramChecker(); - if (paramChecker.haveNoneConstParam()) - { - return false; - } - } - - return true; + return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx)); } } // namespace compiler - } // namespace onert diff --git a/runtime/onert/core/src/compiler/CompilerFactory.cc b/runtime/onert/core/src/compiler/CompilerFactory.cc new file mode 100644 index 000000000..aeb0876c4 --- /dev/null +++ b/runtime/onert/core/src/compiler/CompilerFactory.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "compiler/CompilerFactory.h" + +#include "MultiModelCompiler.h" +#ifdef ONERT_TRAIN +#include "train/TrainingCompiler.h" +#endif // ONERT_TRAIN + +#include "compiler/Compiler.h" + +namespace onert +{ +namespace compiler +{ + +CompilerFactory &CompilerFactory::get() +{ + static CompilerFactory singleton; + return singleton; +} + +std::unique_ptr<ICompiler> +CompilerFactory::create(const std::shared_ptr<ir::NNPkg> &nnpkg, + std::vector<std::unique_ptr<CompilerOptions>> &copts, + const compiler::train::TrainingInfo *training_info) +{ +#ifdef ONERT_TRAIN + // Returing compiler for training + if (training_info) + return std::make_unique<train::TrainingCompiler>(nnpkg, copts, *training_info); +#else // ONERT_TRAIN + (void)training_info; +#endif // ONERT_TRAIN + + // Returing compiler for inference + if (nnpkg->model_count() == 1) + return std::make_unique<Compiler>(nnpkg, copts); + + return std::make_unique<MultiModelCompiler>(nnpkg, copts); +} + +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/CompilerHelpers.h b/runtime/onert/core/src/compiler/CompilerHelpers.h new file mode 100644 index 000000000..798334b3b --- /dev/null +++ b/runtime/onert/core/src/compiler/CompilerHelpers.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_COMPILER_HELPERS_H__ +#define __ONERT_COMPILER_COMPILER_HELPERS_H__ + +#include <compiler/ILoweredGraph.h> +#include <compiler/StaticShapeInferer.h> +#include <ir/Index.h> + +#include <memory> +#include <unordered_map> + +namespace onert +{ +namespace compiler +{ + +/** + * @brief Create a shape inferer map for a lowered model + * @param[in] lowered_subgs lowered model map + * @return Shape inferer map + */ +template <typename LoweredGraphType, + typename = std::enable_if_t<std::is_base_of<ILoweredGraph, LoweredGraphType>::value>> +static std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> +createStaticShapeInferers( + const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<LoweredGraphType>> &lowered_subgs) +{ + std::unordered_map<ir::SubgraphIndex, ILoweredGraph *> lsubgs; + for (auto &&e : lowered_subgs) + lsubgs[e.first] = e.second.get(); + return StaticShapeInferer::createStaticShapeInferers(lsubgs); +} + +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_COMPILER_HELPERS_H__ diff --git a/runtime/onert/core/src/compiler/CompilerOptions.cc b/runtime/onert/core/src/compiler/CompilerOptions.cc new file mode 100644 index 000000000..830d9dd00 --- /dev/null +++ b/runtime/onert/core/src/compiler/CompilerOptions.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
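CompilerFactory::create above routes a loaded package to the single-model Compiler, to MultiModelCompiler, or (under ONERT_TRAIN) to TrainingCompiler, and CompilerOptions below carries the per-model configuration. A sketch of how a caller might drive these two pieces, assuming the include paths shown in the hunks and that ICompiler exposes the same compile() entry point as the concrete Compiler; the wrapper function itself is hypothetical:

    #include "compiler/CompilerFactory.h"
    #include "compiler/CompilerOptions.h"
    #include "ir/NNPkg.h"

    #include <memory>
    #include <vector>

    // Hypothetical helper: compile an already-loaded package for inference.
    std::shared_ptr<onert::compiler::CompilerArtifact>
    compile_package(const std::shared_ptr<onert::ir::NNPkg> &nnpkg)
    {
      // One CompilerOptions per model; fromGlobalConfig() reads the usual env-driven settings.
      std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> copts;
      copts.emplace_back(onert::compiler::CompilerOptions::fromGlobalConfig());

      // A null TrainingInfo selects an inference compiler (Compiler or MultiModelCompiler).
      auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, copts, nullptr);
      return compiler->compile();
    }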
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compiler/CompilerOptions.h" + +#include "../backend/builtin/Backend.h" + +#include "util/ConfigSource.h" +#include "util/logging.h" + +#include <misc/string_helpers.h> + +namespace +{ + +using namespace onert; + +std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend) +{ + std::unordered_map<ir::OpCode, std::string>::iterator it; + std::string opbackends; + + for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it) + { + if (!opbackends.empty()) + opbackends = opbackends + ", "; + + auto opcode = it->first; + const std::string opname = ir::toString(opcode); + opbackends += opname + "=" + it->second; + } + return opbackends; +} + +} // namespace + +namespace onert +{ +namespace compiler +{ + +void ManualSchedulerOptions::setBackendMap(const std::string &str) +{ + // TODO Support multiple subgraphs for manual scheduling + auto key_val_list = nnfw::misc::split(str, ';'); + for (const auto &key_val_str : key_val_list) + { + if (key_val_str.empty()) + { + continue; + } + + auto key_val = nnfw::misc::split(key_val_str, '='); + const auto &key_str = key_val.at(0); + const auto &val = key_val.at(1); + auto key = static_cast<uint32_t>(std::stoi(key_str)); + this->index_to_backend.emplace(ir::OperationIndex{key}, val); + } +} + +std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig() +{ + auto o = std::make_unique<CompilerOptions>(); + o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';'); + o->minmax_filepath = util::getConfigString(util::config::MINMAX_FILEPATH); + o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH); + o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP); + o->executor = util::getConfigString(util::config::EXECUTOR); + o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER); + o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE); + o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE); + { + // Backend for all + auto &ms_options = o->manual_scheduler_options; + + // Default value for op_backend_all is first element in the backend list + ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS); + +// Opcode to Backend +#define OP(OpName) \ + { \ + const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \ + if (!backend_str.empty()) \ + { \ + ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str; \ + } \ + } +#include "ir/Operations.lst" +#undef OP + + // Index to Backend + auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP); + ms_options.setBackendMap(map_str); + } + return o; +} + +void CompilerOptions::forceInternalOptions() +{ + // Set control flow backend for control flow operators + auto &builtin_id = backend::builtin::Config::ID; + manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id; + manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id; + manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id; + + // FIXME This is a 
workaround for bcq operations, should remove it + manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; + manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; + + // FIXME This is a workaround for bulk operations, should remove it + manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix"; +} + +void CompilerOptions::verboseOptions() +{ + VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl; + VERBOSE(Compiler) << "backend_list : " + << nnfw::misc::join(backend_list.begin(), backend_list.end(), "/") << std::endl; + VERBOSE(Compiler) << "trace_filepath : " << trace_filepath << std::endl; + VERBOSE(Compiler) << "graph_dump_level : " << graph_dump_level << std::endl; + VERBOSE(Compiler) << "executor : " << executor << std::endl; + VERBOSE(Compiler) << "manual backend_for_all : " << manual_scheduler_options.backend_for_all + << std::endl; + VERBOSE(Compiler) << "manual_scheduler_options : " + << getOpBackends(manual_scheduler_options.opcode_to_backend) << std::endl; + VERBOSE(Compiler) << "he_scheduler : " << he_scheduler << std::endl; + VERBOSE(Compiler) << "he_profiling_mode : " << he_profiling_mode << std::endl; + VERBOSE(Compiler) << "fp16_enable : " << fp16_enable << std::endl + << std::noboolalpha; +} + +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc index 062c6c9c3..6a08524cc 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.cc +++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc @@ -16,26 +16,37 @@ #include "ExecutorFactory.h" +#include "Linear.h" +#include "../backend/builtin/BackendContext.h" +#include "../backend/builtin/Config.h" +#include "../backend/builtin/UserTensor.h" +#include "../dumper/text/GraphDumper.h" +#include "../exec/DataflowExecutor.h" +#include "../exec/ExecTime.h" +#include "../exec/ExecutionObservers.h" +#include "../exec/LinearExecutor.h" +#ifdef MINMAX_H5DUMPER +#include "../exec/MinMaxRecorder.h" +#endif +#include "../exec/ParallelExecutor.h" +#include "../ir/OperationCloner.h" + +#include <backend/IPortableTensor.h> +#include <compiler/BackendManager.h> +#include <compiler/ExecutionBuilder.h> +#include <util/TracingCtx.h> + #include <functional> -#include "exec/ExecutionObservers.h" -#include "exec/LinearExecutor.h" -#include "exec/DataflowExecutor.h" -#include "exec/ParallelExecutor.h" -#include "compiler/BackendManager.h" -#include "compiler/ExecutionBuilder.h" -#include "exec/ExecTime.h" -#include "compiler/Linear.h" -#include "compiler/TensorBuilders.h" -#include "backend/IConstantInitializer.h" -#include "backend/IKernelGenerator.h" -#include "backend/IOptimizer.h" -#include "backend/ITensorRegister.h" -#include "backend/controlflow/Config.h" -#include "backend/controlflow/KernelGenerator.h" -#include "backend/controlflow/UserTensor.h" -#include "backend/controlflow/TensorBuilder.h" #include <memory> +#ifdef ONERT_TRAIN +#include "../backend/builtin/train/BackendContext.h" +#include "../exec/train/TrainableExecutor.h" + +#include <backend/train/TrainableBackendContext.h> +#include <backend/train/ITrainableBackend.h> +#endif // ONERT_TRAIN + namespace onert { namespace @@ -46,7 +57,7 @@ class SyncFunction final : public exec::IFunction public: virtual ~SyncFunction() = default; SyncFunction(std::unique_ptr<exec::IFunction> fn, const std::shared_ptr<backend::IConfig> config) - : _fn{std::move(fn)}, _config{config} + : _fn{std::move(fn)}, 
_config{config} { assert(_fn); assert(_config); @@ -65,21 +76,218 @@ private: std::shared_ptr<backend::IConfig> _config; }; -// TODO Think of a better way to manage TensorManagers -backend::TensorManagerSet createTensorManagerSet(const compiler::TensorBuilders &tensor_builders) +using DeallocList = std::vector<backend::ITensor *>; +// Deallocation after execution of an operation used by Linear Executor +class DeallocFunction final : public exec::IFunction +{ +public: + DeallocFunction(const DeallocList &tensors) : _dealloc_list{tensors} {} + + void run() override + { + for (auto &&tensor : _dealloc_list) + { + if (!tensor->is_dynamic()) + continue; + tensor->deallocBuffer(); + } + } + +private: + DeallocList _dealloc_list; +}; + +// TODO Unify initializeSubgraphIOTensors +void initializeSubgraphIOTensors(compiler::ILoweredGraph &lowered_graph, + const backend::BackendContexts &backend_contexts, + const ir::OperandIndexSequence &indices) +{ + // TODO Store builtin backend in BackendContext + std::shared_ptr<backend::builtin::TensorRegistry> builtin_tensor_reg; + for (const auto &e : backend_contexts) + { + auto backend = e.first; + auto &context = e.second; + if (backend->config()->id() == backend::builtin::Config::ID) + { + builtin_tensor_reg = + std::dynamic_pointer_cast<backend::builtin::TensorRegistry>(context->tensor_registry); + } + } + assert(builtin_tensor_reg); + + for (auto &&ind : indices) + { + const auto &operand = lowered_graph.graph().operands().at(ind); + auto tensor = std::make_unique<backend::builtin::IOTensor>( + operand.info(), + ir::Layout::NHWC /* FIXME find operation for this operand and use frontend_layout */ + ); + + // Add tensor to builtin TensorRegistry. + builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor)); + } +} + +#ifdef ONERT_TRAIN +void initializeSubgraphIOTensors(compiler::ILoweredGraph &lowered_graph, + const backend::train::TrainableBackendContexts &backend_contexts, + const ir::OperandIndexSequence &indices) { - backend::TensorManagerSet tensor_mgrs; - for (auto &tensor_builder : tensor_builders) + std::shared_ptr<backend::builtin::train::TensorRegistry> builtin_tensor_reg; + for (const auto &e : backend_contexts) { - auto s_tensor_manager = tensor_builder->releaseStaticTensorManager(); - if (s_tensor_manager != nullptr) - tensor_mgrs.insert(std::move(s_tensor_manager)); + auto backend = e.first; + auto &context = e.second; + if (backend->config()->id() == backend::builtin::Config::ID) + { + builtin_tensor_reg = std::dynamic_pointer_cast<backend::builtin::train::TensorRegistry>( + context->tensor_registry()); + } + } + assert(builtin_tensor_reg); + + for (auto &&ind : indices) + { + const auto &operand = lowered_graph.graph().operands().at(ind); + auto tensor = std::make_unique<backend::builtin::IOTensor>( + operand.info(), + ir::Layout::NHWC /* FIXME find operation for this operand and use frontend_layout */ + ); + + // Add tensor to builtin TensorRegistry. 
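DeallocFunction just above wraps "free this operation's dynamic input tensors" as one more exec::IFunction that the Linear executor appends to an operation's function sequence; statically planned tensors are skipped because the memory planner owns them. A standalone sketch of the idea with hypothetical Tensor/IFunction stand-ins (not the onert classes):

    #include <iostream>
    #include <utility>
    #include <vector>

    struct Tensor // hypothetical stand-in for backend::ITensor
    {
      bool dynamic = false;
      bool allocated = true;
      bool is_dynamic() const { return dynamic; }
      void deallocBuffer() { allocated = false; }
    };

    struct IFunction // hypothetical stand-in for exec::IFunction
    {
      virtual ~IFunction() = default;
      virtual void run() = 0;
    };

    // Frees only dynamically allocated tensors; static ones belong to the memory planner.
    class DeallocFunction final : public IFunction
    {
    public:
      explicit DeallocFunction(std::vector<Tensor *> tensors) : _dealloc_list{std::move(tensors)} {}

      void run() override
      {
        for (auto *tensor : _dealloc_list)
        {
          if (!tensor->is_dynamic())
            continue;
          tensor->deallocBuffer();
        }
      }

    private:
      std::vector<Tensor *> _dealloc_list;
    };

    int main()
    {
      Tensor static_tensor, dynamic_tensor;
      dynamic_tensor.dynamic = true;

      // In the runtime this runs right after the op's kernel in the Linear executor.
      DeallocFunction fn({&static_tensor, &dynamic_tensor});
      fn.run();
      std::cout << static_tensor.allocated << " " << dynamic_tensor.allocated << "\n"; // prints "1 0"
    }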
+ builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor)); + } +} +#endif // ONERT_TRAIN + +backend::BackendContexts +createBackendContexts(compiler::ILoweredGraph &lgraph, bool linear_executor, + std::shared_ptr<backend::custom::IKernelBuilder> custom_kernel_builder) +{ + backend::BackendContexts contexts; + auto &backend_manager = compiler::BackendManager::get(); + + std::unordered_map<const backend::Backend *, backend::ContextData> context_data_map; + + // Generate partial graphs for each backend + for (auto &&backend : backend_manager.getAll()) + { + auto &data = context_data_map[backend]; + auto graph = std::make_unique<ir::Graph>(); + graph->setLayout(lgraph.graph().layout()); + data.graph = std::move(graph); + } + + auto &whole_graph = lgraph.graph(); + // Separate operands into partial graphs + whole_graph.operands().iterate([&](const ir::OperandIndex &operand_ind, ir::Operand &operand) { + auto &operand_li = lgraph.lower_info().operand; + const auto &def_factors = operand_li.at(operand_ind).def_factors(); + if (def_factors.size() == 0) // Ignore unused tensor + return; + const auto &def_factor = def_factors.getOnlyElement(); + const auto backend = def_factor.backend(); + auto &partial_graph = *context_data_map[backend].graph; + auto &operand_layouts = context_data_map[backend].operand_layouts; + assert(operand_layouts.find(operand_ind) == operand_layouts.end()); + operand_layouts[operand_ind] = def_factor.layout(); + + // Copy the operand and insert it to the partial graph + auto new_operand = std::make_unique<ir::Operand>(operand); + new_operand->clearDefUse(); + operand.releaseData(); // Deref data of LoweredGraph + auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand)); + UNUSED_RELEASE(new_operand_ind); + assert(new_operand_ind == operand_ind); + }); + // Separate operations into partial graphs + whole_graph.operations().iterate( + [&](const ir::OperationIndex &op_ind, const ir::IOperation &operation) { + auto &op_li = lgraph.lower_info().operation; + auto backend = op_li.at(op_ind).backend(); + auto &partial_graph = *context_data_map[backend].graph; + auto &external_operands = context_data_map[backend].external_operands; + auto &operand_layouts = context_data_map[backend].operand_layouts; + + { + // Add missing operands (externals) + auto io_list = (operation.getInputs() + operation.getOutputs()) | ir::Remove::DUPLICATED | + ir::Remove::UNDEFINED; + for (auto &&operand_ind : io_list) + { + if (partial_graph.operands().exist(operand_ind)) + continue; + + // Copy the operand and insert it to the partial graph + const auto &operand = whole_graph.operands().at(operand_ind); + auto new_operand = std::make_unique<ir::Operand>(operand); + new_operand->clearDefUse(); + auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand)); + UNUSED_RELEASE(new_operand_ind); + assert(new_operand_ind == operand_ind); + + auto layout = + lgraph.lower_info().operand.at(operand_ind).def_factors().getOnlyElement().layout(); + assert(operand_layouts.find(operand_ind) == operand_layouts.end()); + operand_layouts[operand_ind] = layout; + external_operands.add(operand_ind); + } + + auto new_op_ind = partial_graph.addOperation(op_ind, clone(operation)); + UNUSED_RELEASE(new_op_ind); + assert(new_op_ind == op_ind); + } + }); + + // Create contexts + auto whole_op_order = lgraph.graph().topolSortOperations(); + for (auto &&pair : context_data_map) + { + auto backend = pair.first; + auto &data = pair.second; + // Handle graph input/outputs or 
external tensors + data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { + if (whole_graph.getInputs().contains(ind) || whole_graph.getOutputs().contains(ind)) + data.external_operands.add(ind); + // Inputs are either "graph input" or "no def op and non-constant" + if (whole_graph.getInputs().contains(ind) || + (!operand.getDef().valid() && !operand.isConstant())) + // Outputs are either "graph output" or "no uses" + data.graph->addInput(ind); + if (whole_graph.getOutputs().contains(ind) || operand.getUses().size() == 0) + data.graph->addOutput(ind); + }); + dumper::text::dumpGraph(*data.graph); + + std::copy_if(whole_op_order.begin(), whole_op_order.end(), std::back_inserter(data.op_order), + [&](const auto &ind) { return data.graph->operations().exist(ind); }); + data.is_linear_executor = linear_executor; + data.custom_kernel_builder = custom_kernel_builder; + contexts.emplace(backend, backend->newContext(std::move(data))); + } + return contexts; +} + +template <typename Context> +std::deque<std::pair<const backend::Backend *, Context *>> orderBackendContext( + const std::unordered_map<const backend::Backend *, std::unique_ptr<Context>> &tbackend_contexts) +{ + std::deque<std::pair<const backend::Backend *, Context *>> ordered_contexts; - auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager(); - if (d_tensor_manager != nullptr) - tensor_mgrs.insert(std::move(d_tensor_manager)); + for (auto &&pair : tbackend_contexts) + { + // NOTE builtin backend must be processed lastly. + // This is because of Permute layer's specialty which is the only operation that could have + // different ITensor objects for the input and the output. And it requires all other backends' + // tensors are ready to use. + if (pair.first->config()->id() == "builtin") + ordered_contexts.emplace_back(pair.first, pair.second.get()); + else + ordered_contexts.emplace_front(pair.first, pair.second.get()); } - return tensor_mgrs; + + return ordered_contexts; } } // namespace @@ -106,415 +314,588 @@ ExecutorFactory::ExecutorFactory() } exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map) + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args) { - return _map.at(options.executor)(std::move(lowered_graph), options, executor_map); + assert(args.options != nullptr); + return _map.at(args.options->executor)(std::move(lowered_graph), executors, args); } -void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph) +void ExecutorFactory::prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph, + const backend::BackendContexts &backend_contexts) { - struct Entry - { - std::vector<backend::BackendContext::OperationInfo> operation_list; - std::vector<ir::OperandIndex> operand_list; - }; - std::unordered_map<const backend::Backend *, Entry> backend_assets; - - // Build lists for operations - lowered_graph->op_seqs().iterate( - [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) { - auto &op_seq_li = lowered_graph->getLowerInfo()->op_seq; - auto backend = op_seq_li.at(op_seq_index)->backend(); - for (auto &operation_idx : op_seq.operations()) + TensorRegistries tensor_regs{backend_contexts, true}; + + lowered_graph.graph().operations().iterate( + [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) { + auto lower_info = 
lowered_graph.lower_info().operation.getRawPtr(op_ind); + auto &backend_ctx = backend_contexts.at(lower_info->backend()); + for (auto &&ind : + (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) + { + // If an Operation's input/output tensor does not have an own tensor object, + // it must be using migrant tensors, so find the tensor from other tensor registries and + // register it to the current tensor registry if it is portable + if (!backend_ctx->tensor_registry->getITensor(ind)) { - backend_assets[backend].operation_list.emplace_back(operation_idx, op_seq.getLayout()); + auto tensor = tensor_regs.getITensor(ind); + assert(tensor); // The tensor must have been registered + auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor); + if (ptensor) + backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor); } - }); + } + }); +} - // Build lists for operands - lowered_graph->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { - const auto lower_info = lowered_graph->getLowerInfo(ind); - for (auto factor : lower_info->def_factors()) +void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs, + const std::shared_ptr<exec::IExecutors> &executors, + const backend::BackendContexts &backend_contexts, + const ir::ModelIndex &index) +{ + for (auto &&pair : backend_contexts) + { + auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get()); + if (builtin_context != nullptr) { - auto backend = factor.backend(); - backend_assets[backend].operand_list.emplace_back(ind); + auto builtin_kernel_gen = builtin_context->kernel_gen; + builtin_kernel_gen->setTensorRegistries(tensor_regs); + builtin_kernel_gen->setExecutors(executors); + builtin_kernel_gen->setModelIndex(index); } - }); + } +} - for (auto &pair : backend_assets) +std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> +ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_contexts) +{ + std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts; + for (auto &&pair : backend_contexts) { - auto backend = pair.first; - auto &arg = pair.second; - lowered_graph->backend_contexts().at(backend)->initialize(arg.operation_list, arg.operand_list); + // NOTE builtin backend must be processed lastly. + // This is because of Permute layer's specialty which is the only operation that could have + // different ITensor objects for the input and the output. And it requires all other backends' + // tensors are ready to use. 
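The comment above (repeated in the anonymous-namespace helper earlier) captures the one ordering constraint among backend contexts: the builtin backend is handled last because Permute is the only operation whose input and output may be tensors owned by different backends, so every other backend's tensors must already exist when its kernels are generated. A small standalone sketch of the deque-based ordering used just below (the types are hypothetical stand-ins):

    #include <deque>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    struct Context // hypothetical stand-in for a backend context
    {
      std::string backend_id;
    };

    std::deque<std::pair<std::string, Context *>>
    orderContexts(const std::vector<std::pair<std::string, Context *>> &contexts)
    {
      std::deque<std::pair<std::string, Context *>> ordered;
      for (const auto &pair : contexts)
      {
        // builtin goes to the back so its Permute kernels see every other backend's tensors.
        if (pair.first == "builtin")
          ordered.emplace_back(pair);
        else
          ordered.emplace_front(pair);
      }
      return ordered;
    }

    int main()
    {
      Context cpu{"cpu"}, acl{"acl_cl"}, builtin{"builtin"};
      std::vector<std::pair<std::string, Context *>> contexts{
        {"builtin", &builtin}, {"cpu", &cpu}, {"acl_cl", &acl}};
      for (const auto &pair : orderContexts(contexts))
        std::cout << pair.first << "\n"; // "builtin" is always printed last
    }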
+ if (pair.first->config()->id() == "builtin") + ordered_contexts.emplace_back(pair.first, pair.second.get()); + else + ordered_contexts.emplace_front(pair.first, pair.second.get()); } + return ordered_contexts; } -void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph, - const std::vector<ir::OpSequenceIndex> &order) +exec::IExecutor * +ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args) { - for (const auto index : order) + const auto options = args.options; + const auto &model_index = args.model_index; + const auto tracing_ctx = args.tracing_ctx; + auto custom_kernel_builder = args.custom_kernel_builder; + auto &graph = lowered_graph->graph(); + + backend::BackendContexts backend_contexts = + createBackendContexts(*lowered_graph, options->executor == "Linear", custom_kernel_builder); + + TensorRegistries tensor_regs{backend_contexts, true}; + + initializeSubgraphIOTensors( + *lowered_graph, backend_contexts, + (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) | + ir::Remove::DUPLICATED | ir::Remove::UNDEFINED); + + // linearize + auto order = Linear::linearize(*lowered_graph); + Linear::dump(*lowered_graph, order); + + for (auto &&pair : backend_contexts) { - const auto &op_seq = lowered_graph->op_seqs().at(index); - const auto backend = lowered_graph->getLowerInfo(index)->backend(); - const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register; - auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; - auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs(); + pair.second->genTensors(); + } + + prepareMigrantTensors(*lowered_graph, backend_contexts); - if (tensor_register) + // Give some runtime objects to builtin KernelGenerator + prepareBuiltinBackend(tensor_regs, executors, backend_contexts, model_index); + + ExecutionBuilder builder; + + // Adjust the order of backends for the upcoming iteration + auto ordered_contexts = orderBackendContext(backend_contexts); + + // Simulate the execution for deallocation of tensors + std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map; + { + ir::OperandIndexMap<uint32_t> uses_map; + ir::OperandIndexSequence constants; + + auto model_io = + (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED; + + // Prepare scanning + graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { + uses_map[ind] = obj.getUses().size(); + + if (obj.isConstant()) + constants.append(ind); + }); + + // A trick to consider constants as an execption + for (const auto &ind : constants) { - // Custom registration - tensor_register->registerTensors(op_seq, lowered_graph->getLowerInfo()); + uses_map[ind]++; } - else + + for (const auto &op_ind : order) { - // Default registration - for (const auto op_idx : op_seq) + const auto &op = graph.operations().at(op_ind); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + for (const auto &ind : op_inputs) { - const auto &op = lowered_graph->graph().operations().at(op_idx); - for (const auto &index : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs()) + const auto &operand = graph.operands().at(ind); + assert(uses_map.find(ind) != uses_map.end()); + 
assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind)) { - if (!tensor_builder->isRegistered(index) && !model_io.contains(index)) - { - const auto &operand_lower_info = - lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement(); - - // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) - // op.getOutputs() of permute (CPU) returns tensor A - // but tensor A belongs to the backend of acl_cl. - // So, we have to make this tensor NOT registered for CPU. - if (operand_lower_info.backend() != backend) - continue; - - const auto &obj = lowered_graph->graph().operands().at(index); - const auto frontend_layout = op_seq.getLayout(); - const auto backend_layout = operand_lower_info.layout(); - ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), - obj.typeInfo(), obj.info().memAllocType(), - obj.isConstant()}; - tensor_builder->registerTensorInfo(index, backend_info, backend_layout); - } + dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind)); } } } - } -} -std::vector<std::shared_ptr<backend::ITensor>> -ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph, - const ir::OperandIndexSequence &indices) -{ - std::vector<std::shared_ptr<backend::ITensor>> ret; + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + } - // TODO Store controlflow backend in BackendContext - std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder; - std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg; - for (const auto &e : lowered_graph.backend_contexts()) + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + } + + // Generate kernels + for (auto &&pair : ordered_contexts) { - auto backend = e.first; - auto &context = e.second; - if (backend->config()->id() == backend::controlflow::Config::ID) + auto codes = pair.second->genKernels(); + for (auto &&pair : codes) { - cf_tensor_builder = - std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder); - cf_tensor_reg = - std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry); + auto &op_ind = pair.first; + auto &fn_seq = pair.second; + auto &op = lowered_graph->graph().operations().at(op_ind); + auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind); + if (options->he_profiling_mode) + fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); + if (!dealloc_list_map[op_ind].empty()) + fn_seq->append(std::make_unique<DeallocFunction>(dealloc_list_map[op_ind])); + builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)}); } } - assert(cf_tensor_builder); - assert(cf_tensor_reg); - for (auto ind : indices) + auto code_map = builder.releaseCodeMap(); + + auto exec = new exec::LinearExecutor{std::move(lowered_graph), + std::move(backend_contexts), + tensor_regs, + std::move(code_map), + order, + tracing_ctx}; + + if (!options->trace_filepath.empty()) { - const auto &operand = lowered_graph.graph().operands().at(ind); - auto tensor = std::make_shared<backend::controlflow::UserTensor>( - operand.info(), - ir::Layout::NHWC, /* FIXME find op_seq for this operand and use frontend_layout */ - cf_tensor_builder->dynamicTensorManager()); - - // Add tensor to controlflow TensorRegistry. 
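The "Simulate the execution for deallocation of tensors" block in createLinearExecutor above is a plain use-count liveness pass: every operand starts with its number of uses, each operation in linear order decrements the counts of its inputs, and an operand whose count reaches zero (and is not a variable or a model input/output) is recorded in dealloc_list_map against the operation that consumed it last. A standalone sketch of the counting with plain integer indices instead of ir::OperandIndex (the constant-handling trick is omitted):

    #include <cassert>
    #include <iostream>
    #include <map>
    #include <set>
    #include <vector>

    using Op = std::vector<int>; // an operation is just the list of operand indices it reads

    int main()
    {
      std::vector<Op> order = {{0, 1}, {1, 2}, {2}}; // three ops in linear order
      std::set<int> model_io = {0};                  // operand 0 is a model input: never freed here

      // Count the uses of every operand.
      std::map<int, int> uses;
      for (const auto &op : order)
        for (int ind : op)
          ++uses[ind];

      // Walk the linear order; when the last use of an operand is consumed,
      // remember that this operation should trigger its deallocation.
      std::map<int, std::vector<int>> dealloc_after_op; // op position -> operands to free
      for (int pos = 0; pos < static_cast<int>(order.size()); ++pos)
      {
        for (int ind : order[pos])
        {
          assert(uses[ind] > 0);
          if (--uses[ind] == 0 && model_io.count(ind) == 0)
            dealloc_after_op[pos].push_back(ind);
        }
      }

      for (const auto &entry : dealloc_after_op)
        for (int ind : entry.second)
          std::cout << "free operand " << ind << " after op " << entry.first << "\n";
      // -> "free operand 1 after op 1" and "free operand 2 after op 2"
    }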
- cf_tensor_reg->setNativeUserTensor(ind, tensor); - ret.push_back(tensor); + std::unique_ptr<exec::IExecutionObserver> ctp = + std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx); + exec->addObserver(std::move(ctp)); } - return ret; -} +#ifdef MINMAX_H5DUMPER + if (!options->minmax_filepath.empty()) + exec->addObserver(std::make_unique<exec::MinMaxRecorder>( + options->minmax_filepath, exec->graph(), exec->getBackendContexts())); +#endif -void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_graph) -{ - TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true}; - - lowered_graph.op_seqs().iterate( - [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) { - auto lower_info = lowered_graph.getLowerInfo(op_seq_index); - auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend()); - for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - // If an OpSequence input/output tensor does not have a own tensor object, - // it must be using external tensors, so find the tensor from other tensor builders and - // set the tensor to this tensor builder if portable - if (!backend_ctx->tensor_registry->getITensor(ind)) - { - auto tensor = tensor_regs.getITensor(ind); - assert(tensor); // The tensor must have been registered - auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor); - if (ptensor) - backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor); - } - } - }); + return exec; } exec::IExecutor * -ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map) +ExecutorFactory::createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args, bool parallel) { - const auto &backend_contexts = lowered_graph->backend_contexts(); + const auto options = args.options; + const auto &model_index = args.model_index; + const auto tracing_ctx = args.tracing_ctx; + auto custom_kernel_builder = args.custom_kernel_builder; - initializeBackendContext(lowered_graph.get()); + backend::BackendContexts backend_contexts = + createBackendContexts(*lowered_graph, options->executor == "Linear", custom_kernel_builder); - // linearize - assert(!lowered_graph->graph().isBuildingPhase()); + TensorRegistries tensor_regs{backend_contexts, true}; - /************************************************* - * Backend dependent analysis & optimization phase - *************************************************/ + initializeSubgraphIOTensors( + *lowered_graph, backend_contexts, + (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) | + ir::Remove::DUPLICATED | ir::Remove::UNDEFINED); - for (auto &pair : backend_contexts) + for (auto &&pair : backend_contexts) { - auto &optimizer = pair.second->optimizer; - if (optimizer) - optimizer->optimize(); + pair.second->genTensors(); } - /********************************************************** - * Backend dependent analysis & optimization phase finished - **********************************************************/ + prepareMigrantTensors(*lowered_graph, backend_contexts); - /*********************** - * Code generation phase - ***********************/ + // Give some runtime objects to builtin KernelGenerator + prepareBuiltinBackend(tensor_regs, 
executors, backend_contexts, model_index); - auto order = Linear::linearize(*lowered_graph); - runTensorRegistration(lowered_graph.get(), order); + ExecutionBuilder builder; - std::vector<std::shared_ptr<backend::ITensor>> input_tensors; - std::vector<std::shared_ptr<backend::ITensor>> output_tensors; - if (options.is_primary_subgraph) + // Adjust the order of backends for the upcoming iteration + auto ordered_contexts = orderBackendContext(backend_contexts); + + // Generate kernels + for (auto &&pair : ordered_contexts) { - input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs()); - output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs()); + auto codes = pair.second->genKernels(); + for (auto &&pair : codes) + { + auto &op_ind = pair.first; + auto &fn_seq = pair.second; + auto &op = lowered_graph->graph().operations().at(op_ind); + auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind); + if (options->he_profiling_mode) + fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); + builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)}); + } } - Linear::dump(*lowered_graph, order); - Linear::planTensors(*lowered_graph, order); + auto code_map = builder.releaseCodeMap(); - TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true}; - TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true}; + exec::ExecutorBase *exec = nullptr; + if (parallel) + { + exec = new exec::ParallelExecutor{std::move(lowered_graph), std::move(backend_contexts), + tensor_regs, std::move(code_map), tracing_ctx}; + } + else + { + auto dataflow_exec = + new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, + std::move(code_map), tracing_ctx}; + if (options->he_profiling_mode) + { + std::vector<const backend::Backend *> backends; + for (const auto &pair : backend_contexts) + { + backends.push_back(pair.first); + } + auto et = std::make_shared<exec::ExecTime>(backends); + std::unique_ptr<exec::IExecutionObserver> obs = + std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph()); + dataflow_exec->addObserver(std::move(obs)); + } + exec = dataflow_exec; + } - for (auto &tensor_builder : tensor_builders) + if (!options->trace_filepath.empty()) { - tensor_builder->prepare(); + std::unique_ptr<exec::IExecutionObserver> ctp = + std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx); + exec->addObserver(std::move(ctp)); } - prepareExternalTensors(*lowered_graph); + return exec; +} - ExecutionBuilder builder; +#ifdef ONERT_TRAIN +exec::IExecutor * +ExecutorFactory::create(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args, + const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer) +{ + assert(args.options != nullptr); - // Generate kernels - lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index, - const ir::OpSequence &op_seq) { - auto lower_info = lowered_graph->getLowerInfo(op_seq_index); - auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen; - // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow - auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get()); - if (cf_kernel_gen != nullptr) + if (args.options->executor != "Linear") + throw std::runtime_error("ExecutorFactory: 
TrainableExecutor supports only 'Linear' now");
+
+  return createTrainableExecutor(std::move(lowered_graph), executors, args, optimizer);
+}
+
+void ExecutorFactory::prepareMigrantTensors(
+  compiler::ILoweredGraph &lowered_graph,
+  const backend::train::TrainableBackendContexts &backend_contexts)
+{
+  train::TensorRegistries tensor_regs{backend_contexts, true};
+
+  lowered_graph.graph().operations().iterate(
+    [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) {
+      auto lower_info = lowered_graph.lower_info().operation.getRawPtr(op_ind);
+      auto &backend_ctx = backend_contexts.at(lower_info->backend());
+      for (auto &&ind :
+           (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+      {
+        // If an Operation's input/output tensor does not have its own tensor object,
+        // it must be using migrant tensors, so find the tensor from other tensor registries and
+        // register it to the current tensor registry if it is portable
+        if (!backend_ctx->tensor_registry()->getITensor(ind))
+        {
+          auto tensor = tensor_regs.getITensor(ind);
+          assert(tensor); // The tensor must have been registered
+          auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
+          if (ptensor)
+            backend_ctx->tensor_registry()->setMigrantTensor(ind, ptensor);
+        }
+      }
+    });
+}
+
+exec::IExecutor *ExecutorFactory::createTrainableExecutor(
+  std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+  const std::shared_ptr<exec::IExecutors> &, const ExecutorFactoryArgs &args,
+  const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer)
+{
+  const auto options = args.options;
+  const auto tracing_ctx = args.tracing_ctx;
+  auto custom_kernel_builder = args.custom_kernel_builder;
+
+  auto &graph = lowered_graph->graph();
+
+  lowered_graph->trainable_graph().operations().iterate([](const onert::ir::OperationIndex &,
+                                                           const onert::ir::IOperation &op) {
+    try
    {
-      cf_kernel_gen->setTensorRegistries(tensor_regs);
-      cf_kernel_gen->setExecutorMap(executor_map);
+      UNUSED_RELEASE(dynamic_cast<const ir::train::ITrainableOperation &>(op));
    }
-    auto fn_seq = kernel_gen->generate(op_seq);
-    if (options.he_profiling_mode)
+    catch (std::bad_cast &)
    {
-      fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+      throw std::runtime_error("ExecutorFactory: " + op.name() + " is not a trainable operation yet");
    }
-    builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
  });
-  for (auto &tensor_builder : tensor_builders)
-  {
-    tensor_builder->allocate();
-  }
+  // TODO Create context only once instead of replacing
+  backend::train::TrainableBackendContexts tbackend_contexts;
+  backend::BackendContexts base_backend_contexts =
+    createBackendContexts(*lowered_graph, true, custom_kernel_builder);
-  for (auto &pair : backend_contexts)
+  // Replace BackendContext with TrainableBackendContext
+  for (auto &&pair : base_backend_contexts)
  {
-    pair.second->initConsts();
-  }
-
-  lowered_graph->graph().operands().iterate(
-    [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
-  auto code_map = builder.releaseCodeMap();
-
-  for (auto &it : code_map)
-  {
-    auto op_seq_index = it.first;
-    auto &fn_seq = it.second.fn_seq;
-
-    fn_seq->iterate([&](exec::IFunction &ifunc) {
-      ifunc.prepare();
-      auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
-      auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
-      tensor_builder->postFunctionPrepare();
+    auto ctx = pair.second.get();
+    const auto &data = ctx->data();
+
+    // Create partial and trainable graphs
+    auto tgraph = std::make_unique<ir::train::TrainableGraph>(*data.graph);
+    data.graph->operations().iterate(
+      [&](const onert::ir::OperationIndex &op_index, const onert::ir::IOperation &) {
+        const auto &orig_tgraph = lowered_graph->trainable_graph();
+        const auto &trainable_op = orig_tgraph.operation(op_index);
+        auto gen_index = tgraph->replaceOperation(op_index, trainable_op.clone());
+        UNUSED_RELEASE(gen_index);
+        assert(gen_index == op_index);
+      });
+    data.graph->operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+      const auto &orig_tgraph = lowered_graph->trainable_graph();
+      if (orig_tgraph.derivatives().exist(index))
+      {
+        const auto &deriv = orig_tgraph.derivatives().at(index);
+        auto new_deriv = std::make_unique<ir::Operand>(deriv);
+        auto gen_index = tgraph->addDerivative(index, std::move(new_deriv));
+        UNUSED_RELEASE(gen_index);
+        assert(gen_index == index);
+      }
    });
-  }
-  backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
-  auto exec = new exec::LinearExecutor{
-    std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
-    std::move(tensor_mgrs), std::move(code_map), order};
+    // Remove outputs of whole graph from external_operands
+    auto external_operands = data.external_operands;
+    for (const auto &index : lowered_graph->trainable_graph().getOutputs())
+    {
+      if (external_operands.contains(index))
+        external_operands.remove(index);
+    }
-  if (!options.trace_filepath.empty())
-  {
-    std::unique_ptr<exec::IExecutionObserver> ctp =
-      std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
-    exec->addObserver(std::move(ctp));
+    // Set trainable context data
+    backend::train::TrainableContextData tdata;
+    tdata.tgraph = std::move(tgraph);
+    tdata.op_order = std::move(data.op_order);
+    tdata.external_operands = std::move(external_operands);
+    tdata.operand_layouts = std::move(data.operand_layouts);
+    tdata.custom_kernel_builder = std::move(data.custom_kernel_builder);
+    tdata.is_linear_executor = data.is_linear_executor;
+    tdata.optimizer = optimizer;
+
+    // TODO Remove dynamic_cast
+    try
+    {
+      const auto backend = pair.first;
+      const auto tbackend = dynamic_cast<const backend::train::ITrainableBackend *>(backend);
+      tbackend_contexts.emplace(backend, tbackend->newContext(std::move(tdata)));
+    }
+    catch (const std::bad_cast &)
+    {
+      throw std::runtime_error("ExecutorFactory: Invalid backend - TrainableExecutor does not "
+                               "support non-trainable backends");
+    }
  }
+  base_backend_contexts.clear();
-  return exec;
-}
-
-exec::IExecutor *ExecutorFactory::createDataflowExecutor(
-  std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
-  const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
-{
-  const auto &backend_contexts = lowered_graph->backend_contexts();
+  train::TensorRegistries tensor_regs{tbackend_contexts, true};
-  initializeBackendContext(lowered_graph.get());
+  initializeSubgraphIOTensors(
+    *lowered_graph, tbackend_contexts,
+    (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+      ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+  // linearize
  auto order = Linear::linearize(*lowered_graph);
-  runTensorRegistration(lowered_graph.get(), order);
+  Linear::dump(*lowered_graph, order);
-  std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
-  std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
-  if (options.is_primary_subgraph)
+  for (auto &&pair : tbackend_contexts)
  {
-    input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
-    output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
+    pair.second->genTensors();
  }
-  TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
-  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
-
-  // To make tensors never be deallocated, this is a workaround to use static memory planner
-  for (auto &tensor_builder : tensor_builders)
+  for (auto &&pair : tbackend_contexts)
  {
-    lowered_graph->graph().operands().iterate(
-      [&](const ir::OperandIndex &ind, const ir::Operand &) {
-        if (tensor_builder->isRegistered(ind))
-        {
-          tensor_builder->notifyFirstUse(ind);
-        }
-      });
+    auto tctx = pair.second.get();
+    tctx->genTrainingTensors();
  }
-  for (auto &tensor_builder : tensor_builders)
+  prepareMigrantTensors(*lowered_graph, tbackend_contexts);
+
+  // Give some runtime objects to builtin KernelGenerator
+  for (auto &&pair : tbackend_contexts)
  {
-    tensor_builder->prepare();
+    auto builtin_context =
+      dynamic_cast<backend::builtin::train::BackendContext *>(pair.second.get());
+    if (builtin_context != nullptr)
+    {
+      auto builtin_kernel_gen = builtin_context->kernel_gen;
+      builtin_kernel_gen->setTensorRegistries(tensor_regs);
+      builtin_kernel_gen->setWholeGraphOutputs(lowered_graph->trainable_graph().getOutputs());
+    }
  }
-  prepareExternalTensors(*lowered_graph);
+  // Adjust the order of backends for the upcoming iteration
+  auto ordered_contexts =
+    onert::orderBackendContext<backend::train::TrainableBackendContext>(tbackend_contexts);
-  ExecutionBuilder builder;
+  // TODO Remove this simulation
+  // Simulate the execution for deallocation of tensors
+  std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
+  {
+    ir::OperandIndexMap<uint32_t> uses_map;
+    ir::OperandIndexSequence constants;
-  // Generate kernels
-  lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
-                                        const ir::OpSequence &op_seq) {
-    auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
-    auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
-    // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow
-    auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
-    if (cf_kernel_gen != nullptr)
+    auto model_io =
+      (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+    // Prepare scanning
+    graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+      uses_map[ind] = obj.getUses().size();
+
+      if (obj.isConstant())
+        constants.append(ind);
+    });
+
+    // A trick to consider constants as an exception
+    for (const auto &ind : constants)
    {
-      assert(cf_kernel_gen != nullptr);
-      cf_kernel_gen->setTensorRegistries(tensor_regs);
-      cf_kernel_gen->setExecutorMap(executor_map);
+      uses_map[ind]++;
    }
-    auto fn_seq = kernel_gen->generate(op_seq);
-    if (options.he_profiling_mode)
+
+    for (const auto op_ind : order)
    {
-      fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+      const auto &op = graph.operations().at(op_ind);
+      auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+      auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+      for (const auto &ind : op_inputs)
+      {
+        const auto &operand = graph.operands().at(ind);
+        assert(uses_map.find(ind) != uses_map.end());
+        assert(uses_map[ind] > 0);
+        uses_map[ind]--;
+        if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind))
+        {
+          dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind));
+        }
+      }
+      }
    }
-    builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
-  });
-  for (const auto &tensor_builder : tensor_builders)
-  {
-    tensor_builder->allocate();
-  }
+    // Dispose and validate
+    for (const auto &ind : constants)
+    {
+      --uses_map[ind];
+    }
-  for (auto &pair : backend_contexts)
-  {
-    pair.second->initConsts();
+    assert(
+      std::all_of(uses_map.begin(), uses_map.end(),
+                  [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
  }
-  lowered_graph->graph().operands().iterate(
-    [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
-  auto code_map = builder.releaseCodeMap();
-
-  for (auto &it : code_map)
+  // Check derivative tensors
  {
-    auto op_seq_index = it.first;
-    auto &fn_seq = it.second.fn_seq;
-
-    fn_seq->iterate([&](exec::IFunction &ifunc) {
-      ifunc.prepare();
-      auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
-      auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
-      tensor_builder->postFunctionPrepare();
-    });
+    // TODO Support multiple subgraphs
+    // Check if the derivative tensors corresponding to inputs of model are nullptr
+    // NOTE The derivative tensors corresponding to inputs of model are for inputs of PermuteLayers
+    //      and they are nullptr because they are meaningless.
+    assert(std::all_of(lowered_graph->trainable_graph().getInputs().begin(),
+                       lowered_graph->trainable_graph().getInputs().end(),
+                       [&](const auto &input_idx) {
+                         return tensor_regs.getDerivativeITensor(input_idx) == nullptr;
+                       }));
+
+    // Check if the derivative tensors corresponding to outputs of model exist
+    assert(std::all_of(lowered_graph->trainable_graph().getOutputs().begin(),
+                       lowered_graph->trainable_graph().getOutputs().end(),
+                       [&](const auto &output_idx) {
+                         return tensor_regs.getDerivativeITensor(output_idx) == nullptr;
+                       }));
  }
-  backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
-
-  exec::ExecutorBase *exec = nullptr;
-  if (parallel)
-  {
-    exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors,
-                                      output_tensors, tensor_regs,
-                                      std::move(tensor_mgrs), std::move(code_map)};
-  }
-  else
+  train::TrainableCodeMap code_map;
+  // Generate kernels
+  for (auto &&pair : ordered_contexts)
  {
-    auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), input_tensors,
-                                                    output_tensors, tensor_regs,
-                                                    std::move(tensor_mgrs), std::move(code_map)};
-    if (options.he_profiling_mode)
+    auto codes = pair.second->genKernels();
+    for (auto &&pair : codes)
    {
-      std::vector<const backend::Backend *> backends;
-      for (const auto &pair : backend_contexts)
-      {
-        backends.push_back(pair.first);
-      }
-      auto et = std::make_shared<exec::ExecTime>(backends);
-      std::unique_ptr<exec::IExecutionObserver> obs =
-        std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
-      dataflow_exec->addObserver(std::move(obs));
+      auto &op_ind = pair.first;
+      auto &tn_seq = pair.second;
+      auto &op = lowered_graph->trainable_graph().operation(op_ind);
+      auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
+
+      assert(code_map.find(op_ind) == code_map.end());
+      code_map.insert(
+        {op_ind, train::TrainableCodeAndInfo{op_ind, &op, lower_info, std::move(tn_seq)}});
    }
-    exec = dataflow_exec;
  }
-  if (!options.trace_filepath.empty())
+  if (order.size() != code_map.size())
+  {
+    throw
std::runtime_error("ExecutorFactory: Some kernels are not generated"); + } + + auto exec = new exec::train::TrainableExecutor{std::move(lowered_graph), + std::move(tbackend_contexts), + tensor_regs, + std::move(code_map), + order, + tracing_ctx}; + + if (!options->trace_filepath.empty()) { std::unique_ptr<exec::IExecutionObserver> ctp = - std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph()); + std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx); exec->addObserver(std::move(ctp)); } + // TODO Support MINMAX_H5DUMPER return exec; } +#endif // ONERT_TRAIN } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h index b8893c03b..cc621bccf 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.h +++ b/runtime/onert/core/src/compiler/ExecutorFactory.h @@ -17,18 +17,37 @@ #ifndef __ONERT_COMPILER_EXECUTOR_FACTORY_H__ #define __ONERT_COMPILER_EXECUTOR_FACTORY_H__ -#include <unordered_map> +#include "TensorRegistries.h" #include "backend/ITensor.h" -#include "exec/IExecutor.h" + +#ifdef ONERT_TRAIN +#include "backend/train/TrainableBackendContext.h" +#endif // ONERT_TRAIN #include "compiler/LoweredGraph.h" -#include "TensorRegistries.h" +#ifdef ONERT_TRAIN +#include "compiler/train/LoweredTrainableGraph.h" +#include "exec/train/optimizer/Optimizer.h" +#endif // ONERT_TRAIN +#include "exec/IExecutors.h" + +#include <deque> +#include <unordered_map> namespace onert { namespace compiler { +// TODO Change to a better name +struct ExecutorFactoryArgs +{ + const util::TracingCtx *tracing_ctx; + const compiler::CompilerOptions *options; + ir::ModelIndex model_index; + std::shared_ptr<backend::custom::IKernelBuilder> custom_kernel_builder; +}; + class ExecutorFactory { public: @@ -36,35 +55,56 @@ public: public: exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map); + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args); + +#ifdef ONERT_TRAIN + // TODO Unify create() + exec::IExecutor *create(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args, + const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer); +#endif // ONERT_TRAIN private: ExecutorFactory(); private: - static void initializeBackendContext(compiler::LoweredGraph *lowered_graph); - static void runTensorRegistration(compiler::LoweredGraph *lowered_graph, - const std::vector<ir::OpSequenceIndex> &order); - static std::vector<std::shared_ptr<backend::ITensor>> - initializeModelIOTensors(compiler::LoweredGraph &lowered_graph, - const ir::OperandIndexSequence &indices); - static void prepareExternalTensors(compiler::LoweredGraph &lowered_graph); + static void prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph, + const backend::BackendContexts &backend_contexts); + static void prepareBuiltinBackend(const TensorRegistries &tensor_regs, + const std::shared_ptr<exec::IExecutors> &executors, + const backend::BackendContexts &backend_contexts, + const ir::ModelIndex &index); + static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> + orderBackendContext(const backend::BackendContexts &backend_contexts); + static exec::IExecutor * 
createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map); + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args); static exec::IExecutor * createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel); + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args, bool parallel); +#ifdef ONERT_TRAIN + // TODO Unify prepareMigrantTensors + static void + prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph, + const backend::train::TrainableBackendContexts &backend_contexts); + static exec::IExecutor * + createTrainableExecutor(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args, + const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer); +#endif // ONERT_TRAIN private: - std::unordered_map<std::string, std::function<exec::IExecutor *( - std::unique_ptr<compiler::LoweredGraph>, - const compiler::CompilerOptions &options, - const std::shared_ptr<exec::ExecutorMap> &executor_map)>> - _map; + std::unordered_map< + std::string, std::function<exec::IExecutor *(std::unique_ptr<compiler::LoweredGraph>, + const std::shared_ptr<exec::IExecutors> &executors, + const ExecutorFactoryArgs &args)>> + _map; }; } // namespace compiler diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc index 23a6a253d..ce9b09c2d 100644 --- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc +++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc @@ -14,6 +14,8 @@ * limitations under the License. */ +#if 0 // This file is temporarily unused + #include "Fp32ToFp16Converter.h" #include "ir/operation/ConvertFp32ToFp16.h" #include "ir/operation/ConvertFp16ToFp32.h" @@ -45,7 +47,7 @@ namespace compiler { Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph) - : _lowered_graph{lowered_graph} + : _lowered_graph{lowered_graph} { VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl; } @@ -177,26 +179,26 @@ void Fp32ToFp16Converter::run() void Fp32ToFp16Converter::appendOpSequences() { _lowered_graph.op_seqs().iterate( - [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); - assert(lower_info != nullptr); - - // For now, the only acl_cl supports fully fp16 type - // TODO Support fp16 on acl_neon. Current acl_neon supports the only reshape and concat - // operations. - // To do this, we could check the support by `operation by operation`. After that, we - // would partition an op_seq if it contains unsupported operations. - if (lower_info->backend()->config()->id() != kAclClBackendConfigId) - return; - - // OpSeq's input set should be included in the first operation's input set or - // OpSeq's output set should be included in the last operation's output set - assert(checkOperandsOfOpSequence(op_seq)); - - // Append converting OpSequence for fp16 but all operands' types are not fp16 still. 
- appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq); - appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq); - }); + [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) { + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + assert(lower_info != nullptr); + + // For now, the only acl_cl supports fully fp16 type + // TODO Support fp16 on acl_neon. Current acl_neon supports the only reshape and concat + // operations. + // To do this, we could check the support by `operation by operation`. After that, we + // would partition an op_seq if it contains unsupported operations. + if (lower_info->backend()->config()->id() != kAclClBackendConfigId) + return; + + // OpSeq's input set should be included in the first operation's input set or + // OpSeq's output set should be included in the last operation's output set + assert(checkOperandsOfOpSequence(op_seq)); + + // Append converting OpSequence for fp16 but all operands' types are not fp16 still. + appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq); + appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq); + }); } // @@ -253,7 +255,7 @@ void Fp32ToFp16Converter::appendNewOpSeqForConvertFp32ToFp16(const ir::OpSequenc const auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind); // set new lower_info for op_seq - setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind); + setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind); _list_fp32_to_fp16.insert(new_op_seq_ind); @@ -326,7 +328,7 @@ void Fp32ToFp16Converter::appendNewOpSeqForConvertFp16ToFp32(const ir::OpSequenc auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind); // set new lower_info for op_seq - setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind); + setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind); _list_fp16_to_fp32.insert(new_op_seq_ind); @@ -372,16 +374,16 @@ void Fp32ToFp16Converter::optimize() void Fp32ToFp16Converter::convertOperands() { _lowered_graph.op_seqs().iterate( - [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); - assert(lower_info != nullptr); - // For now, the only acl_cl supports fully fp16 - if (lower_info->backend()->config()->id() != kAclClBackendConfigId) - return; - - // Convert input,output operands' type to fp16 - convertOperandsOfOpSequence(op_seq); - }); + [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) { + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + assert(lower_info != nullptr); + // For now, the only acl_cl supports fully fp16 + if (lower_info->backend()->config()->id() != kAclClBackendConfigId) + return; + + // Convert input,output operands' type to fp16 + convertOperandsOfOpSequence(op_seq); + }); } void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq) @@ -391,10 +393,10 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq) const auto &op_seq_inputs = _lowered_graph.graph().getInputs(); const auto &op_seq_outputs = _lowered_graph.graph().getOutputs(); - for (auto &op_idx : op_seq) + for (const auto &op_idx : op_seq) { const auto &node = operations.at(op_idx); - for (auto &ind : node.getInputs() | ir::Remove::UNDEFINED) + for (const auto &ind : node.getInputs() | ir::Remove::UNDEFINED) { if (node.opcode() == ir::OpCode::ConvertFp32ToFp16 || op_seq_inputs.contains(ind)) continue; @@ -405,10 +407,10 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq) obj.type(ir::DataType::FLOAT16); - 
VERBOSE(Fp32ToFp16Converter) << "Input Operand #" << ind.value() << ": fp16" << std::endl; + VERBOSE(Fp32ToFp16Converter) << "Input Operand " << ind << ": fp16" << std::endl; } - for (auto &ind : node.getOutputs()) + for (const auto &ind : node.getOutputs()) { if (node.opcode() == ir::OpCode::ConvertFp16ToFp32 || op_seq_outputs.contains(ind)) continue; @@ -419,7 +421,7 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq) obj.type(ir::DataType::FLOAT16); - VERBOSE(Fp32ToFp16Converter) << "Output Operand #" << ind.value() << ": fp16" << std::endl; + VERBOSE(Fp32ToFp16Converter) << "Output Operand " << ind << ": fp16" << std::endl; } } } @@ -444,7 +446,7 @@ void Fp32ToFp16Converter::convertDatas() obj.data(std::move(new_data)); obj.type(ir::DataType::FLOAT16); - VERBOSE(Fp32ToFp16Converter) << "Constant Operand #" << ind.value() << ": fp16" << std::endl; + VERBOSE(Fp32ToFp16Converter) << "Constant Operand " << ind << ": fp16" << std::endl; } }); } @@ -513,23 +515,23 @@ ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &o void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind, const ir::OperandIndex &new_op_ind) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); - auto new_lower_info = std::make_unique<ir::operand::LowerInfo>(); - auto permute_factor = ir::operand::PermuteFactor(lower_info->backend(), lower_info->layout()); + auto new_lower_info = std::make_unique<compiler::OperandLowerInfo>(); + auto permute_factor = compiler::PermuteFactor(lower_info->backend(), lower_info->layout()); new_lower_info->addDefPermuteFactor(permute_factor); new_lower_info->addUsePermuteFactor(permute_factor); _lowered_graph.setLowerInfo(new_op_ind, std::move(new_lower_info)); } -void Fp32ToFp16Converter::setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind, - const ir::OpSequenceIndex &new_op_seq_ind) +void Fp32ToFp16Converter::setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind, + const ir::OpSequenceIndex &new_op_seq_ind) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); auto new_lower_info = - std::make_unique<ir::operation::LowerInfo>(lower_info->backend(), lower_info->layout()); + std::make_unique<compiler::OperationLowerInfo>(lower_info->backend(), lower_info->layout()); _lowered_graph.setLowerInfo(new_op_seq_ind, std::move(new_lower_info)); } @@ -600,7 +602,7 @@ Fp32ToFp16Converter::newOperationConvertFp32ToFp16(const ir::OperandIndex &op_se auto &new_op_obj = operands.at(new_op_ind); std::unique_ptr<ir::Operation> new_node( - new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind})); + new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind})); const auto new_node_ind = operations.push(std::move(new_node)); input_obj.insertUse(new_node_ind); @@ -620,7 +622,7 @@ Fp32ToFp16Converter::newOperationConvertFp16ToFp32(const ir::OperandIndex &op_se auto &new_op_obj = operands.at(new_op_ind); std::unique_ptr<ir::Operation> new_node( - new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind})); + new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind})); const auto new_node_ind = operations.push(std::move(new_node)); new_op_obj.insertUse(new_node_ind); @@ -633,7 +635,7 @@ ir::OpSequenceIndex 
Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex const ir::OperationIndex &node_index) { auto &node = _lowered_graph.graph().operations().at(node_index); - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); auto layout = lower_info->layout(); @@ -745,7 +747,7 @@ Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_ // | | // [OPERATION] [OPERATION] // - for (auto &op_seq_ind : found_input_in_op_seqs->second) + for (const auto &op_seq_ind : found_input_in_op_seqs->second) { auto found_in_fp32_to_fp16 = _list_fp32_to_fp16.find(op_seq_ind); if (found_in_fp32_to_fp16 != _list_fp32_to_fp16.end()) @@ -759,9 +761,8 @@ Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_ opseq_map_to_delete[op_seq_ind_fp16_to_fp32].insert(op_seq_ind); } - VERBOSE(Fp32ToFp16Converter) - << "Contiguous from OpSeq#" << op_seq_ind_fp16_to_fp32.value() << "(ToFp32)" - << " to OpSeq#" << op_seq_ind.value() << "(ToFp16)" << std::endl; + VERBOSE(Fp32ToFp16Converter) << "Contiguous from " << op_seq_ind_fp16_to_fp32 << "(ToFp32)" + << " to " << op_seq_ind << "(ToFp16)" << std::endl; } } } @@ -775,7 +776,7 @@ Fp32ToFp16Converter::InputToOpSeqs Fp32ToFp16Converter::prepareInputToOpSeqs() c InputToOpSeqs input_to_op_seqs; op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_idx, const ir::OpSequence &op_seq) { - for (auto input : op_seq.getInputs() | ir::Remove::UNDEFINED) + for (auto &&input : op_seq.getInputs() | ir::Remove::UNDEFINED) { auto it = input_to_op_seqs.find(input); if (it == input_to_op_seqs.end()) @@ -798,13 +799,13 @@ Fp32ToFp16Converter::getListOpSequences(const OpSeqIndexToOpSeqIndexList &opseq_ OpSeqIndexList list; for (const auto &it : opseq_map_to_delete) { - auto &opseq_ind_fp16_to_fp32 = it.first; + const auto &opseq_ind_fp16_to_fp32 = it.first; if (list.find(opseq_ind_fp16_to_fp32) == list.end()) { list.emplace(opseq_ind_fp16_to_fp32); } - for (auto &opseq_ind_fp32_to_fp16 : it.second) + for (const auto &opseq_ind_fp32_to_fp16 : it.second) { if (list.find(opseq_ind_fp32_to_fp16) == list.end()) { @@ -842,7 +843,7 @@ Fp32ToFp16Converter::findOperationsToDelete(const OpSeqIndexList &list_to_delete } void Fp32ToFp16Converter::manipulateContiguousOpSequences( - const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete) + const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete) { auto &op_seqs = _lowered_graph.op_seqs(); @@ -861,14 +862,14 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences( // | // [OPERATION] // op_seq_ind_next_to_fp16 // - for (auto it : opseq_map_to_delete) + for (auto &&it : opseq_map_to_delete) { // fp16_to_fp32's input/output num is always 1 auto &op_seq_ind_fp16_to_fp32 = it.first; auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32); auto &input_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getInputs().at(0); - for (auto &op_seq_ind_fp32_to_fp16 : it.second) + for (const auto &op_seq_ind_fp32_to_fp16 : it.second) { auto &op_seq_fp32_to_fp16 = op_seqs.at(op_seq_ind_fp32_to_fp16); assert(op_seq_fp32_to_fp16.size() == 1); @@ -878,7 +879,7 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences( auto found_next_to_fp16 = input_to_op_seqs.find(output_ind_fp32_to_fp16); assert(found_next_to_fp16 != input_to_op_seqs.end()); - for (auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second) + for (const auto 
&op_seq_ind_next_to_fp16 : found_next_to_fp16->second) { manipulateInput(op_seq_ind_next_to_fp16, output_ind_fp32_to_fp16, input_ind_fp16_to_fp32); } @@ -894,61 +895,62 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences( } void Fp32ToFp16Converter::deleteContiguousOpSequences( - const OpSeqIndexList &list_to_delete_op_seqs, - const ir::OperandIndexSequence &list_to_delete_ops) + const OpSeqIndexList &list_to_delete_op_seqs, const ir::OperandIndexSequence &list_to_delete_ops) { auto &operands = _lowered_graph.graph().operands(); auto &operations = _lowered_graph.graph().operations(); auto &op_seqs = _lowered_graph.op_seqs(); - for (auto &op_seq_ind : list_to_delete_op_seqs) + for (const auto &op_seq_ind : list_to_delete_op_seqs) { auto &op_seq = op_seqs.at(op_seq_ind); assert(op_seq.size() == 1); - VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq #" << op_seq_ind.value() << std::endl; + VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq " << op_seq_ind << std::endl; auto &first_node_ind = op_seq.operations().at(0); auto &first_node = operations.at(first_node_ind); assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 || first_node.opcode() == ir::OpCode::ConvertFp16ToFp32); - VERBOSE(Fp32ToFp16Converter) << "Delete Node #" << first_node_ind.value() << std::endl; + VERBOSE(Fp32ToFp16Converter) << "Delete Node " << first_node_ind << std::endl; // Uses - for (auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { auto &obj = operands.at(ind); obj.removeUse(first_node_ind); - VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Use(Node#" - << first_node_ind.value() << ") is removed" << std::endl; + VERBOSE(Fp32ToFp16Converter) + << "Operand " << ind << "'s Use(Node" << first_node_ind << ") is removed" << std::endl; } // Def - for (auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { auto &obj = operands.at(ind); assert(obj.getDef() == first_node_ind); obj.unsetDef(); - VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Def(Node#" - << first_node_ind.value() << ") is removed" << std::endl; + VERBOSE(Fp32ToFp16Converter) + << "Operand " << ind << "'s Def(Node" << first_node_ind << ") is removed" << std::endl; } // Operation operations.remove(first_node_ind); - VERBOSE(Fp32ToFp16Converter) << "Node#" << first_node_ind.value() << " is removed" << std::endl; + VERBOSE(Fp32ToFp16Converter) << "Node" << first_node_ind << " is removed" << std::endl; // OpSequence op_seqs.remove(op_seq_ind); - VERBOSE(Fp32ToFp16Converter) << "OpSeq#" << op_seq_ind.value() << " is removed" << std::endl; + VERBOSE(Fp32ToFp16Converter) << "OpSeq" << op_seq_ind << " is removed" << std::endl; } // Operand - for (auto &ind : list_to_delete_ops) + for (const auto &ind : list_to_delete_ops) { operands.remove(ind); - VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << " is removed" << std::endl; + VERBOSE(Fp32ToFp16Converter) << "Operand " << ind << " is removed" << std::endl; } } } // namespace compiler } // namespace onert + +#endif diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h index eeecb9846..87751ceb4 100644 --- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h +++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h @@ -14,6 +14,8 @@ 
* limitations under the License. */ +#if 0 // This file is temporarily unused + #ifndef __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__ #define __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__ @@ -64,8 +66,8 @@ private: void setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind, const ir::OperandIndex &new_op_ind); - void setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind, - const ir::OpSequenceIndex &new_op_seq_ind); + void setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind, + const ir::OpSequenceIndex &new_op_seq_ind); void manipulateInput(const ir::OpSequenceIndex &op_seq_ind, const ir::OperandIndex &op_seq_input_ind, @@ -99,3 +101,5 @@ private: } // namespace onert #endif // __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__ + +#endif diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc index 5653b090e..56e2208d6 100644 --- a/runtime/onert/core/src/compiler/HEScheduler.cc +++ b/runtime/onert/core/src/compiler/HEScheduler.cc @@ -14,34 +14,32 @@ * limitations under the License. */ -#include "ir/Operand.h" -#include "compiler/HEScheduler.h" -#include "ir/Graph.h" -#include "util/ConfigSource.h" +#include "HEScheduler.h" + #include "compiler/BackendResolver.h" +#include "ir/Graph.h" #include "util/logging.h" -#include "util/Utils.h" -#include "exec/FunctionSequence.h" + #include <cassert> #include <cmath> -#include <chrono> -namespace onert +namespace { -namespace compiler -{ -static uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::Operation &node) +using namespace onert; + +uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::IOperation &node) { uint32_t size = 0; - for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) + for (const auto &ind : + (node.getInputs() | ir::Remove::UNDEFINED) + (node.getOutputs() | ir::Remove::UNDEFINED)) { size += graph.operands().at(ind).info().total_size(); } return size; } -static bool isQuant(const ir::Graph &graph, const ir::Operation &node) +bool isQuant(const ir::Graph &graph, const ir::IOperation &node) { for (const auto &input : node.getInputs() | ir::Remove::UNDEFINED) { @@ -54,18 +52,17 @@ static bool isQuant(const ir::Graph &graph, const ir::Operation &node) return false; } -static bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::Operation &, - bool) +bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::IOperation &, bool) { // Now, there is no workaround return false; } // if a node can be merged into op_seq -static bool isMergeable(const ir::Graph &graph, const ir::Operation &node) +bool isMergeable(const ir::Graph &graph, const ir::IOperation &node) { size_t prev_op_cnt = 0; - for (const auto &input : node.getInputs()) + for (const auto &input : node.getInputs() | ir::Remove::UNDEFINED) { // only valid_inputs const auto &operand = graph.operands().at(input); @@ -85,15 +82,23 @@ static bool isMergeable(const ir::Graph &graph, const ir::Operation &node) return true; } +} // namespace + +namespace onert +{ + +namespace compiler +{ + void HEScheduler::scheduleShufflingBackends() { VERBOSE(HEScheduler::schedule) - << "Started task scheduling: uses all backends to get more metrics for data transfer" - << std::endl; + << "Started task scheduling: uses all backends to get more metrics for data transfer" + << std::endl; size_t backend_ind = 0; for (const auto &rank : _rank_to_op) { - VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second.value() << 
")" << std::endl; + VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second << ")" << std::endl; const auto &node = _graph->operations().at(rank.second); const bool quant = isQuant(*_graph, node); const auto size = getOperationsFlattenedIOSize(*_graph, node); @@ -115,7 +120,7 @@ void HEScheduler::scheduleShufflingBackends() continue; } const auto exec_time = - _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size); + _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size); // Scheduling to measure data transfer must be done after measuring all backends separately assert(exec_time != _exec_time->NOT_FOUND); if (exec_time == _exec_time->getMax()) @@ -132,7 +137,7 @@ void HEScheduler::scheduleShufflingBackends() } } -bool HEScheduler::isNodeProfiled(const ir::Operation &node) +bool HEScheduler::isNodeProfiled(const ir::IOperation &node) { const bool quant = isQuant(*_graph, node); const auto size = getOperationsFlattenedIOSize(*_graph, node); @@ -202,7 +207,7 @@ std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph { // Check if profiling info about all backend/node pairs already exists bool all_nodes_are_profiled = true; - _graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) { + _graph->operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &op) { if (all_nodes_are_profiled) all_nodes_are_profiled = isNodeProfiled(op); }); @@ -219,7 +224,7 @@ std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph ir::OperationIndexMap<bool> visited; graph.operations().iterate( - [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; }); + [&](const ir::OperationIndex &index, const ir::IOperation &) { visited[index] = false; }); // for each task select the backend with the smallest earliest finishing time(eft) for (const auto &rank : _rank_to_op) { @@ -248,19 +253,20 @@ int64_t HEScheduler::getPermuteTime(const backend::Backend *src_backend, if (time != _exec_time->NOT_FOUND) return time; + // FIXME permute time is not recorded so the control reaches here always // Makes the scheduler prefer keeping computations on one backend - return size / 200; + return size / 400; } -int64_t HEScheduler::tryBackend(const ir::Operation &node, const backend::Backend *backend) +int64_t HEScheduler::tryBackend(const ir::IOperation &node, const backend::Backend *backend) { // if there is no profiling info don't use this backend during scheduling if (!_is_profiling_mode) { VERBOSE(HEScheduler::tryBackend) - << "Trying to HE schedule while there is no profiling info for " << node.name() - << " on backend " << backend->config()->id() << ". So this backend won't be used. " - << std::endl; + << "Trying to HE schedule while there is no profiling info for " << node.name() + << " on backend " << backend->config()->id() << ". So this backend won't be used. 
" + << std::endl; _is_supported[backend][node.name()] = false; return _exec_time->getMax(); } @@ -291,10 +297,10 @@ void HEScheduler::makeRank() VERBOSE(HEScheduler::makeRank) << "task prioritizing" << std::endl; _graph->operations().iterate( - [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); }); + [&](const ir::OperationIndex &index, const ir::IOperation &) { DFSMaxRank(index); }); // Check that ranks are calculated for all operations(nodes) - _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) { + _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &) { UNUSED_RELEASE(index); assert(_op_to_rank->find(index) != _op_to_rank->end()); }); @@ -360,8 +366,8 @@ int64_t HEScheduler::DFSMaxRank(const ir::OperationIndex &index) assert(rank >= 0); _rank_to_op.emplace(rank, index); _op_to_rank->emplace(index, rank); - VERBOSE(HEScheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.name() - << " is " << rank << std::endl; + VERBOSE(HEScheduler::DFSMaxRank) + << "rank of operation (" << index << ")" << node.name() << " is " << rank << std::endl; return rank; } @@ -370,7 +376,7 @@ int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index) { const auto &node = _graph->operations().at(index); int64_t max_child_rank = 0; - for (const auto &output : node.getOutputs()) + for (const auto &output : node.getOutputs() | ir::Remove::UNDEFINED) { const auto &operand = _graph->operands().at(output); const bool quant = operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM; @@ -384,9 +390,9 @@ int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index) { continue; } - // TODO Change it to controlflow backend + // TODO Change it to builtin backend auto transfer_cost = - getPermuteTime(backend, other_backend, quant, operand.info().total_size()); + getPermuteTime(backend, other_backend, quant, operand.info().total_size()); avg_transfer_cost += transfer_cost; } } @@ -403,7 +409,7 @@ int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index) int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend, const int64_t &starting_time, const int64_t &time_amount) { - const auto backend_times = _backends_avail_time.at(backend); + const auto &backend_times = _backends_avail_time.at(backend); // finishing and starting times of an op, that will come after current op auto next_op_fst = backend_times.upper_bound(starting_time); // finishing time of an op, that will come before current op @@ -419,7 +425,7 @@ int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend, bool HEScheduler::schedule(const ir::OperationIndex &index, const backend::Backend *parent_backend) { - VERBOSE(HEScheduler::schedule) << "scheduling (" << index.value() << ")" << std::endl; + VERBOSE(HEScheduler::schedule) << "scheduling (" << index << ")" << std::endl; int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0; const auto &node = _graph->operations().at(index); @@ -506,7 +512,7 @@ HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::Operation // Find free time for data transferring and insert it into backend taskset. This is needed: // 1. Time for multiple permutations for this node's input is found correctly // 2. 
If backend==cpu, then free time for this node must come after permutations - for (auto &it : transfer_st_exec_time) + for (auto &&it : transfer_st_exec_time) { if (_is_parallel_exec) { @@ -542,27 +548,27 @@ HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::Operation if (!_is_parallel_exec) { VERBOSE(HEScheduler::ESTAndExecTime) - << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on " - << backend->config()->id() << " is " << exec_time - << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl; + << "exec_time of (" << index << ") " << node.name() << " quant==" << quant << " on " + << backend->config()->id() << " is " << exec_time + << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl; return {total_transfer_cost, exec_time}; } VERBOSE(HEScheduler::ESTAndExecTime) - << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on " - << backend->config()->id() << ": " << exec_time - << " microseconds. Backend available time: " << prev_op_ft - << " Parent's max eft: " << max_pred_eft - total_transfer_cost - << " data transfer cost: " << total_transfer_cost << std::endl; + << "exec_time of (" << index << ") " << node.name() << " quant==" << quant << " on " + << backend->config()->id() << ": " << exec_time + << " microseconds. Backend available time: " << prev_op_ft + << " Parent's max eft: " << max_pred_eft - total_transfer_cost + << " data transfer cost: " << total_transfer_cost << std::endl; return {prev_op_ft, exec_time}; } -int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::Operation &node, +int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::IOperation &node, std::multimap<int64_t, int64_t> &transfer_st_exec_time) { int64_t max_pred_eft = 0; - for (const auto &input_operand_idx : node.getInputs()) + for (const auto &input_operand_idx : node.getInputs() | ir::Remove::UNDEFINED) { const auto &input_operand = _graph->operands().at(input_operand_idx); const bool quant = input_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM; @@ -578,7 +584,7 @@ int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::Opera { // Multiply operand size by 2 because size must describe input+output size int64_t transfer_cost = - getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2); + getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2); transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost); } } diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h index b9cee5881..df6c07926 100644 --- a/runtime/onert/core/src/compiler/HEScheduler.h +++ b/runtime/onert/core/src/compiler/HEScheduler.h @@ -23,14 +23,16 @@ #ifndef __ONERT_COMPILER_H_E_SCHEDULER_H_ #define __ONERT_COMPILER_H_E_SCHEDULER_H_ -#include "compiler/IScheduler.h" -#include "compiler/BackendManager.h" -#include "compiler/Compiler.h" -#include "ir/Graph.h" -#include "exec/ExecTime.h" -#include "backend/Backend.h" -#include <memory> -#include "ir/OperationIndexMap.h" +#include "IScheduler.h" +#include "../backend/builtin/Config.h" +#include "../exec/ExecTime.h" + +#include <backend/Backend.h> +#include <compiler/BackendManager.h> +#include <compiler/Compiler.h> +#include <ir/Graph.h> +#include <ir/OperationIndexMap.h> + #include <map> #include <memory> @@ -50,26 +52,26 @@ public: * @param[in] model Graph model * @param[in] 
backend_resolver backend resolver */ - HEScheduler(const backend::BackendContexts &backend_contexts, const CompilerOptions &options) - : _is_supported{}, _backends_avail_time{}, _ops_eft{}, - _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()}, - _is_profiling_mode{options.he_profiling_mode}, - _is_linear_exec{options.executor == "Linear"}, - _is_parallel_exec{options.executor == "Parallel"} + HEScheduler(const std::vector<const backend::Backend *> &backends, const CompilerOptions &options) + : _is_supported{}, _backends_avail_time{}, _ops_eft{}, + _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()}, + _is_profiling_mode{options.he_profiling_mode}, _is_linear_exec{options.executor == "Linear"}, + _is_parallel_exec{options.executor == "Parallel"} { - for (auto &entry : backend_contexts) + for (auto &&entry : backends) { - if (entry.first->config()->id() == backend::controlflow::Config::ID) + if (entry->config()->id() == backend::builtin::Config::ID) continue; - _all_backends.push_back(entry.first); + _all_backends.push_back(entry); } _backend_resolver = std::make_unique<compiler::BackendResolver>(); _exec_time = std::make_unique<exec::ExecTime>(_all_backends); // Find cpu backend - auto cpu_backend_it = std::find_if( - _all_backends.begin(), _all_backends.end(), - [](const backend::Backend *backend) { return backend->config()->id() == "cpu"; }); + auto cpu_backend_it = + std::find_if(_all_backends.begin(), _all_backends.end(), [](const backend::Backend *backend) { + return backend->config()->id() == "cpu"; + }); if (cpu_backend_it == _all_backends.end()) throw std::runtime_error("HEScheduler could be used only if 'cpu' backend is available"); _cpu_backend = *cpu_backend_it; @@ -86,7 +88,7 @@ public: std::shared_ptr<ir::OperationIndexMap<int64_t>> getIndexedRanks() { return _op_to_rank; } private: - bool isNodeProfiled(const ir::Operation &); + bool isNodeProfiled(const ir::IOperation &); bool schedule(const ir::OperationIndex &, const backend::Backend *parent_backend); /** @@ -113,7 +115,7 @@ private: * * @return earliest finishing time of parent nodes */ - int64_t predMaxEFT(const backend::Backend *backend, const ir::Operation &node, + int64_t predMaxEFT(const backend::Backend *backend, const ir::IOperation &node, std::multimap<int64_t, int64_t> &transfer_st_exec_time); void makeRank(); @@ -144,7 +146,7 @@ private: void scheduleShufflingBackends(); - int64_t tryBackend(const ir::Operation &node, const backend::Backend *backend); + int64_t tryBackend(const ir::IOperation &node, const backend::Backend *backend); /** * @brief Schedule a node and its successor until: @@ -173,7 +175,7 @@ private: std::unique_ptr<exec::ExecTime> _exec_time; const ir::Graph *_graph{nullptr}; std::vector<const backend::Backend *> _all_backends; - const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to controlflow_backend + const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to _builtin_backend bool _is_profiling_mode; bool _is_linear_exec; bool _is_parallel_exec; diff --git a/runtime/onert/core/src/compiler/HEScheduler.test.cc b/runtime/onert/core/src/compiler/HEScheduler.test.cc new file mode 100644 index 000000000..1654bfc8b --- /dev/null +++ b/runtime/onert/core/src/compiler/HEScheduler.test.cc @@ -0,0 +1,572 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "HEScheduler.h" +#include "../exec/ExecTime.h" + +#include <ir/DataType.h> +#include <ir/InternalType.h> +#include <ir/Shape.h> +#include <ir/TypeInfo.h> +#include <ir/operation/BinaryArithmetic.h> +#include <ir/operation/FullyConnected.h> + +#include <gtest/gtest.h> + +namespace +{ +using namespace onert; +using namespace ir; +using namespace backend; +using namespace operation; +using namespace exec; + +// +// Mock backends classes +// + +struct MockConfigCPU : public IConfig +{ + std::string id() override { return "cpu"; } + bool initialize() override { return true; }; + bool supportPermutation() override { return false; } + Layout supportLayout(const IOperation &, Layout) override { return Layout::UNKNOWN; } + bool supportDynamicTensor() override { return false; } + bool supportFP16() override { return false; } +}; + +class MockBackendContext : public BackendContext +{ +public: + using BackendContext::BackendContext; + ITensorRegistry *genTensors() override { return nullptr; } + FunctionMap genKernels() override { return {}; } +}; + +struct MockBackendCPU : public Backend +{ + std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); } + std::unique_ptr<BackendContext> newContext(ContextData &&data) const override + { + return std::make_unique<MockBackendContext>(this, std::move(data), nullptr); + } +}; + +struct MockConfigGPU : public IConfig +{ + std::string id() override { return "gpu"; } + bool initialize() override { return true; }; + bool supportPermutation() override { return false; } + ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override + { + return ir::Layout::UNKNOWN; + } + bool supportDynamicTensor() override { return false; } + bool supportFP16() override { return false; } +}; + +struct MockBackendGPU : public Backend +{ + std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); } + std::unique_ptr<BackendContext> newContext(ContextData &&data) const override + { + return std::make_unique<MockBackendContext>(this, std::move(data), nullptr); + } +}; + +struct MockConfigNPU : public IConfig +{ + std::string id() override { return "npu"; } + bool initialize() override { return true; }; + bool supportPermutation() override { return false; } + ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override + { + return ir::Layout::UNKNOWN; + } + bool supportDynamicTensor() override { return false; } + bool supportFP16() override { return false; } +}; + +struct MockBackendNPU : public Backend +{ + std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); } + std::unique_ptr<BackendContext> newContext(ContextData &&data) const override + { + return std::make_unique<MockBackendContext>(this, std::move(data), nullptr); + } +}; + +// +// Constants +// + +const int OPERAND_ELEMS = 268203; +const int OPERAND_SIZE = OPERAND_ELEMS * 4; +const int OPERATION_SIZE = OPERAND_SIZE * 3; + +const std::string LINEAR("Linear"); +const std::string DATAFLOW("Dataflow"); +const std::string PARALLEL("Parallel"); + +// +// Helper functions 
+// + +// Set executor through environment variable +void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); } + +// Set profiling mode through environment variable +void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); } + +// Calculate operation size by addition sizes of all input and output operands +uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx) +{ + uint32_t size = 0; + const auto &op = graph->operations().at(op_idx); + for (const auto &ind : op.getInputs() + op.getOutputs()) + size += graph->operands().at(ind).info().total_size(); + return size; +} + +// Set execution operation time. This method is needed since ExecutionTime has only +// 'updateOperationExecTime' method. +void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation, + bool quant, uint32_t op_size, int64_t time) +{ + // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it + assert(time > 0); + int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size); + int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time; + et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set); + assert(et.getOperationExecTime(backend, operation, quant, op_size) == time); +} + +// Set same execution time for all given backends/operations +void setOperationsExecutionTime(const std::vector<const Backend *> &backends, + const std::vector<std::string> &op_names, + const std::vector<uint32_t> &op_sizes, int64_t exec_time) +{ + assert(op_names.size() == op_sizes.size()); + ExecTime et(backends); + for (int i = 0; i < op_names.size(); ++i) + { + for (const auto backend : backends) + setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time); + } + et.storeOperationsExecTime(); +} + +// Set permute time from one backend to another. This method is needed since ExecutionTime has only +// 'updatePermuteTime' method. +void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend, + bool quant, uint32_t op_size, int64_t time) +{ + // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it + assert(time > 0); + int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size); + int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time; + et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set); + assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time); +} + +// Set same permutation time between all given backends +void setPermutationsExecutionTime(const std::vector<const Backend *> &backends, + const int operand_size, const int64_t exec_time) +{ + ExecTime et(backends); + for (const auto &backend : backends) + { + for (const auto other_backend : backends) + { + if (backend == other_backend) + continue; + setPermutationTime(et, backend, other_backend, false, operand_size, exec_time); + } + } + et.storeOperationsExecTime(); +} + +// +// Functions for creating graphs +// + +using OIS = OperandIndexSequence; + +template <typename NodeT, typename... Types> +OperationIndex create(std::shared_ptr<Graph> graph, Types &&... 
args)
+{
+  auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
+  auto op_idx = graph->addOperation(std::move(op));
+  // For now, in the scheduler test, all operations in the tested graphs have the same size (for simplicity)
+  assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
+  return op_idx;
+}
+
+// Create straight graph: Add->Sub->Mul
+std::shared_ptr<Graph> createStraightGraph()
+{
+  auto graph = std::make_shared<Graph>();
+  const TypeInfo float_op(DataType::FLOAT32);
+
+  // Create add node
+  auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
+
+  // Create sub node
+  auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
+
+  // Create mul node
+  auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
+
+  graph->verify();
+  return graph;
+}
+
+/* Create branched graph:
+ *       [Add]
+ *      //    \\
+ *   [Mul1]  [FC1]
+ *     ||      ||
+ *   [Mul2]  [FC2]
+ *      \\    //
+ *       [Sub]
+ */
+std::shared_ptr<Graph> createBranchedGraph()
+{
+  auto graph = std::make_shared<Graph>();
+  const TypeInfo float_op(DataType::FLOAT32);
+
+  // Create add node
+  auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
+
+  // Create mul1 node
+  auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
+                           mul1_op_params);
+
+  // Create mul2 node
+  auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
+                           mul2_op_params);
+
+  // Create fc1 node
+  auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+  FullyConnected::Param fc1_op_params{Activation::NONE};
+  create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
+
+  // Create fc2 node
+  auto
fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + FullyConnected::Param fc2_op_params{Activation::NONE}; + create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params); + + // Create sub node + auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE}; + create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params); + + graph->verify(); + return graph; +} + +// +// Tests setup/teardown +// + +// SetUp/TearDown methods runs before/after each test and performs actions common for each test +class HESchedulerTest : public ::testing::Test +{ +protected: + void SetUp() override + { + // Initialize mock backends + _cpu_backend = new MockBackendCPU(); + _gpu_backend = new MockBackendGPU(); + _npu_backend = new MockBackendNPU(); + _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend}; + + // Remove previous profile data if it exists + if (!remove("exec_time.json")) + { + // DO NOTHING (no profile data) + } + + // Remember original value of 'EXECUTOR' environment variable + char *executor = std::getenv("EXECUTOR"); + _original_executor = executor == nullptr ? "" : executor; + + // Remember original value of 'PROFILING_MODE' environment variable + char *profiling_mode = std::getenv("PROFILING_MODE"); + _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode; + } + + void TearDown() override + { + delete _cpu_backend; + delete _gpu_backend; + delete _npu_backend; + EXPECT_EQ(remove("exec_time.json"), 0); + setenv("EXECUTOR", _original_executor.c_str(), true); + setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true); + } + + const MockBackendCPU *_cpu_backend{nullptr}; + const MockBackendGPU *_gpu_backend{nullptr}; + const MockBackendNPU *_npu_backend{nullptr}; + std::vector<const Backend *> _mock_backends; + + std::string _original_executor; + std::string _original_profiling_mode; +}; + +// +// HEScheduler tests +// + +class HESchedulerTestWithExecutorParam : public HESchedulerTest, + public testing::WithParamInterface<std::string> +{ +}; + +// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times - +// one time for each executor +INSTANTIATE_TEST_SUITE_P(AllExecutors, HESchedulerTestWithExecutorParam, + testing::Values(LINEAR, DATAFLOW, PARALLEL)); + +// Test scheduler behavior for straight graph with known execution time of all nodes and permutes. 
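A note on the arithmetic behind setOperationExecTime used throughout the tests below: the helper cannot store a target time directly because, as its trailing assert suggests, ExecTime appears to average a new measurement with the previously stored one, so it feeds 2 * time - prev_time to make the averaged result land exactly on the requested time. A minimal standalone sketch of that trick, assuming the stored value is the plain mean of the previous value and the new measurement (average_update is a stand-in for illustration, not part of onert):

#include <cassert>
#include <cstdint>

// Stand-in for the averaging update assumed to happen inside ExecTime::updateOperationExecTime.
int64_t average_update(int64_t prev, int64_t measured) { return (prev + measured) / 2; }

int main()
{
  const int64_t prev_time = 12000; // hypothetical previously stored time
  const int64_t target = 10000;    // exact time the test wants to end up stored
  // Feeding 2 * target - prev_time makes the averaged result equal the target:
  // (12000 + 8000) / 2 == 10000
  assert(average_update(prev_time, 2 * target - prev_time) == target);
  return 0;
}

The scheduler tests below rely on exactly this trick when pinning per-backend operation and permutation times.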
+TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time) +{ + setExecutor(GetParam()); + + // Prepare graph + ir::Model model; + auto graph(createStraightGraph()); + model.push(ir::SubgraphIndex{0}, graph); + OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2); + + // Set default execution and transfer time + setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1); + setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"}, + {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4); + + // Test 1 + // Expected behaviour: scheduler assigns different backend to each node + { + // For each backend reduce execution time of one node + ExecTime et(_mock_backends); + setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1); + setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1); + setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1); + et.storeOperationsExecTime(); + + // Test scheduler + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); + const auto br = scheduler.schedule(*graph); + ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu"); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu"); + ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu"); + } + + // Test 2 + // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time + { + // Increase transfer time + setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5); + + // Test scheduler + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); + const auto br = scheduler.schedule(*graph); + ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu"); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu"); + ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu"); + } +} + +// Test scheduler behavior for branched graph with known execution time of all nodes and permutes +TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time) +{ + const int64_t NPU_ET = 5000; + setExecutor(GetParam()); + + // Prepare graph + ir::Model model; + auto graph(createBranchedGraph()); + model.push(ir::SubgraphIndex{0}, graph); + OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), + sub_op_idx(5); + + // Set default execution and transfer time + setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000); + setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"}, + {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4); + + // Test 1 + // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all + // nodes, in case of parallel executor scheduler assigns different backends to branches. 
+ { + // Reduce execution time + ExecTime et(_mock_backends); + setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000); + setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000); + et.storeOperationsExecTime(); + + // Test scheduler + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); + const auto br = scheduler.schedule(*graph); + + std::string branch1_expected_backend("npu"), branch2_expected_backend("npu"); + if (GetParam() == PARALLEL) + { + branch1_expected_backend = + br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu"; + branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu"; + } + + ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend); + ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend); + ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend); + ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu"); + } + + // Test 2 + // Expected behaviour: scheduler assigns single backend to all nodes + { + // Increase execution time for GPU backend + ExecTime et(_mock_backends); + /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt * + * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the + * [0;i-1] branches nodes in DFS order. 
In each branch it goes deep until it encounters
+     * branching or the scheduler assigns another backend to a node. */
+    setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+    setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+    et.storeOperationsExecTime();
+
+    // Test scheduler
+    auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+    auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+    const auto br = scheduler.schedule(*graph);
+    ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
+    ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+    ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+    ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
+    ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
+    ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
+  }
+}
+
+// Test scheduler behavior for branched graph and enabled profiling mode
+TEST_F(HESchedulerTest, branched_graph_profiling_mode)
+{
+  const int ET = 1e5;
+
+  // Turn on profiling mode
+  setProfilingMode(true);
+  setExecutor(DATAFLOW);
+
+  // Prepare graph
+  ir::Model model;
+  auto graph(createBranchedGraph());
+  model.push(ir::SubgraphIndex{0}, graph);
+  OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
+    sub_op_idx(5);
+
+  // Test 1
+  // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
+  {
+    // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
+    ExecTime et(_mock_backends);
+    setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
+    setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+    setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+    setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
+    setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+    setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
+    setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
+    setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+    setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
+    et.storeOperationsExecTime();
+
+    // Test scheduler
+    auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+    auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+    const auto br = scheduler.schedule(*graph);
+    ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+    ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+    ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
+    ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
+    ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
+  }
+
+  // Test 2
+  // Expected behaviour: the scheduler shuffles backends, so different backends are assigned to
+  // neighboring nodes
+  {
+    // Set execution time for the remaining backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
+    ExecTime et(_mock_backends);
+    setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
+    setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+    setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+    et.storeOperationsExecTime();
+
+    // Test scheduler
+    auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+    auto scheduler =
compiler::HEScheduler(_mock_backends, coptions); + const auto br = scheduler.schedule(*graph); + ASSERT_NE(br->getBackend(add_op_idx)->config()->id(), + br->getBackend(mul1_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(add_op_idx)->config()->id(), + br->getBackend(fc1_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(), + br->getBackend(mul2_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(), + br->getBackend(fc2_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(), + br->getBackend(sub_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(), + br->getBackend(sub_op_idx)->config()->id()); + } +} + +// TODO: Add tests with unknown execution and permutation time + +} // unnamed namespace diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc index 49a989500..4dbe229c8 100644 --- a/runtime/onert/core/src/compiler/Linear.cc +++ b/runtime/onert/core/src/compiler/Linear.cc @@ -14,207 +14,38 @@ * limitations under the License. */ -#include <algorithm> - #include "Linear.h" -#include "backend/IConfig.h" -#include "backend/IConstantInitializer.h" -#include "backend/ITensorRegister.h" -#include "backend/Backend.h" +#include "../dumper/text/GraphDumper.h" + #include "util/logging.h" +#include <sstream> + namespace onert { namespace compiler { -std::vector<ir::OpSequenceIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph) +// TODO(easy) Change the LoweredGraph param to Graph +std::vector<ir::OperationIndex> Linear::linearize(const compiler::ILoweredGraph &lowered_graph) { - std::vector<ir::OpSequenceIndex> order; - lowered_graph.iterateTopolOpSeqs( - [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) -> void { - order.emplace_back(index); - }); - return order; + return lowered_graph.graph().topolSortOperations(); } -void Linear::dump(const compiler::LoweredGraph &lowered_graph, - const std::vector<ir::OpSequenceIndex> &order) +// TODO(easy) Change the LoweredGraph param to Graph +void Linear::dump(const compiler::ILoweredGraph &lowered_graph, + const std::vector<ir::OperationIndex> &order) { + for (const auto &ind : order) { - const auto &toString = [](const onert::backend::Backend *backend) { - assert(backend); - std::string str; - str += backend->config()->id(); - return "{" + str + "}"; - }; - - VERBOSE(Linear) << "Final OpSequence" << std::endl; - for (const auto index : order) - { - const auto &op_seq = lowered_graph.op_seqs().at(index); - const auto lower_info = lowered_graph.getLowerInfo(index); - const auto &operations = lowered_graph.graph().operations(); - VERBOSE(Linear) << "* OP_SEQ " << toString(lower_info->backend()) << " " - << ir::getStrFromOpSeq(op_seq, operations) << std::endl; - } + // TODO Could logging system can handle this? 
(Inserting prefix for each line) + std::istringstream iss{dumper::text::formatOperation(lowered_graph.graph(), ind)}; + std::string line; + while (std::getline(iss, line)) + VERBOSE(GraphDumper) << line << std::endl; } } -void Linear::planTensors(const compiler::LoweredGraph &lowered_graph, - const std::vector<ir::OpSequenceIndex> &order) -{ - const auto &graph = lowered_graph.graph(); - ir::OperandIndexMap<std::shared_ptr<backend::ITensorBuilder>> tensor_builder_map; - - ir::OperandIndexMap<uint32_t> uses_map; - ir::OperandIndexMap<uint32_t> def_map; - ir::OperandIndexSequence constants; - - // Prepare scanning - graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - const auto lower_info = lowered_graph.getLowerInfo(ind); - // TODO Remove if onert doesn't support anymore such as - // GeneratedTests.reshape_quant8_weights_as_inputs - if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 && - !graph.getInputs().contains(ind)) - { - VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process." - << std::endl; - return; - } - - // Unused input of subgraph - // TODO Register unused input as nullptr in tensor_builder - if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 && - graph.getInputs().contains(ind)) - { - VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process." - << std::endl; - return; - } - - uses_map[ind] = obj.getUses().size(); - def_map[ind] = obj.getDef().valid() ? 1 : 0; - - bool is_const = obj.isConstant(); - if (is_const) - { - constants.append(ind); - } - - auto factor = lower_info->def_factors().getOnlyElement(); - auto backend = factor.backend(); - auto tensor_builder = lowered_graph.backend_contexts().at(backend)->tensor_builder; - if (!tensor_builder->isRegistered(ind)) - { - // These tensors do not exist in any op_seq (No use and def) - const auto info = obj.info(); - const auto backend_layout = factor.layout(); - // TODO Change tensor info to have permuted shape - tensor_builder->registerTensorInfo(ind, info, backend_layout); - } - - tensor_builder_map[ind] = tensor_builder; - }); - - // If a tensor is model output, increase the use of the tensor. - // This aim is same to above one. - for (const auto &ind : graph.getOutputs() | ir::Remove::DUPLICATED) - { - uses_map[ind]++; - } - - // Start scanning to do notify{First|Last}Use for each tensor - - // If a tensor is a constant, increase the use of the tensor. - // It makes the tensor not be dealloced. It means these will be deallocated last. - // And allocate constant operands first - VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl; - for (const auto &ind : constants) - { - uses_map[ind]++; - tensor_builder_map[ind]->notifyFirstUse(ind); - } - - // Allocate Model's inputs - VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl; - for (const auto &ind : graph.getInputs() | ir::Remove::DUPLICATED) - { - auto tensor_builder = tensor_builder_map[ind]; - if (!tensor_builder) // for GeneratedTests.xxx_weights_as_inputs - continue; - tensor_builder->notifyFirstUse(ind); - } - - // At each operation, - // 1. Scan DEF of outputs. If the DEF, allocate it - // 2. Scan USE of inputs. 
Decrease the USE and deallocate if the USE is 0 - VERBOSE(LINEAR) << "TENSORS" << std::endl; - for (const auto op_seq_ind : order) - { - const auto &op_seq = lowered_graph.op_seqs().at(op_seq_ind); - for (const auto &op_idx : op_seq.operations()) - { - for (const auto &ind : graph.operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - assert(def_map.find(ind) != def_map.end()); - if (def_map[ind]) - { - def_map[ind] = 0; - tensor_builder_map[ind]->notifyFirstUse(ind); - } - } - - for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - assert(uses_map.find(ind) != uses_map.end()); - assert(uses_map[ind] > 0); - uses_map[ind]--; - if (uses_map[ind] == 0) - { - // plan for deallocation of static tensornode - tensor_builder_map[ind]->notifyLastUse(ind); - - // plan for deallocation of dynamic tensor - auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager(); - if (dyn_tensor_manager) - dyn_tensor_manager->planDealloc(op_idx, ind); - } - } - } - } - - // Dispose and validate - for (const auto &ind : graph.getOutputs() | ir::Remove::DUPLICATED) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder_map[ind]->notifyLastUse(ind); - } - } - - for (const auto &ind : constants) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder_map[ind]->notifyLastUse(ind); - } - } - - assert( - std::all_of(uses_map.begin(), uses_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); - - assert( - std::all_of(def_map.begin(), def_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); -} - } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h index 1e24cf92b..4f92dc88d 100644 --- a/runtime/onert/core/src/compiler/Linear.h +++ b/runtime/onert/core/src/compiler/Linear.h @@ -20,18 +20,8 @@ #include <vector> #include <memory> -#include "ir/OpSequences.h" #include "ir/Index.h" -#include "backend/ITensorBuilder.h" -#include "compiler/LoweredGraph.h" - -namespace onert -{ -namespace ir -{ -struct OperationVisitor; -} // namespace ir -} // namespace onert +#include "compiler/ILoweredGraph.h" namespace onert { @@ -41,11 +31,9 @@ namespace compiler class Linear { public: - static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph); - static void dump(const compiler::LoweredGraph &lowered_graph, - const std::vector<ir::OpSequenceIndex> &order); - static void planTensors(const compiler::LoweredGraph &lowered_graph, - const std::vector<ir::OpSequenceIndex> &order); + static std::vector<ir::OperationIndex> linearize(const compiler::ILoweredGraph &lowered_graph); + static void dump(const compiler::ILoweredGraph &lowered_graph, + const std::vector<ir::OperationIndex> &order); }; } // namespace compiler diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc index 1489a1884..46a45e44a 100644 --- a/runtime/onert/core/src/compiler/LoweredGraph.cc +++ b/runtime/onert/core/src/compiler/LoweredGraph.cc @@ -16,21 +16,23 @@ #include "compiler/LoweredGraph.h" -#include <assert.h> -#include <sstream> -#include "util/logging.h" -#include "compiler/pass/ConstantInsertionPass.h" -#include "compiler/pass/ConstantLoweringPass.h" -#include 
"compiler/pass/PermutationOperationPass.h" -#include "compiler/pass/PermutationInsertionPass.h" -#include "compiler/pass/PermutationEliminationPass.h" -#include "ir/GraphIterator.h" -#include "ir/verifier/Verifier.h" +#include "HEScheduler.h" +#include "ManualScheduler.h" +#include "pass/ConstantInsertionPass.h" +#include "pass/ConstantLoweringPass.h" +#include "pass/PassRunner.h" +#include "pass/PermutationEliminationPass.h" +#include "pass/PermutationInsertionPass.h" +#include "pass/PermutationOperationPass.h" +#include "../dumper/text/GraphDumper.h" +#include "../ir/verifier/Verifier.h" + #include "backend/Backend.h" -#include "backend/IConfig.h" #include "compiler/BackendResolver.h" -#include "compiler/ManualScheduler.h" -#include "compiler/HEScheduler.h" +#include "util/logging.h" + +#include <cassert> +#include <sstream> namespace onert { @@ -39,18 +41,15 @@ namespace compiler LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph} { - bool linear_executor = (options.executor == "Linear"); + lowerGraph(options); +} +void LoweredGraph::lowerGraph(const CompilerOptions &options) +{ // Build backend contexts auto &backend_manager = BackendManager::get(); - - // Always create Controlflow backend context - auto cf_backend = backend_manager.getControlflow(); - _backend_contexts.emplace( - cf_backend, cf_backend->newContext(_graph, _graph.getKernelBuilder(), linear_executor)); - // Create contexts for other backends - for (auto backend_str : options.backend_list) + for (auto &&backend_str : options.backend_list) { backend_manager.loadBackend(backend_str); auto backend = backend_manager.get(backend_str); @@ -60,12 +59,9 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option // we should change it back(throw if backend is not loaded) later. if (!backend) { - VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str; + VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str << std::endl; continue; } - - _backend_contexts.emplace( - backend, backend->newContext(_graph, _graph.getKernelBuilder(), linear_executor)); } if (backend_manager.num_backends() == 0) throw std::runtime_error{"No available backends loaded."}; @@ -73,317 +69,115 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option // TODO Move "schedule" phase out of here // Schedule std::unique_ptr<BackendResolver> backend_resolver; + auto all_backends = backend_manager.getAll(); if (options.he_scheduler) { - auto scheduler = HEScheduler(_backend_contexts, options); + auto scheduler = HEScheduler(all_backends, options); backend_resolver = scheduler.schedule(_graph); _indexed_ranks = scheduler.getIndexedRanks(); } else { - auto scheduler = ManualScheduler(_backend_contexts, options); + auto scheduler = ManualScheduler(all_backends, options); backend_resolver = scheduler.schedule(_graph); } - { - // operand::LowerInfo holder - ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> operands_lower_info; - - _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) { - operands_lower_info[index] = std::make_unique<ir::operand::LowerInfo>(); - }); - - // Make op_seqs while checking whether a node can be merged into a op_seq. 
- makeOpSequences(operands_lower_info, options, *backend_resolver); + makeLowerInfo(*backend_resolver); + VERBOSE(LoweredGraph) << "dump before mandatory passes" << std::endl; + dumper::text::dumpLoweredGraph(*this); - _op_seqs.iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) { - assert(op_seq.operations().size() > 0); - std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations())); - }); + // Mandatory passes - kind of legalization(?) + pass::PassRunner{} + .append(std::make_unique<pass::ConstantInsertionPass>(*this)) + .append(std::make_unique<pass::ConstantLoweringPass>(*this)) + .append(std::make_unique<pass::PermutationOperationPass>(*this)) + .append(std::make_unique<pass::PermutationInsertionPass>(*this)) + .run(); - VERBOSE(OpSequences) << "dump without permutation" << std::endl; - dumpOpSequences(_op_seqs, _graph.operations()); + dumpLowerInfo(); - pass::ConstantInsertionPass ci_pass(*this); - ci_pass.run(); + // Optimization passes (optional) + pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run(); - pass::ConstantLoweringPass cl_pass(*this); - cl_pass.run(); - - // Set LowerInfo for each operand from the operand::LowerInfo holder - manipulateLowerInfo(operands_lower_info, options.is_primary_subgraph); - - dumpLowerInfo(); - } - - // Run Permutation Passes - { - pass::PermutationOperationPass po_pass(*this); - po_pass.run(); - - pass::PermutationInsertionPass pi_pass(*this); - pi_pass.run(); - - pass::PermutationEliminationPass pe_pass(*this); - pe_pass.run(); - - VERBOSE(OpSequences) << "dump with permutation" << std::endl; - dumpOpSequences(_op_seqs, _graph.operations()); - } + VERBOSE(LoweredGraph) << "Dump after all the passes" << std::endl; + for (auto &&operand : _graph.getInputs()) + VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl; + for (auto &&operand : _graph.getOutputs()) + VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl; + dumper::text::dumpLoweredGraph(*this); // Graph verifications { + assert(ir::verifier::InputOutputChecker().verify(_graph)); assert(ir::verifier::DAGChecker().verify(_graph)); - assert(ir::verifier::EdgeConsistencyChecker().verify(_graph)); + assert(ir::verifier::EdgeChecker().verify(_graph)); } } -const ir::operation::LowerInfo * -LoweredGraph::getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const +void LoweredGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolver) { - auto itr = _lower_info_map.op_seq.find(op_seq_index); - if (itr == _lower_info_map.op_seq.end()) - return nullptr; - return itr->second.get(); -} - -void LoweredGraph::setLowerInfo(const ir::OpSequenceIndex &op_seq_index, - std::unique_ptr<ir::operation::LowerInfo> &&lower_info) -{ - _lower_info_map.op_seq.insert(std::make_pair(op_seq_index, std::move(lower_info))); -} + _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) { + lower_info().operand.set(index, std::make_unique<OperandLowerInfo>()); + }); -void LoweredGraph::removeLowerInfo(const ir::OpSequenceIndex &op_seq_index) -{ - auto &op_seq_lower_info = _lower_info_map.op_seq; - assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end()); - for (auto it = op_seq_lower_info.begin(); it != op_seq_lower_info.end(); ++it) - { - if (it->first == op_seq_index) + // Set operand lower info using assigned backends to operations + _graph.operations().iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &) { + const ir::IOperation &op = 
_graph.operations().at(op_ind); + auto backend = backend_resolver.getBackend(op_ind); + if (!backend) { - op_seq_lower_info.erase(it); - break; + throw std::runtime_error{"Fail to find backend for " + op.name() + " operation"}; } - } -} - -const ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) const -{ - auto itr = _lower_info_map.operand.find(index); - if (itr == _lower_info_map.operand.end()) - return nullptr; - return itr->second.get(); -} - -ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) -{ - auto itr = _lower_info_map.operand.find(index); - if (itr == _lower_info_map.operand.end()) - return nullptr; - return itr->second.get(); -} - -void LoweredGraph::setLowerInfo(const ir::OperandIndex &index, - std::unique_ptr<ir::operand::LowerInfo> &&lower_info) -{ - _lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info))); -} - -void LoweredGraph::removeLowerInfo(const ir::OperandIndex &index) -{ - _lower_info_map.operand.erase(index); -} - -void LoweredGraph::iterateTopolOpSeqs( - const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const -{ - // Topological Sorting for ir::OpSequences - std::vector<ir::OpSequenceIndex> topol_sorted; - ir::PostDfsIterator<true>{}.iterateOpSeqs( - *this, [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) { - topol_sorted.emplace_back(index); - }); - std::reverse(topol_sorted.begin(), topol_sorted.end()); - for (const auto op_seq_idx : topol_sorted) - { - const auto &op_seq = _op_seqs.at(op_seq_idx); - fn(op_seq_idx, op_seq); - } -} - -void LoweredGraph::iterateTopolOpSeqs( - const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn) -{ - // Topological Sorting for ir::OpSequences - std::vector<ir::OpSequenceIndex> topol_sorted; - ir::PostDfsIterator<false>{}.iterateOpSeqs( - *this, [&](const ir::OpSequenceIndex &index, ir::OpSequence &) { - topol_sorted.emplace_back(index); - }); - std::reverse(topol_sorted.begin(), topol_sorted.end()); - for (const auto op_seq_idx : topol_sorted) - { - auto &op_seq = _op_seqs.at(op_seq_idx); - fn(op_seq_idx, op_seq); - } -} - -ir::OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const ir::OperationIndex &node_index, - const ir::Operation &node) -{ - // Create a fresh op_seq with one operation, and append it to op_seqs - // Create a fresh op_seq - auto op_seq = std::make_unique<ir::OpSequence>(_graph.layout()); - - // Add an operation - op_seq->appendOperation(node_index); - - // Update input/output - op_seq->setOutputs(node.getOutputs()); - op_seq->setInputs(node.getInputs()); - - return _op_seqs.emplace(std::move(op_seq)); -} - -void LoweredGraph::makeOpSequences( - ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info, - const CompilerOptions &options, const BackendResolver &backend_resolver) -{ - // if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq - const int op_seq_max_node = options.op_seq_max_node; - assert(op_seq_max_node >= 0); - - bool is_profiling = options.he_profiling_mode; - ir::OpSequence *op_seq = nullptr; - ir::OpSequenceIndex op_seq_index; - - // NOTE: The below method appends nodes while making one op_seq if needed. If something better - // ways, happy to update this code. 
- ir::PostDfsConstIterator{}.iterate( - _graph, [&](const ir::OperationIndex &node_index, const ir::Operation &node) { - // LowerInfo for in/output operands - auto backend = backend_resolver.getBackend(node_index); - - // Get frontend's layout - auto frontend_layout = _graph.layout(); - - // The layout of each backend should be set at another place - // TODO Change setting layout of each backend at another place - auto backend_layout = backend->config()->supportLayout(node, frontend_layout); - - for (auto operand : node.getInputs() | ir::Remove::UNDEFINED) - { - auto &&lower_info = operands_lower_info.at(operand); - lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout}); - } - for (auto operand : node.getOutputs()) - { - auto &&lower_info = operands_lower_info.at(operand); - lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout}); - } - - bool new_op_seq = (op_seq == nullptr || - (op_seq_max_node != 0 && - op_seq->operations().size() >= static_cast<size_t>(op_seq_max_node))); - - // for profiling each op_seq must contain just one node, - // so that we can measure a node separately - if (new_op_seq || is_profiling || - !mergeable(op_seq_index, node_index, backend_layout, backend_resolver)) - { - auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node); - - // ir::OpSequence LowerInfo - setLowerInfo(new_op_seq_index, - std::make_unique<ir::operation::LowerInfo>(backend, backend_layout)); - - op_seq_index = new_op_seq_index; - op_seq = &(_op_seqs.at(new_op_seq_index)); - - VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " is created for " - << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl; - } - else - { - op_seq->appendOperation(node_index); - // Set inputs - auto new_inputs = node.getInputs(); - // Add inputs except outputs of the previous node - for (auto ind : op_seq->getInputs()) - { - if (!node.getOutputs().contains(ind)) - new_inputs.append(ind); - } - op_seq->setInputs(new_inputs); - VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " merges " - << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl; - } - }); -} + auto frontend_layout = _graph.layout(); -void LoweredGraph::manipulateLowerInfo( - ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info, - bool is_primary) -{ - const auto controlflow_backend = BackendManager::get().getControlflow(); + // The layout of each backend should be set at another place + // TODO Change setting layout of each backend at another place + auto backend_layout = backend->config()->supportLayout(op, frontend_layout); - // TODO Rather than handling primary graph specially, - // let the permute inserted and remove it later - if (is_primary) - { - // TODO Rather than using NHWC Get frontend layout of this node from IR - auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC}; - for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED) + for (auto &&ind : op.getInputs() | ir::Remove::UNDEFINED) { - auto &&lower_info = operands_lower_info.at(index); - assert(lower_info->def_factors().empty()); - lower_info->addDefPermuteFactor(factor); + auto &operand_li = lower_info().operand.at(ind); + operand_li.addUsePermuteFactor(PermuteFactor{backend, backend_layout}); } - for (auto index : _graph.getOutputs()) + for (auto &&ind : op.getOutputs() | ir::Remove::UNDEFINED) { - auto &&lower_info = operands_lower_info.at(index); - lower_info->addUsePermuteFactor(factor); + auto 
&operand_li = lower_info().operand.at(ind); + operand_li.addDefPermuteFactor(PermuteFactor{backend, backend_layout}); } - } - else + lower_info().operation.set( + op_ind, std::make_unique<compiler::OperationLowerInfo>(backend, backend_layout)); + }); + + // Handle graph inputs and outputs + const auto builtin_backend = BackendManager::get().getBuiltin(); + auto factor = PermuteFactor{builtin_backend, _graph.layout()}; + for (auto &&index : _graph.getInputs() | ir::Remove::UNDEFINED) { - for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED) - { - auto &&lower_info = operands_lower_info.at(index); - if (!(lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0)) - { - // In case of not that Graph's input is not used in any operation and not the graph's - // output. - // In other words, it is not unused input in Graph. - lower_info->addDefPermuteFactor(*lower_info->use_factors().begin()); - } - else - { - // In case of that an operand is Graph's input and not input or output of any operation - lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{ - controlflow_backend, - ir::Layout::NHWC // TODO Get frontend layout of this node from IR - }); - } - } + auto &operand_li = lower_info().operand.at(index); + assert(operand_li.def_factors().empty()); + operand_li.addDefPermuteFactor(factor); } - for (auto index : _graph.getOutputs()) + for (auto &&index : _graph.getOutputs() | ir::Remove::UNDEFINED) { - auto &&lower_info = operands_lower_info.at(index); - if (lower_info->def_factors().size() == 0) - { - // In case of that an operand is Graph's output and not input or output of any operation - lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{ - controlflow_backend, - ir::Layout::NHWC // TODO Get frontend layout of this node from IR - }); - } + auto &operand_li = lower_info().operand.at(index); + operand_li.addUsePermuteFactor(factor); } - // Set LowerInfo for each operand from the operand::LowerInfo holder - _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &) { - setLowerInfo(index, std::move(operands_lower_info[index])); + // Handle variable tensors + _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &operand) { + // Some inputs of an operation could be non-constant, but not existed in graph inputs/outputs + // and not undefined operand - these are variable tensors. For example, + // UnidirectionalSequenceLSTM has such inputs. 
+ if (operand.info().isVariable()) + { + // The variable operand with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + auto operand_li = lower_info().operand.at(index); + assert(operand_li.def_factors().empty()); + operand_li.addDefPermuteFactor(operand_li.use_factors().getOnlyElement()); + } }); } @@ -395,12 +189,22 @@ void LoweredGraph::dumpLowerInfo() std::map<uint32_t, std::string> dumps; _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) { - std::stringstream sstream; - if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty()) + const auto operand_lower_info = lower_info().operand.getRawPtr(index); + assert(operand_lower_info); + if (!operand_lower_info->def_factors().empty() || !operand_lower_info->use_factors().empty()) { - auto factors_to_string = [](const ir::operand::PermuteFactorSet &factors) { + auto shape_to_string = [](const ir::Shape &shape) { + std::stringstream sstream; + sstream << "{ "; + for (auto i = 0; i < shape.rank(); ++i) + sstream << (shape.dim(i)) << " "; + sstream << "}"; + return sstream.str(); + }; + + auto factors_to_string = [](const PermuteFactorSet &factors) { std::string str; - for (auto factor : factors) + for (auto &&factor : factors) { str += factor.backend()->config()->id(); str += "(" + to_string(factor.layout()) + ")"; @@ -409,159 +213,45 @@ void LoweredGraph::dumpLowerInfo() return "{ " + str + "}"; }; - auto operation_index_to_string = [](const ir::OperationIndexSet &operations) { - std::string str; - for (auto op : operations) - { - str += std::to_string(op.value()); - str += " "; - } - return "{ " + str + "}"; + auto operation_index_set_to_string = [](const ir::OperationIndexSet &operations) { + std::stringstream sstream; + sstream << "{ "; + for (auto &&op : operations) + sstream << op << " "; + sstream << "}"; + return sstream.str(); + }; + + auto data_to_str = [](const ir::Data *data) { + return (data ? (std::to_string(data->size()) + " bytes") : "N/A"); }; - const auto lower_info = getLowerInfo(index); - const auto &shape = object.shape(); - std::string def_ops = - object.getDef().valid() ? 
std::to_string(object.getDef().value()) : "N/A"; - std::string use_ops = operation_index_to_string(object.getUses()); - std::string def_layouts = factors_to_string(lower_info->def_factors()); - std::string use_layouts = factors_to_string(lower_info->use_factors()); - sstream << "Operand #" << index.value() << " LowerInfo" << std::endl; - sstream << " - Shape : { "; - for (auto i = 0; i < shape.rank(); ++i) - { - sstream << (shape.dim(i)) << " "; - } - sstream << "}" << std::endl; - sstream << " - Def ir::Operations : " << def_ops << std::endl; - sstream << " - Use ir::Operations : " << use_ops << std::endl; - sstream << " - Lower Info" << std::endl; - sstream << " - Def Backends : " << def_layouts << std::endl; - sstream << " - Use Backends : " << use_layouts << std::endl; + std::string shape_str = shape_to_string(object.shape()); + std::string def_op = operation_index_set_to_string({object.getDef()}); + std::string use_ops = operation_index_set_to_string(object.getUses()); + std::string def_factors = factors_to_string(operand_lower_info->def_factors()); + std::string use_factors = factors_to_string(operand_lower_info->use_factors()); + std::stringstream sstream; + sstream << "Operand " << index << " Info" << std::endl; + sstream << " - Shape : " << shape_str << std::endl; + sstream << " - Def/Uses : Def " << def_op << " Uses " << use_ops << std::endl; + sstream << " - Data : " << data_to_str(object.data()) << std::endl; + sstream << " - LowerInfo : Def " << def_factors << " Uses " << use_factors << std::endl; + dumps.emplace(index.value(), sstream.str()); } - dumps.emplace(index.value(), sstream.str()); }); for (const auto &e : dumps) { if (!e.second.empty()) { - VERBOSE(Lower) << e.second; + std::istringstream iss(e.second); + std::string line; + while (std::getline(iss, line)) + VERBOSE(Lower) << line << std::endl; } } } -bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index, - const ir::OperationIndex &node_index, ir::Layout layout, - const BackendResolver &backend_resolver) -{ - // Are they mergeable? - // 1. the same backend id and layout? - // 2. Is op_seq or node branched? - // 3. if 1 is true, the op_seq and a node are connected? - const auto &op_seq = _op_seqs.at(op_seq_index); - const auto &node = _graph.operations().at(node_index); - - // The same backend id and layout? - { - const auto op_seq_backend_layout = getLowerInfo(op_seq_index)->layout(); - const auto &op_seq_backend_id = getLowerInfo(op_seq_index)->backend()->config()->id(); - const auto &node_backend_id = backend_resolver.getBackend(node_index)->config()->id(); - VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " { " << op_seq_backend_id << "(" - << to_string(op_seq_backend_layout) << ") } " - << " NODE#" << node_index.value() << " (" << node.name() << ") { " - << node_backend_id << "(" << to_string(layout) << ") } " << std::endl; - if (op_seq_backend_id != node_backend_id || op_seq_backend_layout != layout) - return false; - } - - // Branched? 
- { - std::unordered_set<ir::OperationIndex> branched_set; - - // Check for branching up - for (const auto &input : op_seq.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) - { - const auto &input_obj = _graph.operands().at(input); - auto def = input_obj.getDef(); - if (def.valid()) - { - branched_set.insert(def); - if (branched_set.size() > 1) - { - return false; - } - } - } - branched_set.clear(); - - // Check for branching down - for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED) - { - // TODO Fix this workaround for the case of model outputs that are used by another operation - // This is needed since the branching is decided by operation, but for model outputs, - // there is controlflow backen(use backend) but no actual use operation exists - if (_graph.getOutputs().contains(output)) - return false; - - const auto &output_obj = _graph.operands().at(output); - for (const auto &use : output_obj.getUses()) - { - branched_set.insert(use); - if (branched_set.size() > 1) - { - return false; - } - } - } - } - - // Connected? - // an input of one node is an output of the other node? or vice-versa? - { - const auto &node_inputs = node.getInputs(); - const auto &node_outputs = node.getOutputs(); - - // op_seq's operations are in order so that we just check the first and the last - std::vector<ir::OperationIndex> op_seq_ops{op_seq.operations()[0]}; - if (op_seq.operations().size() > 1) - op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]); - - for (const auto &n_index : op_seq_ops) - { - const auto &n = _graph.operations().at(n_index); - - // node's output == op_seq's input? - for (const auto input : n.getInputs() | ir::Remove::UNDEFINED) - { - if (node_outputs.contains(input)) - { - VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " 's NODE#" << n_index.value() - << "(" << n.name() << ") is connected to NODE#" << node_index.value() - << "(" << node.name() << ")" << std::endl; - return true; - } - } - - // node's input == op_seq's output? 
- for (const auto output : n.getOutputs()) - { - if (node_inputs.contains(output)) - { - VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " 's NODE#" << n_index.value() - << " (" << n.name() << ") is connected to NODE#" << node_index.value() - << std::endl; - return true; - } - } - } - - VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " is not connected to NODE#" - << node_index.value() << "(" << node.name() << ")" << std::endl; - } - - return false; -} - } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc index ed49ee56f..ccd08893f 100644 --- a/runtime/onert/core/src/compiler/ManualScheduler.cc +++ b/runtime/onert/core/src/compiler/ManualScheduler.cc @@ -29,9 +29,9 @@ namespace onert namespace compiler { -ManualScheduler::ManualScheduler(const backend::BackendContexts &backend_contexts, +ManualScheduler::ManualScheduler(const std::vector<const backend::Backend *> &backends, const compiler::CompilerOptions &options) - : _backend_contexts{backend_contexts}, _options{options} + : _backends{backends}, _options{options} { } @@ -42,7 +42,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap // This fallback will be used in case that `backend_for_all` is unavailable auto fallback = [&]() -> const backend::Backend * { - for (auto backend_id : _options.backend_list) + for (auto &&backend_id : _options.backend_list) { auto backend = resolveBackend(backend_id); if (backend) @@ -58,20 +58,20 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap VERBOSE(ManualScheduler) << "Default backend for all ops: " << backend_all->config()->id() << std::endl; - graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) { + graph.operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &) { backend_resolver->setBackend(index, backend_all); }); // 2. Backend per operation type std::unordered_map<ir::OpCode, backend::Backend *> op_type_map; - for (auto &pair : manual_options.opcode_to_backend) + for (const auto &pair : manual_options.opcode_to_backend) { op_type_map.emplace(pair.first, BackendManager::get().get(pair.second)); } // By default, Custom uses cpu backend op_type_map[ir::OpCode::Custom] = BackendManager::get().get("cpu"); - graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &operation) { + graph.operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &operation) { auto itr = op_type_map.find(operation.opcode()); if (itr != op_type_map.end()) { @@ -80,7 +80,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap }); // 3. Backend per operation - for (auto &pair : manual_options.index_to_backend) + for (const auto &pair : manual_options.index_to_backend) { const auto &key = pair.first; const auto &val = pair.second; @@ -88,22 +88,21 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap try { graph.operations().at(key); // Check if exist, or this will throw - backend_resolver->setBackend( - key, BackendManager::get().get( - val)); // TODO Ensure this backend is available in backend contexts + backend_resolver->setBackend(key, BackendManager::get().get(val)); } catch (...) 
{ - VERBOSE(ManualScheduler) << "Invalid value while OperationIndex to Backend mapping : @" - << key.value() << " -> \"" << val << "\"" << std::endl; + VERBOSE(ManualScheduler) << "Invalid value while OperationIndex to Backend mapping : @" << key + << " -> \"" << val << "\"" << std::endl; } } // Dump final assignment - backend_resolver->iterate([&](const ir::OperationIndex &index, const backend::Backend &backend) { - VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": " - << backend.config()->id() << std::endl; - }); + WHEN_LOG_ENABLED(backend_resolver->iterate( + [&](const ir::OperationIndex &index, const backend::Backend &backend) { + VERBOSE(ManualScheduler) << "backend for " << index << ": " << backend.config()->id() + << std::endl; + })); return backend_resolver; } @@ -113,7 +112,7 @@ const backend::Backend *ManualScheduler::resolveBackend(const std::string &id, { // Ensure if the backend is available in the current backend context const backend::Backend *backend = BackendManager::get().get(id); - if (!backend || _backend_contexts.find(backend) == _backend_contexts.end()) + if (!backend || std::find(_backends.begin(), _backends.end(), backend) == _backends.end()) { backend = fallback; } diff --git a/runtime/onert/core/src/compiler/ManualScheduler.h b/runtime/onert/core/src/compiler/ManualScheduler.h index 41503f7ff..18732d744 100644 --- a/runtime/onert/core/src/compiler/ManualScheduler.h +++ b/runtime/onert/core/src/compiler/ManualScheduler.h @@ -28,7 +28,7 @@ namespace compiler class ManualScheduler : public IScheduler { public: - ManualScheduler(const backend::BackendContexts &backend_contexts, + ManualScheduler(const std::vector<const backend::Backend *> &backends, const compiler::CompilerOptions &options); std::unique_ptr<BackendResolver> schedule(const ir::Graph &graph) override; @@ -37,7 +37,7 @@ private: const backend::Backend *fallback = nullptr); private: - const backend::BackendContexts &_backend_contexts; + std::vector<const backend::Backend *> _backends; compiler::CompilerOptions _options; }; diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.cc b/runtime/onert/core/src/compiler/MultiModelCompiler.cc new file mode 100644 index 000000000..141fdfe09 --- /dev/null +++ b/runtime/onert/core/src/compiler/MultiModelCompiler.cc @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "MultiModelCompiler.h" + +#include "CompilerHelpers.h" +#include "ExecutorFactory.h" +#include "ShapeValidator.h" +#include "pass/ConstantOutputPass.h" +#include "pass/OddOutputPass.h" +#include "pass/PassRunner.h" +#include "pass/UnusedOperandEliminationPass.h" +#include "../dumper/dot/DotDumper.h" +#include "../exec/Executors.h" +#include "../ir/OperationDumper.h" +#include "../ir/verifier/Verifier.h" + +#include "compiler/StaticShapeInferer.h" + +#include <misc/string_helpers.h> +#include <misc/polymorphic_downcast.h> + +namespace onert +{ +namespace compiler +{ + +MultiModelCompiler::MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg, + std::vector<std::unique_ptr<CompilerOptions>> &copts) + : _nnpkg{nnpkg}, _voptions{} +{ + assert(nnpkg->model_count() != 1); + + for (uint32_t i = 0; i < copts.size(); i++) + { + _voptions.push_back(copts[i].get()); + } +} + +std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void) +{ + /*************************************************** + * Prepare compilation phase + ***************************************************/ + for (auto &&options : _voptions) + { + if (!options) + throw std::runtime_error{"Empty compile option"}; + + // Mode check + // TODO handle option for each model + if (options->he_profiling_mode) + throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet"); + + if (!options->minmax_filepath.empty()) + throw std::runtime_error("Recording minmax is not supported for multiple models"); + + options->forceInternalOptions(); + options->verboseOptions(); + } + + // NYI: allow one model compilation + auto const model_count = _nnpkg->model_count(); + if (model_count != _voptions.size()) + throw std::runtime_error{"Model count and option vector size mismatch"}; + + for (uint16_t i = 0; i < model_count; i++) + { + if (!_nnpkg->model(ir::ModelIndex{i})->hasOnly<ir::Graph>()) + throw std::runtime_error("MultiModelCompiler can only compile models for inference."); + } + + for (uint16_t i = 0; i < model_count; i++) + { + _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) { + auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph); + + // Mandatory passes + pass::PassRunner{} + .append(std::make_unique<pass::ConstantOutputPass>(subg)) + .append(std::make_unique<pass::OddOutputPass>(subg)) + .run(); + + // Optimizations + pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run(); + }); + } + + /*************************************************** + * Backend independent analysis & optimization phase + ***************************************************/ + // TODO Handle dump level for each model + auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level); + onert::dumper::dot::DotDumper dot_dumper(dump_level); + + // Tracing context + // TODO Support tracing_ctx for multiple model + std::unique_ptr<util::TracingCtx> tracing_ctx = nullptr; + + // Model edge context: copy model edge context + auto model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges()); + + // Custom kernels + std::unordered_map<ir::ModelIndex, std::shared_ptr<backend::custom::IKernelBuilder>> + custom_kernel_builders; + for (uint16_t i = 0; i < model_count; i++) + { + auto const model_index = ir::ModelIndex{i}; + custom_kernel_builders[model_index] = _nnpkg->model(model_index)->getKernelBuilder(); + } + + // Lower: Assign backend + std::unordered_map<ir::ModelIndex, + 
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>> + lowered_subgs; + + for (uint16_t i = 0; i < model_count; i++) + { + auto const model_index = ir::ModelIndex{i}; + auto model = _nnpkg->model(model_index); + + model->iterate([&](const ir::SubgraphIndex &subg_index, ir::IGraph &graph) { + auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph); + + dot_dumper.dump(subg, + nnfw::misc::str("before_lower_model-", i, "-subg-", subg_index.value())); + // Lower: Assign backend + lowered_subgs[model_index][subg_index] = + std::make_unique<compiler::LoweredGraph>(subg, *_voptions[i]); + // Set tracing_ctx for copied graph + if (tracing_ctx != nullptr) + tracing_ctx->setSubgraphIndex(&(lowered_subgs[model_index][subg_index]->graph()), + subg_index.value()); + }); + } + + _nnpkg.reset(); + + for (const auto &pair : lowered_subgs) + { + const auto &model_index = pair.first; + const auto &model_lsubg = pair.second; + + for (const auto &pair_inner : model_lsubg) + { + const auto &subg_index = pair_inner.first; + const auto &lowered_subg = pair_inner.second; + dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_model-", model_index.value(), + "-subg-", subg_index.value())); + } + } + + // Shape inference. + for (auto &&pair : lowered_subgs) + { + auto &model_lsubgs = pair.second; + // Run the StaticShapeInfer of primary subg. All child StaticShapeInferers are called + // recursively + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers = + createStaticShapeInferers(model_lsubgs); + + const auto primary_subg_idx = ir::SubgraphIndex{0}; + inferers.at(primary_subg_idx)->infer(); + + for (const auto &pair_inferer : inferers) + { + const auto inferer = pair_inferer.second.get(); + inferer->dump(); + } + } + + // Shape validation + // TODO Move shape independent feature check from ShapeValidator to OperationValidator + // TODO Move ShapeValidator into shape inference + // - Check input tensor shape validation + // - Check parameter value validation which valid value is depend on input tensor shape + // - Output tensor shape validation check is needless because + // static/dynamic shape inferer will make valid output shape + for (const auto &pair : lowered_subgs) + { + const auto &model_lsubgs = pair.second; + + for (const auto &pair_inner : model_lsubgs) + { + const auto &lowered_subg = pair_inner.second; + compiler::ShapeValidator{lowered_subg->graph()}(); + } + } + + /************************************************************* + * Backend independent analysis & optimization phase finished + *************************************************************/ + auto executors = std::make_shared<exec::Executors>(std::move(model_edges)); + for (auto &&pair : lowered_subgs) + { + auto const &model_index = pair.first; + auto &model_lsubgs = pair.second; + + for (auto &&pair_inner : model_lsubgs) + { + auto const subg_index = pair_inner.first; + auto &lowered_subg = pair_inner.second; + auto const indexed_ranks = lowered_subg->indexed_ranks(); + + ir::OperationDumper dumper("Executor generation of Subgraph " + + std::to_string(subg_index.value())); + lowered_subg->graph().operations().iterate( + [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); }); + + ExecutorFactoryArgs args; + args.tracing_ctx = tracing_ctx.get(); + args.options = _voptions[model_index.value()]; + args.model_index = model_index; + args.custom_kernel_builder = custom_kernel_builders[model_index]; + auto executor = 
std::unique_ptr<exec::IExecutor>{ + ExecutorFactory::get().create(std::move(lowered_subg), executors, args)}; + executor->setIndexedRanks(indexed_ranks); + executors->emplace(model_index, subg_index, std::move(executor)); + } + } + + /******************************** + * Code generation phase finished + ********************************/ + return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx)); +} + +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.h b/runtime/onert/core/src/compiler/MultiModelCompiler.h new file mode 100644 index 000000000..b282a5087 --- /dev/null +++ b/runtime/onert/core/src/compiler/MultiModelCompiler.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file MultiModelCompiler.h + * @brief This file contains MultiModelCompiler class to define and run compilation phase + */ + +#ifndef __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__ +#define __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__ + +#include "compiler/CompilerOptions.h" +#include "compiler/ICompiler.h" +#include "ir/NNPkg.h" + +namespace onert +{ +namespace compiler +{ + +/** + * @brief Class to compile NN package + */ +class MultiModelCompiler final : public ICompiler +{ +public: + /** + * @brief Construct a new Compiler object for NN package + * @param[in] nnpkg NN package to compile + * @param[in] coptions Compiler option vector for each model in package + */ + MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg, + std::vector<std::unique_ptr<CompilerOptions>> &copts); + + /** + * @brief Destroy the MultiModelCompiler object + */ + ~MultiModelCompiler() = default; + +public: + /** + * @brief Do compilation with the options + * + * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation + */ + std::shared_ptr<CompilerArtifact> compile(void); + +private: + std::shared_ptr<ir::NNPkg> _nnpkg; + std::vector<CompilerOptions *> _voptions; +}; + +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__ diff --git a/runtime/onert/core/src/ir/operation/LowerInfo.cc b/runtime/onert/core/src/compiler/OperationLowerInfo.cc index 249918bd6..e8a438130 100644 --- a/runtime/onert/core/src/ir/operation/LowerInfo.cc +++ b/runtime/onert/core/src/compiler/OperationLowerInfo.cc @@ -14,21 +14,18 @@ * limitations under the License. 
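The MultiModelCompiler surface added above is small: a constructor taking the NN package plus one CompilerOptions per model, and compile(). A minimal sketch of driving it, assuming the package and options were already produced by a loader that is not part of this change (the helper name and include paths below are illustrative only):

#include "MultiModelCompiler.h" // the header above pulls in compiler/ICompiler.h, CompilerOptions.h and ir/NNPkg.h

std::shared_ptr<onert::compiler::CompilerArtifact>
compileNNPkg(const std::shared_ptr<onert::ir::NNPkg> &nnpkg,
             std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> &copts)
{
  // compile() throws if the option count does not match the model count, or if
  // profiling / minmax recording is requested for a multi-model package.
  onert::compiler::MultiModelCompiler compiler{nnpkg, copts};
  // Runs the mandatory passes, lowering, static shape inference, shape validation
  // and executor generation described in MultiModelCompiler.cc above.
  return compiler.compile();
}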
*/ -#include "ir/operation/LowerInfo.h" +#include "compiler/OperationLowerInfo.h" namespace onert { -namespace ir -{ -namespace operation +namespace compiler { -LowerInfo::LowerInfo(const backend::Backend *backend, Layout layout) - : _permute_factor{backend, layout} +OperationLowerInfo::OperationLowerInfo(const backend::Backend *backend, ir::Layout layout) + : _permute_factor{backend, layout} { // DO NOTHING } -} // namespace operation -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/OperationValidator.cc b/runtime/onert/core/src/compiler/OperationValidator.cc deleted file mode 100644 index f7f659e3e..000000000 --- a/runtime/onert/core/src/compiler/OperationValidator.cc +++ /dev/null @@ -1,1053 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "OperationValidator.h" - -#include <typeinfo> - -#include "ir/Graph.h" -#include "ir/operation/LowerInfo.h" - -#include "util/logging.h" -#include "util/Utils.h" - -#define OP_REQUIRES(EXP) \ - do \ - { \ - if (!(EXP)) \ - throw std::runtime_error("OperationValidator failed at line " + std::to_string(__LINE__)); \ - } while (0) - -namespace onert -{ -namespace compiler -{ - -OperationValidator::OperationValidator(const ir::Graph &graph) - : _graph{graph}, _ctx{graph.operands()}, _current_op_seq_layout{ir::Layout::UNKNOWN} -{ -} - -void OperationValidator::checkUnaryOp(const ir::Operation &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(0)}; - - // Check if I/O types match - OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type()); - - if (_ctx.at(output_index).info().isDynamic()) - return; - - // Check if I/O shapes match - OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); -} - -void OperationValidator::operator()() -{ - // There is no reason for each subgraph to have subgraphs since compiler has subgraphs when - // creating Compiler - assert(_graph.subgraphs() == nullptr); - - _current_op_seq_layout = _graph.layout(); - - _graph.operations().iterate( - [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); }); -} - -void OperationValidator::visit(const ir::operation::BatchMatMul &node) -{ - const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS)); - const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS)); - const auto out_index{node.getOutputs().at(0)}; - - // Constant lhs and rhs is not implemented yet - OP_REQUIRES(!_ctx.at(lhs_index).isConstant() && !_ctx.at(rhs_index).isConstant()); - - if (_ctx.at(out_index).info().isDynamic()) - return; - - OP_REQUIRES(_ctx.at(lhs_index).shape().rank() <= 4); - OP_REQUIRES(_ctx.at(rhs_index).shape().rank() <= 4); - OP_REQUIRES(_ctx.at(lhs_index).shape().rank() >= 2); - OP_REQUIRES(_ctx.at(rhs_index).shape().rank() >= 2); -} - -void 
OperationValidator::visit(const ir::operation::BatchToSpaceND &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) - return; - - const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)}; - const auto block_size_index{ - node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - - const auto frontend_layout = _current_op_seq_layout; - const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); - const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); - - // All requirement as per NNAPI specification. - OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1); - - OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2); - - OP_REQUIRES(_ctx.at(block_size_index).isConstant()); - - OP_REQUIRES(input_shape.C == output_shape.C); -} - -void OperationValidator::visit(const ir::operation::Comparison &node) -{ - const auto output_index{node.getOutputs().at(0)}; - // This validator does not check shape. So checking isDynamic() is skipped. - - const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)}; - const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)}; - - OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type()); - OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::BOOL8); -} - -void OperationValidator::visit(const ir::operation::Softmax &node) -{ - VERBOSE(Softmax) << "Configure SOFTMAX operation" << std::endl; - - const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto input_index{node.getInputs().at(0)}; - - OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank()); -} - -void OperationValidator::visit(const ir::operation::InstanceNorm &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) - return; - - const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)}; - const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; - const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - - OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape()); - OP_REQUIRES(_ctx.at(gamma_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1); -} - -void OperationValidator::visit(const ir::operation::Pool2D &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) - return; - - const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)}; - - OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); -} - -void OperationValidator::visit(const ir::operation::Permute &node) -{ - VERBOSE(Permute) << "Configure Permute operation" << std::endl; - - const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto input_index{node.getInputs().at(0)}; - - OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank()); -} - -void OperationValidator::visit(const ir::operation::Reduce &node) -{ - VERBOSE(Permute) << "Configure " + node.name() + " operation" << std::endl; - - const auto 
output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)}; - const auto input_shape = _ctx.at(input_index).shape(); - const auto output_shape = _ctx.at(output_index).shape(); - - OP_REQUIRES(input_shape.rank() <= 4); - OP_REQUIRES(output_shape.rank() <= input_shape.rank()); - - // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only - // supports cases reducing height and width or reducing depth. - // TODO We have to support all cases of dimensions up to 4. - // For correct permuting, we have to set output's shape to be equal in dimension position of the - // input. But the positions of the same dimensions in the input and output may be set differently. - // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original - // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to - // extend it in 4 dimensions, it should be {1,1,3,5}. - // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of - // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the - // next operation is not desired. - if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank()) - { - if (output_shape.rank() == 2) - { - // Reducing HW - OP_REQUIRES(input_shape.dim(0) == output_shape.dim(0) && - input_shape.dim(3) == output_shape.dim(1)); - } - else if (output_shape.rank() == 3) - { - // Reducing C or - // (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1) - OP_REQUIRES((input_shape.dim(0) == output_shape.dim(0) && - input_shape.dim(1) == output_shape.dim(1) && - input_shape.dim(2) == output_shape.dim(2)) || - (input_shape.dim(0) == output_shape.dim(0) && - (input_shape.dim(1) == output_shape.dim(1) || - input_shape.dim(2) == output_shape.dim(1)) && - input_shape.dim(3) == 1 && output_shape.dim(2) == 1)); - } - } -} - -void OperationValidator::visit(const ir::operation::Transpose &node) -{ - const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; - const auto &perm{node.param().perm}; - - const auto &output_shape = _ctx.at(output_index).shape(); - const auto &input_shape = _ctx.at(input_index).shape(); - - OP_REQUIRES(input_shape.rank() == static_cast<int>(perm.size())); - OP_REQUIRES(input_shape.rank() == output_shape.rank()); -} - -void OperationValidator::visit(const ir::operation::RNN &node) -{ - // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn - // TODO Support dynamic rnn - const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto hidden_state_out_index{ - node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)}; - - const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)}; - const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)}; - const auto recurrent_weights_index{ - node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)}; - const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)}; - const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)}; - - const auto batch_size = 
_ctx.at(output_index).shape().dim(0); - const auto num_units = _ctx.at(output_index).shape().dim(1); - - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 2 && - _ctx.at(hidden_state_out_index).shape().rank() == 2 && - _ctx.at(input_index).shape().rank() == 2 && - _ctx.at(weights_index).shape().rank() == 2 && - _ctx.at(recurrent_weights_index).shape().rank() == 2 && - _ctx.at(hidden_state_in_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(bias_index).shape().rank() == 1); - - OP_REQUIRES(batch_size == _ctx.at(input_index).shape().dim(0) && - batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) && - batch_size == _ctx.at(hidden_state_out_index).shape().dim(0)); - OP_REQUIRES(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1)); - - OP_REQUIRES(num_units == _ctx.at(weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_weights_index).shape().dim(0) && - num_units == _ctx.at(bias_index).shape().dim(0)); - OP_REQUIRES(num_units == _ctx.at(output_index).shape().dim(1) && - num_units == _ctx.at(recurrent_weights_index).shape().dim(1) && - num_units == _ctx.at(hidden_state_in_index).shape().dim(1) && - num_units == _ctx.at(hidden_state_out_index).shape().dim(1)); -} - -void OperationValidator::visit(const ir::operation::SpaceToBatchND &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) - return; - - const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; - const auto block_size_index{ - node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; - const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - - const auto frontend_layout = _current_op_seq_layout; - const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); - const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); - - // All requirement as per NNAPI specification. - OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(paddings_index).shape().rank() == 2); - - OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2); - OP_REQUIRES(_ctx.at(paddings_index).shape().dim(0) == 2); - OP_REQUIRES(_ctx.at(paddings_index).shape().dim(1) == 2); - - OP_REQUIRES(_ctx.at(block_size_index).isConstant()); - OP_REQUIRES(_ctx.at(paddings_index).isConstant()); - - OP_REQUIRES(input_shape.C == output_shape.C); -} - -void OperationValidator::visit(const ir::operation::SpaceToDepth &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) - return; - - const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; - - const auto frontend_layout = _current_op_seq_layout; - const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); - const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); - const auto block_size = node.param().block_size; - - // All assertions as per NNAPI specification. 
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); - OP_REQUIRES((block_size >= 1) && (input_shape.H % block_size == 0) && - (input_shape.W % block_size == 0)); - OP_REQUIRES(input_shape.N == output_shape.N); - OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C); -} - -void OperationValidator::visit(const ir::operation::ElementwiseActivation &node) -{ - checkUnaryOp(node); -} - -void OperationValidator::visit(const ir::operation::ElementwiseBinary &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)}; - - OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type()); - OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type()); -} - -void OperationValidator::visit(const ir::operation::ElementwiseUnary &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; - - OP_REQUIRES(node.getInputs().size() == 1); - OP_REQUIRES(node.getOutputs().size() == 1); - - // Check if I/O types match - if (node.param().op_type == ir::operation::ElementwiseUnary::Type::DEQUANTIZE) - { - OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM); - OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32); - } - else if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE) - { - OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32); - OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM); - } - else if (node.param().op_type != ir::operation::ElementwiseUnary::Type::CAST) - { - OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type()); - } - - if (_ctx.at(output_index).info().isDynamic()) - return; - - OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); -} - -void OperationValidator::visit(const ir::operation::EmbeddingLookup &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; - const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; - - const auto &output_obj = _ctx.at(output_index); - const auto &lookups_obj = _ctx.at(lookups_index); - const auto &values_obj = _ctx.at(values_index); - - // Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying - // TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729) - { - OP_REQUIRES(lookups_obj.typeInfo().type() == ir::DataType::INT32); - - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto &output_shape = output_obj.shape(); - const auto &lookups_shape = lookups_obj.shape(); - const auto &values_shape = values_obj.shape(); - - OP_REQUIRES(lookups_shape.rank() == 1); - OP_REQUIRES(values_shape.rank() >= 2); - - // output should be a n-D tensor with the same rank and shape as the values tensor, except for - // the first dimension which has the same size as lookups' only dimension. 
- OP_REQUIRES(output_shape.rank() == values_shape.rank()); - OP_REQUIRES(output_shape.dim(0) == lookups_shape.dim(0)); - for (int n = 1; n < output_shape.rank(); ++n) - { - OP_REQUIRES(output_shape.dim(n) == values_shape.dim(n)); - } - } -} - -void OperationValidator::visit(const ir::operation::ExpandDims &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; - - OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type()); - OP_REQUIRES(_ctx.at(axis_index).typeInfo().type() == ir::DataType::INT32); - - if (_ctx.at(axis_index).info().isDynamic()) - return; - OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1); -} - -void OperationValidator::visit(const ir::operation::HashtableLookup &node) -{ - const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)}; - const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)}; - - const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)}; - const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; - const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; - - const auto &output_obj = _ctx.at(output_index); - const auto &hits_obj = _ctx.at(hits_index); - - const auto &lookups_obj = _ctx.at(lookups_index); - const auto &keys_obj = _ctx.at(keys_index); - const auto &values_obj = _ctx.at(values_index); - - OP_REQUIRES(lookups_obj.typeInfo().type() == ir::DataType::INT32); - OP_REQUIRES(keys_obj.typeInfo().type() == ir::DataType::INT32); - OP_REQUIRES(hits_obj.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM); - - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto &output_shape = output_obj.shape(); - const auto &lookups_shape = lookups_obj.shape(); - const auto &keys_shape = keys_obj.shape(); - const auto &values_shape = values_obj.shape(); - - OP_REQUIRES(values_shape.rank() == output_shape.rank()); - OP_REQUIRES(lookups_shape.rank() == 1); - OP_REQUIRES(keys_shape.rank() == 1); - OP_REQUIRES(values_shape.dim(0) == keys_shape.dim(0)); - OP_REQUIRES(lookups_shape.dim(0) == output_shape.dim(0)); -} - -void OperationValidator::visit(const ir::operation::TransposeConv &node) -{ - // param check - OP_REQUIRES((node.param().padding.type == ir::PaddingType::SAME) || - (node.param().padding.type == ir::PaddingType::VALID)); - - // shape check - const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) - return; - - const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; - const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; - - // Only 4D tensors are supported - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank()); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank()); - - const auto frontend_layout = _current_op_seq_layout; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); - // The kernel has only IHWO layout on frontend - // So ker_shape is treated here below - // I -> N - // H -> H - // W -> W - // O -> C - const auto ker_shape = 
_ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC); - - OP_REQUIRES(ifm_shape.N == ofm_shape.N); - OP_REQUIRES(ifm_shape.C == ker_shape.C); - OP_REQUIRES(ker_shape.N == ofm_shape.C); -} - -void OperationValidator::visit(const ir::operation::Gather &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) - return; - - const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; - const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; - - const auto ifm_shape = _ctx.at(ifm_index).shape(); - const auto indices_shape = _ctx.at(indices_index).shape(); - const auto ofm_shape = _ctx.at(ofm_index).shape(); - - OP_REQUIRES(ifm_shape.rank() <= 4); - OP_REQUIRES(indices_shape.rank() <= 3); - OP_REQUIRES(ofm_shape.rank() <= 4); -} - -void OperationValidator::visit(const ir::operation::DepthToSpace &node) -{ - // param check - int32_t block_size = node.param().block_size; - - OP_REQUIRES(block_size > 0); - - // shape check - const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; - - const auto frontend_layout = _current_op_seq_layout; - const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout); - const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout); - - OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4); - - { - OP_REQUIRES(output_shape.N == input_shape.N); - OP_REQUIRES(output_shape.H == input_shape.H * block_size); - OP_REQUIRES(output_shape.W == input_shape.W * block_size); - OP_REQUIRES(input_shape.C % (block_size * block_size) == 0); - OP_REQUIRES(output_shape.C == input_shape.C / (block_size * block_size)); - } -} - -void OperationValidator::visit(const ir::operation::Pack &node) -{ - // param check - const auto num{node.param().num}; - const auto axis{node.param().axis}; - OP_REQUIRES(num == static_cast<int32_t>(node.getInputs().size())); - - const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - // shape check - const auto &output_shape = _ctx.at(output_index).shape(); - const auto output_rank = static_cast<int32_t>(output_shape.rank()); - - const auto input1_index{node.getInputs().at(0)}; - const auto input_shape = _ctx.at(input1_index).shape(); - - OP_REQUIRES(axis >= -output_rank && axis < output_rank); - for (const auto &index : node.getInputs()) - { - OP_REQUIRES(input_shape == _ctx.at(index).shape()); - } -} - -void OperationValidator::visit(const ir::operation::LSTM &node) -{ - // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn - // TODO Support dynamic rnn - const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto scratch_buffer_index{ - node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; - const auto output_state_out_index{ - node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; - const auto cell_state_out_index{ - node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; - - const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)}; - const auto input_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; - const auto 
input_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)}; - const auto input_to_cell_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)}; - const auto input_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; - const auto recurrent_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; - const auto recurrent_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)}; - const auto recurrent_to_cell_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)}; - const auto recurrent_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; - const auto cell_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; - const auto cell_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; - const auto cell_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; - const auto input_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; - const auto forget_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)}; - const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)}; - const auto output_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)}; - const auto projection_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; - const auto projection_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; - const auto output_state_in_index{ - node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; - const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; - - OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().rank() == 2 && - _ctx.at(output_state_out_index).shape().rank() == 2 && - _ctx.at(cell_state_out_index).shape().rank() == 2 && - _ctx.at(output_index).shape().rank() == 2 && - _ctx.at(input_index).shape().rank() == 2 && - _ctx.at(input_to_input_weights_index).shape().rank() == 2 && - _ctx.at(input_to_forget_weights_index).shape().rank() == 2 && - _ctx.at(input_to_cell_weights_index).shape().rank() == 2 && - _ctx.at(input_to_output_weights_index).shape().rank() == 2 && - _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2 && - _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 && - _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 && - _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 && - _ctx.at(projection_weights_index).shape().rank() == 2 && - _ctx.at(output_state_in_index).shape().rank() == 2 && - _ctx.at(cell_state_in_index).shape().rank() == 2); - - OP_REQUIRES(_ctx.at(cell_to_input_weights_index).shape().rank() == 1 && - _ctx.at(cell_to_forget_weights_index).shape().rank() == 1 && - _ctx.at(cell_to_output_weights_index).shape().rank() == 1 && - _ctx.at(input_gate_bias_index).shape().rank() == 1 && - _ctx.at(forget_gate_bias_index).shape().rank() == 1 && - _ctx.at(cell_bias_index).shape().rank() == 1 && - _ctx.at(output_gate_bias_index).shape().rank() == 1 && - _ctx.at(projection_bias_index).shape().rank() == 1); - - // CIFG assertion - 
OP_REQUIRES((_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) == 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0 && - _ctx.at(input_gate_bias_index).shape().dim(0) == 0 && - _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) || - (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) != 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0 && - _ctx.at(input_gate_bias_index).shape().dim(0) != 0)); - - // Peephole assertion - OP_REQUIRES((_ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0 && - _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0) || - (_ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0 && - _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0)); - - bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) != 0; - bool has_recurrent_to_input_weights = - _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0; - bool has_input_gate_bias = _ctx.at(input_gate_bias_index).shape().dim(0) != 0; - bool has_cell_to_input_weights = _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0; - bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0; - bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0; - bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 && - _ctx.at(projection_weights_index).shape().dim(1) != 0; - bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0); - - // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG). - // true: no CIFG - // false: CIFG - bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; - - // NOTE The cell_to_input_weights do not exist in regular CIFG although peephole. - // true: peephole - // false: no peephole - bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights; - - // NOTE The projection weights may have data but the projection bias may not. 
- bool has_projection_param = has_projection_weights; - - const auto batch_size = _ctx.at(input_index).shape().dim(0); - OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) && - batch_size == _ctx.at(cell_state_in_index).shape().dim(0) && - batch_size == _ctx.at(scratch_buffer_index).shape().dim(0) && - batch_size == _ctx.at(output_state_out_index).shape().dim(0) && - batch_size == _ctx.at(cell_state_out_index).shape().dim(0) && - batch_size == _ctx.at(output_index).shape().dim(0)); - - const auto input_size = _ctx.at(input_index).shape().dim(1); - OP_REQUIRES(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) && - input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) && - input_size == _ctx.at(input_to_output_weights_index).shape().dim(1)); - - const auto num_units = _ctx.at(cell_state_out_index).shape().dim(1); - OP_REQUIRES(num_units == _ctx.at(input_to_forget_weights_index).shape().dim(0) && - num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) && - num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) && - num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) && - num_units == _ctx.at(cell_bias_index).shape().dim(0) && - num_units == _ctx.at(output_gate_bias_index).shape().dim(0) && - num_units == _ctx.at(cell_state_in_index).shape().dim(1) && - (((num_units * 3) == _ctx.at(scratch_buffer_index).shape().dim(1)) || - ((num_units * 4) == _ctx.at(scratch_buffer_index).shape().dim(1)))); - - const auto output_size = _ctx.at(output_index).shape().dim(1); - OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) && - output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) && - output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) && - output_size == _ctx.at(output_state_in_index).shape().dim(1) && - output_size == _ctx.at(output_state_out_index).shape().dim(1)); - - if (has_cifg_param) - { - OP_REQUIRES(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1)); - OP_REQUIRES(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) && - (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) || - _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* non-peephole */) && - num_units == _ctx.at(input_gate_bias_index).shape().dim(0)); - OP_REQUIRES(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1)); - OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights && - has_input_gate_bias); - if (has_cell_to_input_weights) - { - // NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole. 
- OP_REQUIRES(has_peephole_param); - } - OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4); - } - else - { - OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3); - } - - if (has_peephole_param) - { - OP_REQUIRES(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) && - num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) && - (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) || - _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */)); - } - - if (has_projection_param) - { - OP_REQUIRES(num_units == _ctx.at(projection_weights_index).shape().dim(1)); - OP_REQUIRES(output_size == _ctx.at(projection_weights_index).shape().dim(0)); - if (has_projection_bias) - { - OP_REQUIRES(output_size == _ctx.at(projection_bias_index).shape().dim(0)); - } - } -} - -void OperationValidator::visit(const ir::operation::L2Normalization &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) - return; - - const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)}; - - auto ifm_shape = _ctx.at(ifm_index).shape(); - auto ofm_shape = _ctx.at(ofm_index).shape(); - - OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank()); - - for (auto i = 0; i < ifm_shape.rank(); i++) - { - OP_REQUIRES(ifm_shape.dim(i) == ofm_shape.dim(i)); - } -} - -void OperationValidator::visit(const ir::operation::Unpack &node) -{ - const auto num{node.param().num}; - OP_REQUIRES(num == static_cast<int32_t>(node.getOutputs().size())); - const auto axis{node.param().axis}; - - const auto output_index{node.getInputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)}; - - const auto &input_shape = _ctx.at(input_index).shape(); - const auto input_rank = static_cast<int32_t>(input_shape.rank()); - - OP_REQUIRES(axis >= -input_rank && axis < input_rank); -} - -void OperationValidator::visit(const ir::operation::Pad &node) -{ - const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)}; - OP_REQUIRES(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32); - - const auto output_index{node.getInputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)}; - - const auto &pad_shape = _ctx.at(pad_index).shape(); - const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank()); - - OP_REQUIRES(pad_shape.rank() == 2); - OP_REQUIRES(pad_shape.dim(0) == input_rank); - OP_REQUIRES(pad_shape.dim(1) == 2); - OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank()); -} - -void OperationValidator::visit(const ir::operation::Select &node) -{ - const auto output_index{node.getOutputs().at(0)}; - // This validator does not check shape. So checking isDynamic() is skipped. 
- - const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)}; - const auto input_true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)}; - const auto input_false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)}; - UNUSED_RELEASE(output_index); - UNUSED_RELEASE(input_true_index); - UNUSED_RELEASE(input_false_index); - - OP_REQUIRES(_ctx.at(condition_index).typeInfo().type() == ir::DataType::BOOL8); -} - -void OperationValidator::visit(const ir::operation::StridedSlice &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)}; - const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)}; - const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; - const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - - UNUSED_RELEASE(starts_index); - UNUSED_RELEASE(ends_index); - UNUSED_RELEASE(strides_index); - - OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type()); - - if (_ctx.at(output_index).info().isDynamic()) - return; - - OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4); -} - -void OperationValidator::visit(const ir::operation::Split &node) -{ - const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)}; - - if (_ctx.at(input_index).info().isDynamic()) - return; - - const auto num_splits = node.param().num_splits; - const auto input_rank = _ctx.at(input_index).shape().rank(); - const auto axis = node.param().axis < 0 ? node.param().axis + input_rank : node.param().axis; - - OP_REQUIRES(num_splits > 0 && num_splits <= 0xFFFF); - OP_REQUIRES(axis >= 0 && axis < input_rank); - OP_REQUIRES(node.getOutputs().size() == static_cast<uint32_t>(num_splits)); - - OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0); -} - -void OperationValidator::visit(const ir::operation::Shape &node) -{ - const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto input_index{node.getInputs().at(0)}; - UNUSED_RELEASE(input_index); - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1); -} - -void OperationValidator::visit(const ir::operation::ResizeBilinear &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; - - if (_ctx.at(output_index).info().isDynamic()) - { - return; - } - OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4); - - auto align_corners = node.param().align_corners; - auto half_pixel_centers = node.param().half_pixel_centers; - - OP_REQUIRES(!align_corners || !half_pixel_centers); -} - -void OperationValidator::visit(const ir::operation::Reverse &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::Reverse::Input::AXIS)}; - - OP_REQUIRES(_ctx.at(axis_index).typeInfo().type() == ir::DataType::INT32); - OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type()); - - if (_ctx.at(output_index).info().isDynamic()) - return; - OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); -} - -void OperationValidator::visit(const ir::operation::If &) -{ 
- // TODO Add to validate with subgraphs -} - -void OperationValidator::visit(const ir::operation::While &node) -{ - // This validator does not check shape. So checking isDynamic() is skipped. - - OP_REQUIRES(node.getInputs().size() == node.getOutputs().size()); - // TODO Add to validate with subgraphs -} - -void OperationValidator::visit(const ir::operation::SquaredDifference &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - - // Check for Type equivalence - OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(lhs_index).typeInfo().type()); - OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type()); - - // Check for dimension constraints - if (_ctx.at(output_index).info().isDynamic()) - return; - - auto output_shape = _ctx.at(output_index).shape(); - auto lhs_shape = _ctx.at(lhs_index).shape(); - auto rhs_shape = _ctx.at(rhs_index).shape(); - // Check for output rank - OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank())); - auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank()); - - for (int idx = 1; idx <= min_rank; idx++) - { - int l_idx = lhs_shape.rank() - idx; - int r_idx = rhs_shape.rank() - idx; - int out_idx = output_shape.rank() - idx; - - OP_REQUIRES((l_idx >= 0) && (r_idx >= 0) && (out_idx >= 0)); - - auto l_dims = lhs_shape.dim(l_idx); - auto r_dims = rhs_shape.dim(r_idx); - auto out_dims = output_shape.dim(out_idx); - - OP_REQUIRES(((l_dims == r_dims) && (out_dims == l_dims)) || - ((l_dims == 1) && (out_dims == r_dims)) || ((r_dims == 1) && (out_dims == l_dims))); - } - auto &tmp_shape = (lhs_shape.rank() > rhs_shape.rank()) ? 
lhs_shape : rhs_shape; - for (int idx = min_rank + 1; idx <= output_shape.rank(); idx++) - { - int out_idx = output_shape.rank() - idx; - int tmp_idx = tmp_shape.rank() - idx; - - OP_REQUIRES((out_idx >= 0) && (tmp_idx >= 0) && - (output_shape.dim(out_idx) == tmp_shape.dim(tmp_idx))); - } -} -void OperationValidator::visit(const ir::operation::Tile &node) -{ - const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto input_index{node.getInputs().at(0)}; - const auto multiple_index{node.getInputs().at(1)}; - - OP_REQUIRES(_ctx.at(multiple_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(multiple_index).shape().dim(0) == _ctx.at(input_index).shape().rank()); - OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank()); -} - -void OperationValidator::visit(const ir::operation::Range &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)}; - const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)}; - const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)}; - - // Check for dimension constraints - if (_ctx.at(output_index).info().isDynamic()) - return; - - OP_REQUIRES(_ctx.at(start_index).shape().rank() == 0); - OP_REQUIRES(_ctx.at(limit_index).shape().rank() == 0); - OP_REQUIRES(_ctx.at(delta_index).shape().rank() == 0); -} - -void OperationValidator::visit(const ir::operation::MatrixBandPart &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)}; - const auto num_lower_index{ - node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)}; - const auto num_upper_index{ - node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)}; - - // Check for dimension constraints - if (_ctx.at(output_index).info().isDynamic()) - return; - - OP_REQUIRES(_ctx.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix - OP_REQUIRES(_ctx.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar - OP_REQUIRES(_ctx.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar -} - -void OperationValidator::visit(const ir::operation::LogSoftmax &node) -{ - VERBOSE(LogSoftmax) << "Configure LOGSOFTMAX operation" << std::endl; - - const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) - return; - - const auto input_index{node.getInputs().at(0)}; - - OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank()); -} - -} // namespace compiler -} // namespace onert diff --git a/runtime/onert/core/src/compiler/ParamChecker.h b/runtime/onert/core/src/compiler/ParamChecker.h deleted file mode 100644 index 61429d521..000000000 --- a/runtime/onert/core/src/compiler/ParamChecker.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file ParamChecker.h - * @brief This file contains ParamChecker to check\n - * operations' parameters are compilable at machine independent phase\n - * ex) Check param is constant - */ -#ifndef __ONERT_COMPILER_PARAM_CHECKER_H__ -#define __ONERT_COMPILER_PARAM_CHECKER_H__ - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -class Graph; -} // namespace ir -} // namespace onert - -namespace onert -{ -namespace compiler -{ - -class ParamChecker : public ir::OperationVisitor -{ -public: - /** - * @brief Construct a new Param Checker object (deleted) - */ - ParamChecker(void) = delete; - /** - * @brief Construct a new Param Checker object - * @param[in] model Graph model to check - */ - ParamChecker(std::shared_ptr<ir::Graph> model) : _model{model} {} - -public: - /** - * @brief Run parameter analysis - */ - void operator()(); - /** - * @brief Return analysis result if model have non-const parameter - * @return @c true if there is non-const parameter, otherwise @c false - */ - bool haveNoneConstParam(void) { return _nonConstParam; } - -private: - const std::shared_ptr<ir::Graph> _model; - bool _nonConstParam{false}; -}; - -} // namespace compiler -} // namespace onert - -#endif // __ONERT_COMPILER_OPERATION_VALIDATOR_H__ diff --git a/runtime/onert/core/src/compiler/PermuteFactor.cc b/runtime/onert/core/src/compiler/PermuteFactor.cc new file mode 100644 index 000000000..f0081a2a4 --- /dev/null +++ b/runtime/onert/core/src/compiler/PermuteFactor.cc @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compiler/PermuteFactor.h" + +#include <assert.h> +#include <ostream> + +#include "backend/Backend.h" + +std::ostream &operator<<(std::ostream &os, const onert::compiler::PermuteFactor &obj) +{ + assert(obj.backend() && obj.backend()->config()); + return os << "(" << obj.backend()->config()->id() << "/" << to_string(obj.layout()) << ")"; +} diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc new file mode 100644 index 000000000..5c25ea1d1 --- /dev/null +++ b/runtime/onert/core/src/compiler/ShapeValidator.cc @@ -0,0 +1,1082 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
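Returning to the PermuteFactor stream operator introduced above: a hedged usage sketch. The brace construction from a backend pointer and a layout is inferred from the _permute_factor{backend, layout} initializer in OperationLowerInfo; the header paths, the "cpu" backend id and the helper name are assumptions, not part of this patch:

#include "compiler/PermuteFactor.h"
#include "backend/BackendManager.h" // assumed location of BackendManager
#include <iostream>

void dumpCpuPermuteFactor()
{
  // BackendManager::get().get(id) yields a null pointer when that backend is not loaded.
  const onert::backend::Backend *backend = onert::backend::BackendManager::get().get("cpu");
  if (!backend)
    return;
  onert::compiler::PermuteFactor factor{backend, onert::ir::Layout::NHWC};
  std::cout << factor << std::endl; // prints something like "(cpu/NHWC)"
}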
+ */ + +#include "ShapeValidator.h" + +#include <typeinfo> + +#include "ir/Graph.h" +#include "util/logging.h" +#include "util/Utils.h" + +#define OP_REQUIRES(EXP) \ + do \ + { \ + if (!(EXP)) \ + throw std::runtime_error("ShapeValidator failed at line " + std::to_string(__LINE__)); \ + } while (0) + +namespace onert +{ +namespace compiler +{ + +ShapeValidator::ShapeValidator(const ir::Graph &graph) : _graph{graph} {} + +void ShapeValidator::checkUnaryOp(const ir::Operation &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + if (operands.at(output_index).info().isDynamic()) + return; + + // Check if I/O shapes match + OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape()); +} + +void ShapeValidator::operator()() +{ + _graph.operations().iterate( + [&](const ir::OperationIndex &, const ir::IOperation &node) { node.accept(*this); }); +} + +void ShapeValidator::visit(const ir::operation::BatchMatMul &node) +{ + const auto &operands = _graph.operands(); + const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS)); + const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS)); + const auto out_index{node.getOutputs().at(0)}; + + if (operands.at(out_index).info().isDynamic()) + return; + + OP_REQUIRES(operands.at(lhs_index).shape().rank() <= 4); + OP_REQUIRES(operands.at(rhs_index).shape().rank() <= 4); + OP_REQUIRES(operands.at(lhs_index).shape().rank() >= 2); + OP_REQUIRES(operands.at(rhs_index).shape().rank() >= 2); +} + +void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto &operands = _graph.operands(); + const auto ofm_index{node.getOutputs().at(0)}; + if (operands.at(ofm_index).info().isDynamic()) + return; + + const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)}; + const auto block_size_index{ + node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + + const auto frontend_layout = _graph.layout(); + const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); + + // All requirement as per NNAPI specification. 
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1); + + OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2); + + if (node.getInputs().size() != 2) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + OP_REQUIRES(operands.at(crops_index).shape().rank() == 2); + OP_REQUIRES(operands.at(crops_index).shape().dim(0) == + (operands.at(ifm_index).shape().rank() - 2)); + OP_REQUIRES(operands.at(crops_index).shape().dim(1) == 2); + } + + OP_REQUIRES(input_shape.C == output_shape.C); +} + +void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node) +{ + const auto &operands = _graph.operands(); + const auto ofm_index{node.getOutputs().at(0)}; + if (operands.at(ofm_index).info().isDynamic()) + return; + + const auto ifm_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)}; + const auto weight_scales_index{ + node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_SCALES)}; + const auto weight_binary_index{ + node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_BINARY)}; + const auto weight_cluster_index{ + node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)}; + // const auto bias_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::BIAS)}; + + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 2); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 2); + OP_REQUIRES(operands.at(weight_scales_index).shape().rank() == 1); + OP_REQUIRES(operands.at(weight_binary_index).shape().rank() == 2); + OP_REQUIRES(operands.at(weight_cluster_index).shape().rank() == 2); + + OP_REQUIRES(operands.at(ifm_index).shape().dim(1) == operands.at(ofm_index).shape().dim(1)); + + OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(0) > 0); + OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(1) == 2); + + // more shape validation will be done inside kernel. + + // TODO Check bias dimension (can be null tensor) +} + +void ShapeValidator::visit(const ir::operation::BCQGather &node) +{ + const auto &operands = _graph.operands(); + const auto ofm_index{node.getOutputs().at(0)}; + if (operands.at(ofm_index).info().isDynamic()) + return; + + const auto indices_index{node.getInputs().at(ir::operation::BCQGather::Input::INDICES)}; + const auto input_binary_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)}; + const auto input_scales_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_SCALES)}; + const auto input_clusters_index{ + node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)}; + + OP_REQUIRES(operands.at(indices_index).shape().rank() <= + 2); // TODO : support rank up to 4 or more + OP_REQUIRES(operands.at(input_binary_index).shape().rank() == 2); + OP_REQUIRES(operands.at(input_scales_index).shape().rank() == 1); + OP_REQUIRES(operands.at(input_clusters_index).shape().rank() == 2); + + OP_REQUIRES(operands.at(input_clusters_index).shape().dim(0) > 0); + OP_REQUIRES(operands.at(input_clusters_index).shape().dim(1) == 2); + + // more shape validation will be done inside kernel. 
+} + +void ShapeValidator::visit(const ir::operation::Comparison &) +{ + // TODO Shape validation of comparison +} + +void ShapeValidator::visit(const ir::operation::Softmax &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto input_index{node.getInputs().at(0)}; + + OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank()); +} + +void ShapeValidator::visit(const ir::operation::InstanceNorm &node) +{ + const auto &operands = _graph.operands(); + const auto ofm_index{node.getOutputs().at(0)}; + if (operands.at(ofm_index).info().isDynamic()) + return; + + const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)}; + const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; + const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; + + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ifm_index).shape() == operands.at(ofm_index).shape()); + OP_REQUIRES(operands.at(gamma_index).shape().rank() == 1); + OP_REQUIRES(operands.at(beta_index).shape().rank() == 1); +} + +void ShapeValidator::visit(const ir::operation::Pool2D &node) +{ + const auto &operands = _graph.operands(); + const auto ofm_index{node.getOutputs().at(0)}; + if (operands.at(ofm_index).info().isDynamic()) + return; + + const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)}; + + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); +} + +void ShapeValidator::visit(const ir::operation::Permute &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto input_index{node.getInputs().at(0)}; + + OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank()); +} + +void ShapeValidator::visit(const ir::operation::Reduce &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto &input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)}; + const auto &input_shape = operands.at(input_index).shape(); + const auto &output_shape = operands.at(output_index).shape(); + + OP_REQUIRES(input_shape.rank() <= 4); + OP_REQUIRES(output_shape.rank() <= input_shape.rank()); + + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. + // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. 
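+  // Consequently, when a rank-4 input is reduced to a tensor of lower rank, only the
+  // patterns checked below are accepted: reducing H and W, reducing C, or reducing H or W
+  // when the channel dimension of both input and output is 1.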
+ if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank()) + { + if (output_shape.rank() == 2) + { + // Reducing HW + OP_REQUIRES(input_shape.dim(0) == output_shape.dim(0) && + input_shape.dim(3) == output_shape.dim(1)); + } + else if (output_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1) + OP_REQUIRES( + (input_shape.dim(0) == output_shape.dim(0) && input_shape.dim(1) == output_shape.dim(1) && + input_shape.dim(2) == output_shape.dim(2)) || + (input_shape.dim(0) == output_shape.dim(0) && + (input_shape.dim(1) == output_shape.dim(1) || input_shape.dim(2) == output_shape.dim(1)) && + input_shape.dim(3) == 1 && output_shape.dim(2) == 1)); + } + } +} + +void ShapeValidator::visit(const ir::operation::Transpose &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; + const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)}; + + const auto &output_shape = operands.at(output_index).shape(); + const auto &input_shape = operands.at(input_index).shape(); + + OP_REQUIRES(operands.at(perm_index).shape().num_elements() == 0 || + input_shape.rank() == + static_cast<int>(operands.at(perm_index).shape().num_elements())); + OP_REQUIRES(input_shape.rank() == output_shape.rank()); +} + +void ShapeValidator::visit(const ir::operation::RNN &node) +{ + // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn + // TODO Support dynamic rnn + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto hidden_state_out_index{ + node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)}; + + const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)}; + const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)}; + const auto recurrent_weights_index{ + node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)}; + const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)}; + const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)}; + + const auto batch_size = operands.at(output_index).shape().dim(0); + const auto num_units = operands.at(output_index).shape().dim(1); + + OP_REQUIRES(operands.at(output_index).shape().rank() == 2 && + operands.at(hidden_state_out_index).shape().rank() == 2 && + operands.at(input_index).shape().rank() == 2 && + operands.at(weights_index).shape().rank() == 2 && + operands.at(recurrent_weights_index).shape().rank() == 2 && + operands.at(hidden_state_in_index).shape().rank() == 2); + OP_REQUIRES(operands.at(bias_index).shape().rank() == 1); + + OP_REQUIRES(batch_size == operands.at(input_index).shape().dim(0) && + batch_size == operands.at(hidden_state_in_index).shape().dim(0) && + batch_size == operands.at(hidden_state_out_index).shape().dim(0)); + OP_REQUIRES(operands.at(input_index).shape().dim(1) == operands.at(weights_index).shape().dim(1)); + + OP_REQUIRES(num_units == operands.at(weights_index).shape().dim(0) && + num_units == operands.at(recurrent_weights_index).shape().dim(0) && + num_units == operands.at(bias_index).shape().dim(0)); + OP_REQUIRES(num_units == 
operands.at(output_index).shape().dim(1) && + num_units == operands.at(recurrent_weights_index).shape().dim(1) && + num_units == operands.at(hidden_state_in_index).shape().dim(1) && + num_units == operands.at(hidden_state_out_index).shape().dim(1)); +} + +void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node) +{ + const auto &operands = _graph.operands(); + const auto ofm_index{node.getOutputs().at(0)}; + if (operands.at(ofm_index).info().isDynamic()) + return; + + const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; + const auto block_size_index{ + node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; + const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; + + const auto frontend_layout = _graph.layout(); + const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); + + // All requirement as per NNAPI specification. + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1); + OP_REQUIRES(operands.at(paddings_index).shape().rank() == 2); + + OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2); + OP_REQUIRES(operands.at(paddings_index).shape().dim(0) == 2); + OP_REQUIRES(operands.at(paddings_index).shape().dim(1) == 2); + + OP_REQUIRES(input_shape.C == output_shape.C); +} + +void ShapeValidator::visit(const ir::operation::SpaceToDepth &node) +{ + const auto &operands = _graph.operands(); + const auto ofm_index{node.getOutputs().at(0)}; + if (operands.at(ofm_index).info().isDynamic()) + return; + + const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; + + const auto frontend_layout = _graph.layout(); + const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); + const auto block_size = node.param().block_size; + + // All assertions as per NNAPI specification. 
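+  // Summary of the checks below: input and output must be rank-4, both spatial dimensions
+  // must be divisible by block_size, the batch dimension is preserved, and the channel
+  // count grows by a factor of block_size * block_size.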
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); + OP_REQUIRES((input_shape.H % block_size == 0) && (input_shape.W % block_size == 0)); + OP_REQUIRES(input_shape.N == output_shape.N); + OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C); +} + +void ShapeValidator::visit(const ir::operation::ElementwiseActivation &node) { checkUnaryOp(node); } + +void ShapeValidator::visit(const ir::operation::ElementwiseBinary &) +{ + // TODO Shape validation of ElementwiseBinary +} + +void ShapeValidator::visit(const ir::operation::ElementwiseUnary &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; + + if (operands.at(output_index).info().isDynamic()) + return; + + OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape()); +} + +void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; + const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; + + const auto &output_obj = operands.at(output_index); + const auto &lookups_obj = operands.at(lookups_index); + const auto &values_obj = operands.at(values_index); + + // Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying + // TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729) + { + if (operands.at(output_index).info().isDynamic()) + return; + + const auto &output_shape = output_obj.shape(); + const auto &lookups_shape = lookups_obj.shape(); + const auto &values_shape = values_obj.shape(); + + OP_REQUIRES(lookups_shape.rank() == 1); + OP_REQUIRES(values_shape.rank() >= 2); + + // output should be a n-D tensor with the same rank and shape as the values tensor, except for + // the first dimension which has the same size as lookups' only dimension. 
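+    // e.g. lookups of shape [L] and values of shape [V, d1, ..., dn] yield an output of
+    // shape [L, d1, ..., dn].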
+ OP_REQUIRES(output_shape.rank() == values_shape.rank()); + OP_REQUIRES(output_shape.dim(0) == lookups_shape.dim(0)); + for (int n = 1; n < output_shape.rank(); ++n) + { + OP_REQUIRES(output_shape.dim(n) == values_shape.dim(n)); + } + } +} + +void ShapeValidator::visit(const ir::operation::ExpandDims &node) +{ + const auto &operands = _graph.operands(); + const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; + + if (operands.at(axis_index).info().isDynamic()) + return; + OP_REQUIRES(operands.at(axis_index).shape().rank() <= 1); +} + +void ShapeValidator::visit(const ir::operation::HashtableLookup &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)}; + const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)}; + const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; + const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; + + const auto &output_obj = operands.at(output_index); + const auto &lookups_obj = operands.at(lookups_index); + const auto &keys_obj = operands.at(keys_index); + const auto &values_obj = operands.at(values_index); + + if (operands.at(output_index).info().isDynamic()) + return; + + const auto &output_shape = output_obj.shape(); + const auto &lookups_shape = lookups_obj.shape(); + const auto &keys_shape = keys_obj.shape(); + const auto &values_shape = values_obj.shape(); + + OP_REQUIRES(values_shape.rank() == output_shape.rank()); + OP_REQUIRES(lookups_shape.rank() == 1); + OP_REQUIRES(keys_shape.rank() == 1); + OP_REQUIRES(values_shape.dim(0) == keys_shape.dim(0)); + OP_REQUIRES(lookups_shape.dim(0) == output_shape.dim(0)); +} + +void ShapeValidator::visit(const ir::operation::TransposeConv &node) +{ + // shape check + const auto &operands = _graph.operands(); + const auto ofm_index{node.getOutputs().at(0)}; + + if (operands.at(ofm_index).info().isDynamic()) + return; + + const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; + const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; + + // Only 4D tensors are supported + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ifm_index).shape().rank()); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ker_index).shape().rank()); + + const auto frontend_layout = _graph.layout(); + const auto ofm_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); + const auto ifm_shape = operands.at(ifm_index).shape().asFeature(frontend_layout); + // The kernel has only IHWO layout on frontend + // So ker_shape is treated here below + // I -> N + // H -> H + // W -> W + // O -> C + const auto ker_shape = operands.at(ker_index).shape().asFeature(ir::Layout::NHWC); + + OP_REQUIRES(ifm_shape.N == ofm_shape.N); + OP_REQUIRES(ifm_shape.C == ker_shape.C); + OP_REQUIRES(ker_shape.N == ofm_shape.C); +} + +void ShapeValidator::visit(const ir::operation::Gather &node) +{ + const auto &operands = _graph.operands(); + const auto ofm_index{node.getOutputs().at(0)}; + if (operands.at(ofm_index).info().isDynamic()) + return; + + const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; + const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; + + const auto &ifm_shape = operands.at(ifm_index).shape(); + const 
auto &indices_shape = operands.at(indices_index).shape(); + const auto &ofm_shape = operands.at(ofm_index).shape(); + + OP_REQUIRES(ifm_shape.rank() <= 4); + OP_REQUIRES(indices_shape.rank() <= 3); + OP_REQUIRES(ofm_shape.rank() <= 4); +} + +void ShapeValidator::visit(const ir::operation::DepthToSpace &node) +{ + const auto &operands = _graph.operands(); + int32_t block_size = node.param().block_size; + + // shape check + const auto output_index{node.getOutputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; + + const auto frontend_layout = _graph.layout(); + const auto output_shape = operands.at(output_index).shape().asFeature(frontend_layout); + const auto input_shape = operands.at(input_index).shape().asFeature(frontend_layout); + + OP_REQUIRES(operands.at(input_index).shape().rank() == 4); + OP_REQUIRES(operands.at(output_index).shape().rank() == 4); + + { + OP_REQUIRES(output_shape.N == input_shape.N); + OP_REQUIRES(output_shape.H == input_shape.H * block_size); + OP_REQUIRES(output_shape.W == input_shape.W * block_size); + OP_REQUIRES(input_shape.C % (block_size * block_size) == 0); + OP_REQUIRES(output_shape.C == input_shape.C / (block_size * block_size)); + } +} + +void ShapeValidator::visit(const ir::operation::Pack &node) +{ + const auto &operands = _graph.operands(); + const auto axis{node.param().axis}; + const auto output_index{node.getOutputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + // shape check + const auto &output_shape = operands.at(output_index).shape(); + const auto output_rank = static_cast<int32_t>(output_shape.rank()); + + const auto input1_index{node.getInputs().at(0)}; + const auto &input_shape = operands.at(input1_index).shape(); + + OP_REQUIRES(axis >= -output_rank && axis < output_rank); + for (const auto &index : node.getInputs()) + { + OP_REQUIRES(input_shape == operands.at(index).shape()); + } +} + +void ShapeValidator::visit(const ir::operation::LSTM &node) +{ + // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn + // TODO Support dynamic rnn + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto scratch_buffer_index{ + node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; // Optional + const auto output_state_out_index{ + node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; // Optional + const auto cell_state_out_index{ + node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; // Optional + + const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)}; + const auto input_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // Optional + const auto input_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)}; + const auto input_to_cell_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)}; + const auto input_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; + const auto recurrent_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // Optional + const auto recurrent_to_forget_weights_index{ + 
node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)}; + const auto recurrent_to_cell_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)}; + const auto recurrent_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; + const auto cell_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // Optional + const auto cell_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // Optional + const auto cell_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // Optional + const auto input_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; // Optional + const auto forget_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)}; + const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)}; + const auto output_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)}; + const auto projection_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // Optional + const auto projection_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // Optional + const auto output_state_in_index{ + node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; + const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; + + OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank()); + for (int i = 0; i < operands.at(input_index).shape().rank() - 1; ++i) + { + OP_REQUIRES(operands.at(input_index).shape().dim(i) == + operands.at(output_index).shape().dim(i)); + } + OP_REQUIRES((operands.at(output_index).shape().rank() == 2 || + operands.at(output_index).shape().rank() == 3) && + (operands.at(input_index).shape().rank() == 2 || + operands.at(input_index).shape().rank() == 3) && + (!operands.exist(input_to_input_weights_index) || + operands.at(input_to_input_weights_index).shape().rank() == 2) && + operands.at(input_to_forget_weights_index).shape().rank() == 2 && + operands.at(input_to_cell_weights_index).shape().rank() == 2 && + operands.at(input_to_output_weights_index).shape().rank() == 2 && + (!operands.exist(recurrent_to_input_weights_index) || + operands.at(recurrent_to_input_weights_index).shape().rank() == 2) && + operands.at(recurrent_to_forget_weights_index).shape().rank() == 2 && + operands.at(recurrent_to_cell_weights_index).shape().rank() == 2 && + operands.at(recurrent_to_output_weights_index).shape().rank() == 2 && + (!operands.exist(projection_weights_index) || + operands.at(projection_weights_index).shape().rank() == 2) && + operands.at(output_state_in_index).shape().rank() == 2 && + operands.at(cell_state_in_index).shape().rank() == 2); + + OP_REQUIRES((!operands.exist(cell_to_input_weights_index) || + operands.at(cell_to_input_weights_index).shape().rank() == 1) && + (!operands.exist(cell_to_forget_weights_index) || + operands.at(cell_to_forget_weights_index).shape().rank() == 1) && + (!operands.exist(cell_to_output_weights_index) || + operands.at(cell_to_output_weights_index).shape().rank() == 1) && + (!operands.exist(input_gate_bias_index) || + operands.at(input_gate_bias_index).shape().rank() == 1) && + operands.at(forget_gate_bias_index).shape().rank() == 1 && + 
operands.at(cell_bias_index).shape().rank() == 1 && + operands.at(output_gate_bias_index).shape().rank() == 1 && + (!operands.exist(projection_bias_index) || + operands.at(projection_bias_index).shape().rank() == 1)); + + // CIFG assertion + OP_REQUIRES(((!operands.exist(input_to_input_weights_index) || + (operands.at(input_to_input_weights_index).shape().dim(0) == 0 && + operands.at(input_to_input_weights_index).shape().dim(1) == 0)) && + (!operands.exist(recurrent_to_input_weights_index) || + (operands.at(recurrent_to_input_weights_index).shape().dim(0) == 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) && + (!operands.exist(input_gate_bias_index) || + operands.at(input_gate_bias_index).shape().dim(0) == 0) && + (!operands.exist(cell_to_input_weights_index) || + operands.at(cell_to_input_weights_index).shape().dim(0) == 0)) || + ((operands.exist(input_to_input_weights_index) && + (operands.at(input_to_input_weights_index).shape().dim(0) != 0 && + operands.at(input_to_input_weights_index).shape().dim(1) != 0)) && + (operands.exist(recurrent_to_input_weights_index) && + (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) && + (operands.exist(input_gate_bias_index) && + operands.at(input_gate_bias_index).shape().dim(0) != 0))); + + // Peephole assertion + OP_REQUIRES(((!operands.exist(cell_to_forget_weights_index) || + operands.at(cell_to_forget_weights_index).shape().dim(0) == 0) && + (!operands.exist(cell_to_output_weights_index) || + operands.at(cell_to_output_weights_index).shape().dim(0) == 0)) || + ((operands.exist(cell_to_forget_weights_index) && + operands.at(cell_to_forget_weights_index).shape().dim(0) != 0) && + (operands.exist(cell_to_output_weights_index) && + operands.at(cell_to_output_weights_index).shape().dim(0) != 0))); + + bool has_input_to_input_weights = + operands.exist(input_to_input_weights_index) && + (operands.at(input_to_input_weights_index).shape().dim(0) != 0 && + operands.at(input_to_input_weights_index).shape().dim(1) != 0); + bool has_recurrent_to_input_weights = + operands.exist(recurrent_to_input_weights_index) && + (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0); + bool has_input_gate_bias = + operands.exist(input_gate_bias_index) && operands.at(input_gate_bias_index).shape().dim(0) != 0; + bool has_cell_to_input_weights = operands.exist(cell_to_input_weights_index) && + operands.at(cell_to_input_weights_index).shape().dim(0) != 0; + bool has_cell_to_forget_weights = operands.exist(cell_to_forget_weights_index) && + operands.at(cell_to_forget_weights_index).shape().dim(0) != 0; + bool has_cell_to_output_weights = operands.exist(cell_to_output_weights_index) && + operands.at(cell_to_output_weights_index).shape().dim(0) != 0; + bool has_projection_weights = operands.exist(projection_weights_index) && + (operands.at(projection_weights_index).shape().dim(0) != 0 && + operands.at(projection_weights_index).shape().dim(1) != 0); + bool has_projection_bias = + operands.exist(projection_bias_index) && operands.at(projection_bias_index).shape().dim(0) != 0; + + // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG). + // true: no CIFG + // false: CIFG + bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; + + // NOTE The cell_to_input_weights do not exist in regular CIFG although peephole. 
+ // true: peephole + // false: no peephole + bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights; + + // NOTE The projection weights may have data but the projection bias may not. + bool has_projection_param = has_projection_weights; + + const auto batch_size = (operands.at(input_index).shape().rank() == 3 && node.param().time_major) + ? operands.at(input_index).shape().dim(1) + : operands.at(input_index).shape().dim(0); + OP_REQUIRES(batch_size == operands.at(output_state_in_index).shape().dim(0) && + batch_size == operands.at(cell_state_in_index).shape().dim(0)); + + const auto input_size = + operands.at(input_index).shape().dim(operands.at(input_index).shape().rank() - 1); + OP_REQUIRES(input_size == operands.at(input_to_forget_weights_index).shape().dim(1) && + input_size == operands.at(input_to_cell_weights_index).shape().dim(1) && + input_size == operands.at(input_to_output_weights_index).shape().dim(1)); + + const auto num_units = operands.at(input_to_output_weights_index).shape().dim(0); + OP_REQUIRES(num_units == operands.at(input_to_cell_weights_index).shape().dim(0) && + num_units == operands.at(input_to_output_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_forget_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_cell_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_output_weights_index).shape().dim(0) && + num_units == operands.at(forget_gate_bias_index).shape().dim(0) && + num_units == operands.at(cell_bias_index).shape().dim(0) && + num_units == operands.at(output_gate_bias_index).shape().dim(0) && + num_units == operands.at(cell_state_in_index).shape().dim(1)); + + const auto output_size = + operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1); + OP_REQUIRES(output_size == operands.at(recurrent_to_forget_weights_index).shape().dim(1) && + output_size == operands.at(recurrent_to_cell_weights_index).shape().dim(1) && + output_size == operands.at(recurrent_to_output_weights_index).shape().dim(1) && + output_size == operands.at(output_state_in_index).shape().dim(1)); + + if (has_cifg_param) + { + OP_REQUIRES(input_size == operands.at(input_to_input_weights_index).shape().dim(1)); + OP_REQUIRES( + num_units == operands.at(input_to_input_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_input_weights_index).shape().dim(0) && + ((operands.exist(cell_to_input_weights_index) && + num_units == operands.at(cell_to_input_weights_index).shape().dim(0)) || + (!operands.exist(cell_to_input_weights_index) || + operands.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) && + num_units == operands.at(input_gate_bias_index).shape().dim(0)); + OP_REQUIRES(output_size == operands.at(recurrent_to_input_weights_index).shape().dim(1)); + OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights && + has_input_gate_bias); + if (has_cell_to_input_weights) + { + // NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole. 
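+      // Since this is the non-CIFG path, their presence implies the peephole weights exist too.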
+ OP_REQUIRES(has_peephole_param); + } + if (operands.exist(scratch_buffer_index)) + OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 4); + } + else + { + if (operands.exist(scratch_buffer_index)) + OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 3); + } + + if (has_peephole_param) + { + OP_REQUIRES(num_units == operands.at(cell_to_forget_weights_index).shape().dim(0) && + num_units == operands.at(cell_to_output_weights_index).shape().dim(0) && + (num_units == operands.at(cell_to_input_weights_index).shape().dim(0) || + operands.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */)); + } + + if (has_projection_param) + { + OP_REQUIRES(num_units == operands.at(projection_weights_index).shape().dim(1)); + OP_REQUIRES(output_size == operands.at(projection_weights_index).shape().dim(0)); + if (has_projection_bias) + { + OP_REQUIRES(output_size == operands.at(projection_bias_index).shape().dim(0)); + } + } + + if (operands.exist(scratch_buffer_index)) + { + OP_REQUIRES(operands.at(scratch_buffer_index).shape().rank() == 2); + OP_REQUIRES(batch_size == operands.at(scratch_buffer_index).shape().dim(0)); + } + + if (operands.exist(output_state_out_index)) + { + OP_REQUIRES(operands.at(output_state_out_index).shape().rank() == 2); + OP_REQUIRES(batch_size == operands.at(output_state_out_index).shape().dim(0)); + OP_REQUIRES(output_size == operands.at(output_state_out_index).shape().dim(1)); + } + + if (operands.exist(cell_state_out_index)) + { + OP_REQUIRES(operands.at(cell_state_out_index).shape().rank() == 2); + OP_REQUIRES(batch_size == operands.at(cell_state_out_index).shape().dim(0)); + OP_REQUIRES(num_units == operands.at(cell_state_out_index).shape().dim(1)); + } +} + +void ShapeValidator::visit(const ir::operation::L2Normalization &node) +{ + const auto &operands = _graph.operands(); + const auto ofm_index{node.getOutputs().at(0)}; + if (operands.at(ofm_index).info().isDynamic()) + return; + + const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)}; + + auto ifm_shape = operands.at(ifm_index).shape(); + auto ofm_shape = operands.at(ofm_index).shape(); + + OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank()); + + for (auto i = 0; i < ifm_shape.rank(); i++) + { + OP_REQUIRES(ifm_shape.dim(i) == ofm_shape.dim(i)); + } +} + +void ShapeValidator::visit(const ir::operation::Unpack &node) +{ + const auto &operands = _graph.operands(); + const auto axis{node.param().axis}; + const auto output_index{node.getInputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)}; + + const auto &input_shape = operands.at(input_index).shape(); + const auto input_rank = static_cast<int32_t>(input_shape.rank()); + + OP_REQUIRES(axis >= -input_rank && axis < input_rank); +} + +void ShapeValidator::visit(const ir::operation::Pad &node) +{ + const auto &operands = _graph.operands(); + const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)}; + OP_REQUIRES(operands.at(pad_index).typeInfo().type() == ir::DataType::INT32); + + const auto output_index{node.getInputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)}; + + const auto &pad_shape = operands.at(pad_index).shape(); + const auto input_rank = static_cast<int32_t>(operands.at(input_index).shape().rank()); + + OP_REQUIRES(pad_shape.rank() == 2); + 
OP_REQUIRES(pad_shape.dim(0) == input_rank); + OP_REQUIRES(pad_shape.dim(1) == 2); + OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank()); +} + +void ShapeValidator::visit(const ir::operation::Select &) +{ + // TODO Shape validation of select +} + +void ShapeValidator::visit(const ir::operation::StridedSlice &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)}; + + if (operands.at(output_index).info().isDynamic()) + return; + + OP_REQUIRES(operands.at(input_index).shape().rank() <= 4); +} + +void ShapeValidator::visit(const ir::operation::Split &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)}; + + const auto num_splits = node.param().num_splits; + const auto input_rank = operands.at(input_index).shape().rank(); + auto axis = *reinterpret_cast<const int32_t *>(operands.at(axis_index).data()->base()); + axis = axis < 0 ? axis + input_rank : axis; + + OP_REQUIRES(axis >= 0 && axis < input_rank); + OP_REQUIRES(operands.at(input_index).shape().dim(axis) % num_splits == 0); +} + +void ShapeValidator::visit(const ir::operation::Shape &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto input_index{node.getInputs().at(0)}; + UNUSED_RELEASE(input_index); + OP_REQUIRES(operands.at(output_index).shape().rank() == 1); +} + +void ShapeValidator::visit(const ir::operation::ResizeBilinear &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; + + if (operands.at(output_index).info().isDynamic()) + { + return; + } + OP_REQUIRES(operands.at(input_index).shape().rank() == 4); + OP_REQUIRES(operands.at(output_index).shape().rank() == 4); +} + +void ShapeValidator::visit(const ir::operation::Reverse &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)}; + + if (operands.at(output_index).info().isDynamic()) + return; + OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape()); +} + +void ShapeValidator::visit(const ir::operation::If &) +{ + // TODO Add to validate with subgraphs +} + +void ShapeValidator::visit(const ir::operation::While &) +{ + // This validator does not check shape. So checking isDynamic() is skipped. 
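+  // No constraints are enforced on the While operation's inputs or outputs at this point.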
+ // TODO Add to validate with subgraphs +} + +void ShapeValidator::visit(const ir::operation::SquaredDifference &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; + + // Check for dimension constraints + if (operands.at(output_index).info().isDynamic()) + return; + + auto output_shape = operands.at(output_index).shape(); + auto lhs_shape = operands.at(lhs_index).shape(); + auto rhs_shape = operands.at(rhs_index).shape(); + // Check for output rank + OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank())); + auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank()); + + for (int idx = 1; idx <= min_rank; idx++) + { + int l_idx = lhs_shape.rank() - idx; + int r_idx = rhs_shape.rank() - idx; + int out_idx = output_shape.rank() - idx; + + OP_REQUIRES((l_idx >= 0) && (r_idx >= 0) && (out_idx >= 0)); + + auto l_dims = lhs_shape.dim(l_idx); + auto r_dims = rhs_shape.dim(r_idx); + auto out_dims = output_shape.dim(out_idx); + + OP_REQUIRES(((l_dims == r_dims) && (out_dims == l_dims)) || + ((l_dims == 1) && (out_dims == r_dims)) || ((r_dims == 1) && (out_dims == l_dims))); + } + auto &tmp_shape = (lhs_shape.rank() > rhs_shape.rank()) ? lhs_shape : rhs_shape; + for (int idx = min_rank + 1; idx <= output_shape.rank(); idx++) + { + int out_idx = output_shape.rank() - idx; + int tmp_idx = tmp_shape.rank() - idx; + + OP_REQUIRES((out_idx >= 0) && (tmp_idx >= 0) && + (output_shape.dim(out_idx) == tmp_shape.dim(tmp_idx))); + } +} +void ShapeValidator::visit(const ir::operation::Tile &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto input_index{node.getInputs().at(0)}; + const auto multiple_index{node.getInputs().at(1)}; + + OP_REQUIRES(operands.at(multiple_index).shape().rank() == 1); + OP_REQUIRES(operands.at(multiple_index).shape().dim(0) == + operands.at(input_index).shape().rank()); + OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank()); +} + +void ShapeValidator::visit(const ir::operation::Range &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)}; + const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)}; + const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)}; + + // Check for dimension constraints + if (operands.at(output_index).info().isDynamic()) + return; + + OP_REQUIRES(operands.at(start_index).shape().rank() == 0); + OP_REQUIRES(operands.at(limit_index).shape().rank() == 0); + OP_REQUIRES(operands.at(delta_index).shape().rank() == 0); +} + +void ShapeValidator::visit(const ir::operation::MatrixBandPart &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)}; + const auto num_lower_index{ + node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)}; + const auto num_upper_index{ + node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)}; + + // Check for dimension constraints + if 
(operands.at(output_index).info().isDynamic()) + return; + + OP_REQUIRES(operands.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix + OP_REQUIRES(operands.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar + OP_REQUIRES(operands.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar +} + +void ShapeValidator::visit(const ir::operation::LogSoftmax &node) +{ + const auto &operands = _graph.operands(); + const auto output_index{node.getOutputs().at(0)}; + if (operands.at(output_index).info().isDynamic()) + return; + + const auto input_index{node.getInputs().at(0)}; + + OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank()); +} + +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/OperationValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h index deb6357bb..a51e8adc0 100644 --- a/runtime/onert/core/src/compiler/OperationValidator.h +++ b/runtime/onert/core/src/compiler/ShapeValidator.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_COMPILER_OPERATION_VALIDATOR_H__ -#define __ONERT_COMPILER_OPERATION_VALIDATOR_H__ +#ifndef __ONERT_COMPILER_SHAPE_VALIDATOR_H__ +#define __ONERT_COMPILER_SHAPE_VALIDATOR_H__ #include "ir/Layout.h" #include "ir/OperationVisitor.h" @@ -34,18 +34,25 @@ namespace onert namespace compiler { -class OperationValidator : public ir::OperationVisitor +class ShapeValidator : public ir::OperationVisitor { public: - OperationValidator(void) = delete; - OperationValidator(const ir::Graph &graph); + ShapeValidator(void) = delete; + ShapeValidator(const ir::Graph &graph); + ShapeValidator(const ShapeValidator &) = delete; + ShapeValidator(ShapeValidator &&) = delete; + ~ShapeValidator() = default; public: + ShapeValidator &operator=(const ShapeValidator &) = delete; + ShapeValidator &operator=(ShapeValidator &&) = delete; void operator()(); public: void visit(const ir::operation::BatchMatMul &node) override; void visit(const ir::operation::BatchToSpaceND &node) override; + void visit(const ir::operation::BCQFullyConnected &node) override; + void visit(const ir::operation::BCQGather &node) override; void visit(const ir::operation::Comparison &node) override; void visit(const ir::operation::Softmax &node) override; void visit(const ir::operation::InstanceNorm &node) override; @@ -88,13 +95,10 @@ private: void checkUnaryOp(const ir::Operation &node); private: - // TODO Remove _ctx field const ir::Graph &_graph; - const ir::Operands &_ctx; - ir::Layout _current_op_seq_layout; }; } // namespace compiler } // namespace onert -#endif // __ONERT_COMPILER_OPERATION_VALIDATOR_H__ +#endif // __ONERT_COMPILER_SHAPE_VALIDATOR_H__ diff --git a/runtime/onert/core/src/compiler/StaticShapeInference.cc b/runtime/onert/core/src/compiler/StaticShapeInference.cc deleted file mode 100644 index 4eba1ff49..000000000 --- a/runtime/onert/core/src/compiler/StaticShapeInference.cc +++ /dev/null @@ -1,1096 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "compiler/StaticShapeInference.h" -#include "util/ShapeInference.h" -#include "util/logging.h" - -#include <sstream> - -namespace onert -{ -namespace compiler -{ - -bool StaticShapeInferer::infer(const ir::OpSequence &op_seq) -{ - bool has_dynamic_tensor = false; - - for (const auto &operation_idx : op_seq.operations()) - { - auto &op = _operations.at(operation_idx); - auto opcode = op.opcode(); - - _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit() - - // IF: need shape inference for then, else - // While: need shape inference for condition, body - if (opcode == ir::OpCode::If || opcode == ir::OpCode::While) - { - op.accept(*this); - } - else - { - _return_has_dynamic_tensor = checkDynamicInput(op); - - if (_return_has_dynamic_tensor) - { - setDynamicOutput(op); - } - else - { - op.accept(*this); - } - } - - has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor; - } - - return has_dynamic_tensor; -} - -bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op) -{ - for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED) - { - if (_operands.at(input_idx).info().isDynamic()) - { - return true; - } - } - - return false; -} - -void StaticShapeInferer::setDynamicOutput(const ir::Operation &op) -{ - for (auto output_idx : op.getOutputs()) - { - _operands.at(output_idx).info().setDynamic(); - } -} - -void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op, - const ir::OperandIndex lhs_idx, - const ir::OperandIndex rhs_idx) -{ - const auto &lhs = _operands.at(lhs_idx); - const auto &rhs = _operands.at(rhs_idx); - - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - // re-sizing output shape - ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape()); - output.info().shape(new_shape); -} - -void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op, - const ir::OperandIndex input_idx) -{ - const auto &input = _operands.at(input_idx); - - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - // re-sizing output shape - ir::Shape new_shape = input.info().shape(); - output.info().shape(new_shape); -} - -void StaticShapeInferer::dump() -{ - auto get_shape_str = [](const ir::Shape &shape) { - std::stringstream sstream; - sstream << "shape : {"; - for (int i = 0; i < shape.rank(); i++) - { - if (i == 0) - sstream << shape.dim(i); - else - sstream << " " << shape.dim(i); - } - sstream << "}"; - return sstream.str(); - }; - - for (const auto &pair : _lowered_subgs) - { - const auto index = pair.first; - const auto &lowered_subg = pair.second; - VERBOSE(StaticShapeInferer) << "SubGraph #" << index.value() << std::endl; - lowered_subg->graph().operands().iterate( - [&](const ir::OperandIndex &ind, const ir::Operand &operand) { - VERBOSE(StaticShapeInferer) << "Operand #" << ind.value() << ", " - << (operand.info().isDynamic() ? 
"Dynamic" : "Static") << ", " - << get_shape_str(operand.info().shape()) << std::endl; - }); - } -} - -void StaticShapeInferer::visit(const ir::operation::ArgMax &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - const auto rank = input.info().shape().rank(); - const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis); - - assert(0 <= axis && axis < rank); - - // re-sizing output shape - ir::Shape new_shape = shape_inference::inferArgMaxShape(input.info().shape(), axis, rank); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op) -{ - const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS); - const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS); - const auto output_index = op.getOutputs().at(0); - const auto lhs = _operands.at(lhs_index); - const auto rhs = _operands.at(rhs_index); - auto &output = _operands.at(output_index); - auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param()); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS), - op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)); -} - -void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op) -{ - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)}; - const auto &shape = _operands.at(shape_idx); - - if (!shape.isConstant()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - - // assert(shape.typeInfo().type() == ir::DataType::INT32); - auto shape_buffer = reinterpret_cast<const int32_t *>(shape.data()->base()); - - // re-sizing output shape - ir::Shape new_shape = shape_inference::inferBroadcastToShape(shape.info().shape(), shape_buffer); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Comparison &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0), - op.getInputs().at(ir::operation::Comparison::Input::INPUT1)); -} - -void StaticShapeInferer::visit(const ir::operation::Concat &op) -{ - const auto input_count = op.getInputs().size(); - - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - shape_inference::Shapes input_shapes; - for (uint32_t i = 0; i < input_count; i++) - { - const auto input_idx{op.getInputs().at(i)}; - const auto &input = _operands.at(input_idx); - input_shapes.emplace_back(input.shape()); - } - - ir::Shape out_shape = shape_inference::inferConcatShape(input_shapes, op.param()); - - // re-sizing output shape - output.info().shape(out_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Conv2D &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::Conv2D::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - const auto ker_idx{op.getInputs().at(ir::operation::Conv2D::Input::KERNEL)}; - const auto &ker = _operands.at(ker_idx); - const auto output_idx = op.getOutputs().at(0); - 
ir::Operand &output = _operands.at(output_idx); - - // re-sizing output shape - ir::Shape new_shape = - shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param()); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)); -} - -void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS), - op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)); -} - -void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)); -} - -void StaticShapeInferer::visit(const ir::operation::ExpandDims &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - const auto axis_idx{op.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; - const auto &axis = _operands.at(axis_idx); - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - if (!axis.isConstant()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - - // even when axis is constant, output shape should be recalculated since user might call - // nnfw_set_input_tensorinfo(input, some_new_shape) - auto axis_buf = reinterpret_cast<const int32_t *>(axis.data()->base()); - assert(axis_buf); - - // re-sizing output shape - ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_buf[0]); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Fill &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::Fill::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - if (!input.isConstant()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - - assert(input.typeInfo().type() == ir::DataType::INT32); - - auto input_buf = reinterpret_cast<const int32_t *>(input.data()->base()); - assert(input_buf); - - // re-sizing output shape - ir::Shape new_shape = shape_inference::inferFillShape(input.info().shape(), input_buf); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::FullyConnected &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - - const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)}; - const auto &ker = _operands.at(ker_idx); - - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - // re-sizing output shape - ir::Shape new_shape = - shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape()); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::FusedBatchNorm &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::FusedBatchNorm::Input::INPUT)); -} - -void StaticShapeInferer::visit(const ir::operation::Gather &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - - // get mutable 
output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)}; - const auto &indices = _operands.at(indices_idx); - const auto rank = input.info().shape().rank(); - const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis); - - assert(0 <= axis && axis < rank); - - // re-sizing output shape - ir::Shape new_shape = - shape_inference::inferGatherShape(input.info().shape(), indices.info().shape(), axis, rank); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::If &op) -{ - auto &then_graph = _lowered_subgs.at(op.param().then_subg_index)->graph(); - auto &else_graph = _lowered_subgs.at(op.param().else_subg_index)->graph(); - const std::vector<ir::OperandIndex> inputs{op.getInputs().begin() + 1, op.getInputs().end()}; - const auto &outputs = op.getOutputs(); - - // re-sizing input shapes of then subgraph - const auto &then_inputs = then_graph.getInputs(); - assert(inputs.size() == then_inputs.size()); - for (size_t i = 0; i < inputs.size(); ++i) - { - auto &then_input = then_graph.operands().at(then_inputs.at(i)); - if (_operands.at(inputs.at(i)).info().isDynamic()) - { - then_input.info().setDynamic(); - } - else - { - auto new_shape = _operands.at(inputs.at(i)).info().shape(); - then_input.info().shape(new_shape); - } - } - - // re-sizing input shapes of else subgraph - const auto &else_inputs = else_graph.getInputs(); - assert(inputs.size() == else_inputs.size()); - for (size_t i = 0; i < inputs.size(); ++i) - { - auto &else_input = else_graph.operands().at(else_inputs.at(i)); - if (_operands.at(inputs.at(i)).info().isDynamic()) - { - else_input.info().setDynamic(); - } - else - { - const auto &new_shape = _operands.at(inputs.at(i)).info().shape(); - else_input.info().shape(new_shape); - } - } - - // re-sizing operands of then subgraph - StaticShapeInferer then_inferer(op.param().then_subg_index, _lowered_subgs); - _lowered_subgs.at(op.param().then_subg_index) - ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) { - bool has_dynamic_tensor = then_inferer.infer(op_seq); - op_seq.has_dynamic_tensor(has_dynamic_tensor); - }); - - // re-sizing operands of else subgraph - StaticShapeInferer else_inferer(op.param().else_subg_index, _lowered_subgs); - _lowered_subgs.at(op.param().else_subg_index) - ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) { - bool has_dynamic_tensor = else_inferer.infer(op_seq); - op_seq.has_dynamic_tensor(has_dynamic_tensor); - }); - - // re-sizing output shapes - const auto &then_outputs = _lowered_subgs.at(op.param().then_subg_index)->graph().getOutputs(); - const auto &else_outputs = _lowered_subgs.at(op.param().else_subg_index)->graph().getOutputs(); - assert(outputs.size() == then_outputs.size()); - assert(outputs.size() == else_outputs.size()); - for (size_t i = 0; i < outputs.size(); ++i) - { - const auto &then_output = then_graph.operands().at(then_outputs.at(i)); - const auto &else_output = else_graph.operands().at(else_outputs.at(i)); - auto &output = _operands.at(outputs.at(i)); - if (!then_output.info().isDynamic() && !else_output.info().isDynamic() && - then_output.shape() == else_output.shape()) - { - output.info().shape(then_output.shape()); - } - else - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - } - } -} - -void StaticShapeInferer::visit(const ir::operation::L2Normalization 
&op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT)); -} - -void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)); -} - -void StaticShapeInferer::visit(const ir::operation::OneHot &op) -{ - const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)}; - const auto &indice = _operands.at(indice_idx); - const auto depth_idx{op.getInputs().at(ir::operation::OneHot::Input::DEPTH)}; - const auto &depth = _operands.at(depth_idx); - - const auto axis = op.param().axis; - - auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - if (!depth.isConstant()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - - const auto *depth_buf = reinterpret_cast<const int32_t *>(depth.data()->base()); - assert(depth_buf); - // re-sizing output shape - ir::Shape new_shape = shape_inference::inferOnehotShape(indice.info().shape(), *depth_buf, axis); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Pack &op) -{ - const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); - - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - const auto rank = input.shape().rank() + 1; - const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis); - const auto num = op.param().num; - - assert(0 <= axis && axis < rank); - - // re-sizing output shape - ir::Shape new_shape = shape_inference::inferPackShape(input.info().shape(), axis, rank, num); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Pad &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::Pad::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - - const auto pad_idx{op.getInputs().at(ir::operation::Pad::Input::PAD)}; - const auto &pad = _operands.at(pad_idx); - - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - // if pad is not constant, output also becomes dynamic - if (!pad.isConstant()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - - // re-sizing output shape - const auto new_shape = shape_inference::inferPadShape( - input.shape(), reinterpret_cast<const int32_t *>(pad.data()->base()), - pad.shape().num_elements()); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Permute &op) -{ - const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - // re-sizing output shape - // Permute is a special operation that layouts of input/output may be different on backend - // However, it is not applied here, so input/output have the same layout of frontend. 
Because - // "ExecutorFactory" would convert shape of input/output accoding to the layouts when registering - // operand info to "TensorBuilder" after calling "StaticShapeInferer" - const auto new_shape = input.info().shape(); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Pow &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Pow::Input::LHS), - op.getInputs().at(ir::operation::Pow::Input::RHS)); -} - -void StaticShapeInferer::visit(const ir::operation::Range &op) -{ - const auto start_idx{op.getInputs().at(ir::operation::Range::Input::START)}; - const auto limit_idx{op.getInputs().at(ir::operation::Range::Input::LIMIT)}; - const auto delta_idx{op.getInputs().at(ir::operation::Range::Input::DELTA)}; - const auto &start_op = _operands.at(start_idx); - const auto &limit_op = _operands.at(limit_idx); - const auto &delta_op = _operands.at(delta_idx); - - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - ir::Shape new_shape; - if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant()) - { - assert(start_op.typeInfo().type() == limit_op.typeInfo().type() && - start_op.typeInfo().type() == delta_op.typeInfo().type()); - if (output.typeInfo().type() == ir::DataType::FLOAT32) - { - new_shape = shape_inference::inferRangeShape<float>( - start_op.asScalar<float>(), limit_op.asScalar<float>(), delta_op.asScalar<float>()); - } - else if (output.typeInfo().type() == ir::DataType::INT32) - { - new_shape = shape_inference::inferRangeShape<int32_t>( - start_op.asScalar<int32_t>(), limit_op.asScalar<int32_t>(), delta_op.asScalar<int32_t>()); - } - assert(output.shape() == new_shape); - } - else - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - } -} - -void StaticShapeInferer::visit(const ir::operation::Reduce &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - - const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)}; - const auto &axes = _operands.at(axes_idx); - - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - std::vector<int32_t> axes_vec; - for (size_t i = 0; i < axes.shape().num_elements(); ++i) - { - switch (axes.typeInfo().type()) - { - case ir::DataType::INT32: - { - axes_vec.emplace_back(reinterpret_cast<const int32_t *>(axes.data()->base())[i]); - break; - } - case ir::DataType::INT64: - { - axes_vec.emplace_back(reinterpret_cast<const int64_t *>(axes.data()->base())[i]); - break; - } - default: - throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported data type"); - break; - } - } - const auto keep_dims = op.param().keep_dims; - - // re-sizing output shape - ir::Shape new_shape = - shape_inference::inferReduceShape(input.info().shape(), axes_vec, keep_dims); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Reshape &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - // New shape is given by second input tensor - if (op.getInputs().size() == 2) - { - // Let's check the second input - const auto 
shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)}; - const auto &shape = _operands.at(shape_idx); - - if (shape.isConstant()) - { - const auto *shape_buf = reinterpret_cast<const int32_t *>(shape.data()->base()); - assert(shape_buf); - - ir::Shape new_shape = shape_inference::inferReshapeShape( - shape_buf, shape.shape().num_elements(), input.shape().num_elements()); - - // if shape is from Const, TFLC put the shape of output into tensor - if (new_shape != output.shape()) - { - // change on output shape - output.info().shape(new_shape); - } - } - else - { - // if shape is NOT Const, set output shape to be dynamic_ - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - } - } - // New shape is given by option - else if (op.param().new_shape.size() != 0) - { - // Let's check the new_shape option - auto shape = op.param().new_shape; - ir::Shape new_shape = shape_inference::inferReshapeShape(shape.data(), shape.size(), - input.shape().num_elements()); - - if (new_shape != output.shape()) - { - // change on output shape - output.info().shape(new_shape); - } - } - else - { - throw std::runtime_error("Reshape: new shape is missing"); - } -} - -void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - // Shape inferencing logic based on Params - ir::Shape new_shape = shape_inference::inferResizeBilinearShape( - input.shape(), op.param().height_out, op.param().width_out); - - // if size_op is from Const, TFLC put the shape of output into tensor - if (new_shape != output.shape()) - { - // change on output shape - output.info().shape(new_shape); - } -} - -void StaticShapeInferer::visit(const ir::operation::Reverse &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT)); -} - -void StaticShapeInferer::visit(const ir::operation::Select &op) -{ - const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)}; - const auto &input_cond = _operands.at(input_cond_idx); - - const auto input_true_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)}; - const auto &input_true = _operands.at(input_true_idx); - - const auto input_false_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)}; - const auto &input_false = _operands.at(input_false_idx); - - auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - // Select output shpae - ir::Shape new_shape = shape_inference::inferSelectShape( - input_cond.info().shape(), input_true.info().shape(), input_false.info().shape()); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Shape &op) -{ - const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); - - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - // re-sizing output shape - ir::Shape output_shape; - output_shape.append(input.info().shape().rank()); - - output.info().shape(output_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Slice &op) -{ - const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)}; - const auto &input = _operands.at(input_index); - const auto 
begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)}; - const auto &begins = _operands.at(begins_index); - const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)}; - const auto &sizes = _operands.at(sizes_index); - const auto output_index = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_index); - - // Whether input is constant or not does not affect whether output is dynamic or not - if (!(begins.isConstant() && sizes.isConstant())) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - - auto begins_buf = reinterpret_cast<const int32_t *>(begins.data()->base()); - auto sizes_buf = reinterpret_cast<const int32_t *>(sizes.data()->base()); - - ir::Shape new_shape = - shape_inference::inferSliceShape(input.info().shape(), begins_buf, sizes_buf); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Softmax &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Softmax::Input::INPUT)); -} - -void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op) -{ - const auto output_index = op.getOutputs().at(0); - const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; - const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; - const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - - ir::Operand &output = _operands.at(output_index); - const auto &input = _operands.at(input_idx); - const auto &block_shape = _operands.at(block_shape_idx); - const auto &padding = _operands.at(padding_idx); - - // Whether input is constant or not does not affect whether output is dynamic or not - if (!(block_shape.isConstant() && padding.isConstant())) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - - auto input_shape = input.info().shape(); - auto block_shape_shape = block_shape.info().shape(); - auto padding_shape = padding.info().shape(); - - auto block_shape_data = reinterpret_cast<const int32_t *>(block_shape.data()->base()); - auto padding_data = reinterpret_cast<const int32_t *>(padding.data()->base()); - - ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape( - input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data); - - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Split &op) -{ - const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); - - const auto axis = op.param().axis; - const auto num_splits = op.param().num_splits; - - const auto rank = input.info().shape().rank(); - auto axis_resolved = axis < 0 ? 
axis + rank : axis; - - assert(0 <= axis_resolved && axis_resolved < rank); - - ir::Shape new_shape = - shape_inference::inferSplitShape(input.info().shape(), axis_resolved, num_splits); - auto output_tensors = op.getOutputs(); - for (auto output_idx : output_tensors) - { - ir::Operand &output = _operands.at(output_idx); - output.info().shape(new_shape); - } -} - -void StaticShapeInferer::visit(const ir::operation::SquaredDifference &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::SquaredDifference::Input::LHS), - op.getInputs().at(ir::operation::SquaredDifference::Input::RHS)); -} - -void StaticShapeInferer::visit(const ir::operation::Squeeze &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - - // Squeeze output shpae - ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param()); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::StridedSlice &op) -{ - const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)}; - const auto &input = _operands.at(input_index); - const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)}; - const auto &starts = _operands.at(starts_index); - const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; - const auto &ends = _operands.at(ends_index); - const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - const auto &strides = _operands.at(strides_index); - const auto output_index = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_index); - - if (!(starts.isConstant() && ends.isConstant() && strides.isConstant())) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - - const auto begin_mask = op.param().begin_mask; - const auto end_mask = op.param().end_mask; - const auto shrink_axis_mask = op.param().shrink_axis_mask; - const auto rank = input.info().shape().rank(); - - auto starts_buf = reinterpret_cast<const uint32_t *>(starts.data()->base()); - auto ends_buf = reinterpret_cast<const uint32_t *>(ends.data()->base()); - auto strides_buf = reinterpret_cast<const uint32_t *>(strides.data()->base()); - - auto op_params = shape_inference::buildStridedSliceParams( - starts_buf, ends_buf, strides_buf, begin_mask, end_mask, shrink_axis_mask, rank); - - ir::Shape new_shape = - shape_inference::inferStridedSliceShape(input.info().shape(), op_params, rank); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Tile &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - - const auto multiplier_idx{op.getInputs().at(ir::operation::Tile::Input::MULTIPLES)}; - const auto &multiplier = _operands.at(multiplier_idx); - - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - - if (!multiplier.isConstant()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - - auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier.data()->base()); - assert(multiplier_buffer); - - // re-sizing output shape - auto new_shape = 
shape_inference::inferTileShape(input.info().shape(), multiplier_buffer); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Transpose &op) -{ - const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)}; - const auto &input = _operands.at(input_idx); - - // get mutable output operand - const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); - const auto perm{op.param().perm}; - // const auto rank{op.param().rank}; - - // set output shape, based on input and params - ir::Shape new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm); - output.info().shape(new_shape); -} - -void StaticShapeInferer::visit(const ir::operation::Unpack &op) -{ - const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); - const auto num = op.param().num; - const auto rank = input.shape().rank(); - const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis); - - assert(axis < rank); - if (axis < 0) - { - for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++) - { - const auto output_idx = op.getOutputs().at(out_tensor_idx); - ir::Operand &output = _operands.at(output_idx); - output.info().setDynamic(); - } - _return_has_dynamic_tensor = true; - return; - } - - ir::Shape new_shape = shape_inference::inferUnpackShape(input.info().shape(), axis, rank); - - // re-sizing output shape - for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++) - { - const auto output_idx = op.getOutputs().at(out_tensor_idx); - ir::Operand &output = _operands.at(output_idx); - output.info().shape(new_shape); - } -} - -void StaticShapeInferer::visit(const ir::operation::While &op) -{ - auto &cond_graph = _lowered_subgs.at(op.param().cond_subg_index)->graph(); - auto &body_graph = _lowered_subgs.at(op.param().body_subg_index)->graph(); - const auto inputs = op.getInputs(); - const auto &outputs = op.getOutputs(); - - // re-sizing input shapes of then subgraph - const auto &cond_inputs = cond_graph.getInputs(); - assert(inputs.size() == cond_inputs.size()); - for (size_t i = 0; i < inputs.size(); ++i) - { - const auto &input = _operands.at(inputs.at(i)); - auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - if (input.info().isDynamic()) - { - cond_input.info().setDynamic(); - } - else - { - auto new_shape = input.info().shape(); - cond_input.info().shape(new_shape); - } - } - - // re-sizing input shapes of body subgraph - const auto &body_inputs = body_graph.getInputs(); - assert(cond_inputs.size() == body_inputs.size()); - for (size_t i = 0; i < cond_inputs.size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - auto &body_input = body_graph.operands().at(body_inputs.at(i)); - if (cond_input.info().isDynamic()) - { - body_input.info().setDynamic(); - } - else - { - const auto &new_shape = cond_input.info().shape(); - body_input.info().shape(new_shape); - } - } - - // re-sizing operands of body subgraph - StaticShapeInferer body_inferer(op.param().body_subg_index, _lowered_subgs); - _lowered_subgs.at(op.param().body_subg_index) - ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) { - bool has_dynamic_tensor = body_inferer.infer(op_seq); - op_seq.has_dynamic_tensor(has_dynamic_tensor); - }); - - // Check whether while operation's shapes are predictable - // If any of shape of body outputs and cond inputs are different, non-constant operands would be - // set to 
dynamic - bool check_unpredictable_dynamic = false; - const auto &body_outputs = body_graph.getOutputs(); - assert(body_outputs.size() == cond_inputs.size()); - for (size_t i = 0; i < body_outputs.size(); ++i) - { - const auto &body_output = body_graph.operands().at(body_outputs.at(i)); - auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - if ((cond_input.info().isDynamic() != body_output.info().isDynamic()) || - (cond_input.shape() != body_output.shape())) - { - check_unpredictable_dynamic = true; - break; - } - } - - if (check_unpredictable_dynamic) - { - // Set inputs of body subgraph - for (const auto &input_index : body_inputs) - { - auto &input = body_graph.operands().at(input_index); - if (!input.isConstant()) - { - input.info().setDynamic(); - } - } - - // Set inputs of cond subgraph - for (const auto &input_index : cond_inputs) - { - auto &input = cond_graph.operands().at(input_index); - if (!input.isConstant()) - { - input.info().setDynamic(); - } - } - - // Set non-constant operands of body subgraph to dynamic - StaticShapeInferer body_inferer(op.param().body_subg_index, _lowered_subgs); - _lowered_subgs.at(op.param().body_subg_index) - ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) { - bool has_dynamic_tensor = body_inferer.infer(op_seq); - op_seq.has_dynamic_tensor(has_dynamic_tensor); - }); - } - - // re-sizing operands of cond subgraph - // If check_unpredictable_dynamic is true, non-constant operands of cond subgraph would be set to - // dynamic - StaticShapeInferer cond_inferer(op.param().cond_subg_index, _lowered_subgs); - _lowered_subgs.at(op.param().cond_subg_index) - ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) { - bool has_dynamic_tensor = cond_inferer.infer(op_seq); - op_seq.has_dynamic_tensor(has_dynamic_tensor); - }); - - // re-sizing outputs of while operation - // If check_unpredictable_dynamic is true, outputs of while operation would be set to dynamic - assert(cond_inputs.size() == outputs.size()); - for (size_t i = 0; i < cond_inputs.size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - auto &output = _operands.at(outputs.at(i)); - if (cond_input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - } - else - { - const auto new_shape = cond_input.info().shape(); - output.info().shape(new_shape); - } - } -} - -} // namespace compiler - -} // namespace onert diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc new file mode 100644 index 000000000..ec5d2146b --- /dev/null +++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc @@ -0,0 +1,1425 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "compiler/StaticShapeInferer.h" +#include "util/ShapeInference.h" +#include "util/logging.h" + +#include <misc/polymorphic_downcast.h> + +#include <sstream> +#include <stdexcept> + +namespace onert +{ +namespace compiler +{ +void OperandObserver::updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info, + bool unpredictable) +{ + assert(changed_operands_info.size() == _operands.size()); + for (size_t i = 0; i < changed_operands_info.size(); ++i) + { + const auto &changed_operand_info = changed_operands_info.at(i); + auto &operand = _operands.at(i); + // assert(changed_operand_info.typeInfo() == operand->typeInfo()); + // This error check may be replaced by an assertion if this function is called after the + // validation of models is completed. + if (changed_operand_info.typeInfo() != operand->typeInfo()) + { + throw std::runtime_error("OperandObserver: The types of operands are mismatched"); + } + if (!operand->info().isConstant() && (changed_operand_info.isDynamic() || unpredictable)) + { + operand->info().setDynamic(); + } + else + { + const auto &new_shape = changed_operands_info.at(i).shape(); + operand->info().shape(new_shape); + } + } +} + +void StaticShapeInferer::infer() +{ + for (const auto &op_idx : _lowered_subg->graph().topolSortOperations()) + { + const auto &op = _lowered_subg->graph().operations().at(op_idx); + bool has_dynamic_tensor = false; + const auto opcode = op.opcode(); + // IF: requires shape inference for then, else + // While: requires shape inference for condition, body + if (opcode == ir::OpCode::If || opcode == ir::OpCode::While) + { + op.accept(*this); + } + else + { + has_dynamic_tensor = checkDynamicInput(op); + if (has_dynamic_tensor) + { + setDynamicOutput(op); + } + else + { + op.accept(*this); + } + } + has_dynamic_tensor = has_dynamic_tensor || checkDynamicOutput(op); + _lowered_subg->setHasDynamicTensor(op_idx, has_dynamic_tensor); + } + + if (_controlflow_output_observer != nullptr) + { + // re-sizing output shapes of the controlflow operation branching to this subgraph + std::vector<ir::OperandInfo> outputs_info; + const auto &graph = _lowered_subg->graph(); + const auto &outputs = graph.getOutputs(); + for (size_t i = 0; i < outputs.size(); ++i) + { + const auto &operand_info = graph.operands().at(outputs.at(i)).info(); + outputs_info.emplace_back(operand_info); + } + _controlflow_output_observer->updateShapes(outputs_info); + } +} + +bool StaticShapeInferer::checkDynamicInput(const ir::IOperation &op) +{ + const auto &operands = _lowered_subg->graph().operands(); + for (auto &&input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED) + { + if (operands.at(input_idx).info().isDynamic()) + { + return true; + } + } + + return false; +} + +bool StaticShapeInferer::checkDynamicOutput(const ir::IOperation &op) +{ + auto &operands = _lowered_subg->graph().operands(); + for (auto &&output_idx : op.getOutputs() | ir::Remove::UNDEFINED) + { + if (operands.at(output_idx).info().isDynamic()) + { + return true; + } + } + return false; +} + +void StaticShapeInferer::setDynamicOutput(const ir::IOperation &op) +{ + auto &operands = _lowered_subg->graph().operands(); + for (auto &&output_idx : op.getOutputs() | ir::Remove::UNDEFINED) + { + operands.at(output_idx).info().setDynamic(); + } +} + +void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op, + const ir::OperandIndex lhs_idx, + const ir::OperandIndex rhs_idx) +{ + auto 
&operands = _lowered_subg->graph().operands(); + const auto &lhs = operands.at(lhs_idx); + const auto &rhs = operands.at(rhs_idx); + + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + // re-sizing output shape + ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape()); + output.info().shape(new_shape); +} + +void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op, + const ir::OperandIndex input_idx) +{ + auto &operands = _lowered_subg->graph().operands(); + const auto &input = operands.at(input_idx); + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + // re-sizing output shape + ir::Shape new_shape = input.info().shape(); + output.info().shape(new_shape); +} + +void StaticShapeInferer::dump() +{ + auto get_shape_str = [](const ir::Shape &shape) { + std::stringstream sstream; + sstream << "shape : {"; + for (int i = 0; i < shape.rank(); i++) + { + if (i == 0) + sstream << shape.dim(i); + else + sstream << " " << shape.dim(i); + } + sstream << "}"; + return sstream.str(); + }; + + _lowered_subg->graph().operands().iterate( + [&](const ir::OperandIndex &ind, const ir::Operand &operand) { + VERBOSE(StaticShapeInferer) << " " << ind << ", " + << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", " + << get_shape_str(operand.info().shape()) << std::endl; + }); +} + +std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> +StaticShapeInferer::createStaticShapeInferers( + const std::unordered_map<ir::SubgraphIndex, ILoweredGraph *> &lowered_subgs) +{ + // Allocate StaticShapeInferer per each subgraph + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers; + for (auto &&pair : lowered_subgs) + { + const auto &subg_index = pair.first; + auto &lowered_subg = pair.second; + inferers[subg_index] = std::make_unique<StaticShapeInferer>(lowered_subg); + } + + // Append observers in all StaticShapeInferers + for (auto &&pair : lowered_subgs) + { + const auto &subg_index = pair.first; + auto &lowered_subg = pair.second; + + // TODO: Change this iteration for all to controlflow iteration + lowered_subg->graph().operations().iterate( + [&](const ir::OperationIndex &, const ir::IOperation &op) { + // A Function to append child inferers. These make it possible for a StaticShapeInferer to + // call StaticShapeInferers of child subgraphs recursively + auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) { + auto *child_inferer = inferers.at(child_subg_idx).get(); + inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer); + }; + + // A Function to append subg input observers. This makes it possible for a + // StaticShapeInferer to update inputs of child subgraphs + auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) { + std::vector<ir::Operand *> child_subg_inputs; + auto &child_subg = lowered_subgs.at(child_subg_idx)->graph(); + for (const auto &input_idx : child_subg.getInputs()) + { + auto operand_ptr = child_subg.operands().getRawPtr(input_idx); + child_subg_inputs.emplace_back(operand_ptr); + } + inferers.at(subg_index) + ->appendSubgInputObserver(child_subg_idx, + std::make_unique<OperandObserver>(child_subg_inputs)); + }; + + // A Function to set controlflow output observers. 
This makes it possible for a + // StaticShapeInferer to update outputs of parent controlflow operations + auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) { + std::vector<ir::Operand *> cf_outputs; + auto &subg = lowered_subg->graph(); + for (const auto &output_idx : op.getOutputs()) + { + auto operand_ptr = subg.operands().getRawPtr(output_idx); + cf_outputs.emplace_back(operand_ptr); + } + inferers.at(child_subg_idx) + ->setControlflowOutputObserver(std::make_unique<OperandObserver>(cf_outputs)); + }; + + // Append Observers in a StaticShapeInferer + if (op.opcode() == ir::OpCode::If) + { + // TODO Remove dynamic_cast + // A virtual base class cannot be downcasted by static_cast + const auto &if_op = dynamic_cast<const ir::operation::If &>(op); + + appendChildInferer(if_op.param().then_subg_index); + appendChildInferer(if_op.param().else_subg_index); + + appendSubgraphInputObserver(if_op.param().then_subg_index); + appendSubgraphInputObserver(if_op.param().else_subg_index); + + setControlFlowOutputObserver(if_op.param().then_subg_index); + } + else if (op.opcode() == ir::OpCode::While) + { + // TODO Remove dynamic_cast + const auto &while_op = dynamic_cast<const ir::operation::While &>(op); + + appendChildInferer(while_op.param().cond_subg_index); + appendChildInferer(while_op.param().body_subg_index); + + appendSubgraphInputObserver(while_op.param().cond_subg_index); + appendSubgraphInputObserver(while_op.param().body_subg_index); + + setControlFlowOutputObserver(while_op.param().body_subg_index); + } + }); + } + + return inferers; +} + +void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; + const auto &axis = operands.at(axis_idx); + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + if (!axis.isConstant()) + { + output.info().setDynamic(); + return; + } + + const auto rank = input.info().shape().rank(); + auto axis_value = axis.asScalar<int32_t>(); + axis_value = axis_value < 0 ? 
axis_value + rank : axis_value; + + // re-sizing output shape + ir::Shape new_shape = + shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS); + const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS); + const auto output_index = op.getOutputs().at(0); + const auto &lhs = operands.at(lhs_index); + const auto &rhs = operands.at(rhs_index); + auto &output = operands.at(output_index); + auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param()); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + const auto cluster_idx{ + op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)}; + const auto &cluster = operands.at(cluster_idx); + + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base()); + assert(cluster_buf); + + // re-sizing output shape + ir::Shape new_shape = shape_inference::inferBCQFullyConnectedShape( + input.info().shape(), cluster.info().shape(), cluster_buf); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::BCQGather &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)}; + const auto &indices = operands.at(indices_idx); + + const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)}; + const auto &input_binary = operands.at(input_binary_idx); + + const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)}; + const auto &cluster = operands.at(cluster_idx); + + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base()); + assert(cluster_buf); + + auto rank = input_binary.shape().rank(); + + // re-sizing output shape + ir::Shape new_shape = shape_inference::inferBCQGatherShape( + indices.info().shape(), cluster.info().shape(), cluster_buf, rank, op.param()); + + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op) +{ + handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS), + op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)); +} + +void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op) +{ + // get mutable output operand + auto &operands = _lowered_subg->graph().operands(); + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)}; + const auto &shape = operands.at(shape_idx); + + if (!shape.isConstant()) + { + output.info().setDynamic(); + return; + } + + // assert(shape.typeInfo().type() == ir::DataType::INT32); + auto shape_buffer = reinterpret_cast<const int32_t *>(shape.data()->base()); + + // 
re-sizing output shape + ir::Shape new_shape = shape_inference::inferBroadcastToShape(shape.info().shape(), shape_buffer); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Comparison &op) +{ + handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0), + op.getInputs().at(ir::operation::Comparison::Input::INPUT1)); +} + +void StaticShapeInferer::visit(const ir::operation::Concat &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_count = op.getInputs().size(); + + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + shape_inference::Shapes input_shapes; + for (uint32_t i = 0; i < input_count; i++) + { + const auto input_idx{op.getInputs().at(i)}; + const auto &input = operands.at(input_idx); + input_shapes.emplace_back(input.shape()); + } + + ir::Shape out_shape = shape_inference::inferConcatShape(input_shapes, op.param()); + + // re-sizing output shape + output.info().shape(out_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Conv2D &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::Conv2D::Input::INPUT)}; + const auto &input = operands.at(input_idx); + const auto ker_idx{op.getInputs().at(ir::operation::Conv2D::Input::KERNEL)}; + const auto &ker = operands.at(ker_idx); + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + // re-sizing output shape + ir::Shape new_shape = + shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param()); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op) +{ + handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)); +} + +void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op) +{ + handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS), + op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)); +} + +void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op) +{ + handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)); +} + +void StaticShapeInferer::visit(const ir::operation::ExpandDims &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; + const auto &input = operands.at(input_idx); + const auto axis_idx{op.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; + const auto &axis = operands.at(axis_idx); + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + if (!axis.isConstant()) + { + output.info().setDynamic(); + return; + } + + // even when axis is constant, output shape should be recalculated since user might call + // nnfw_set_input_tensorinfo(input, some_new_shape) + auto axis_type = axis.typeInfo().type(); + assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64); + + assert(axis.data()->base()); + int32_t axis_value = + (axis_type == ir::DataType::INT32) + ? 
reinterpret_cast<const int32_t *>(axis.data()->base())[0] + : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis.data()->base())[0]); + + // re-sizing output shape + ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_value); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Fill &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)}; + const auto &shape = operands.at(shape_idx); + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + if (!shape.isConstant()) + { + output.info().setDynamic(); + return; + } + + const auto dims_type = shape.typeInfo().type(); + assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64); + + auto dims_buf = shape.data()->base(); + assert(dims_buf); + + const auto &dims_shape = shape.info().shape(); + const auto &new_shape = ((dims_type == ir::DataType::INT32) + ? shape_inference::inferFillShape<int32_t>( + dims_shape, reinterpret_cast<const int32_t *>(dims_buf)) + : shape_inference::inferFillShape<int64_t>( + dims_shape, reinterpret_cast<const int64_t *>(dims_buf))); + + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::FullyConnected &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)}; + const auto &ker = operands.at(ker_idx); + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + // re-sizing output shape + ir::Shape new_shape = + shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape()); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::FusedBatchNorm &op) +{ + handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::FusedBatchNorm::Input::INPUT)); +} + +void StaticShapeInferer::visit(const ir::operation::Gather &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)}; + const auto &indices = operands.at(indices_idx); + const auto rank = input.info().shape().rank(); + const auto axis = ((op.param().axis < 0) ? 
rank + op.param().axis : op.param().axis); + + assert(0 <= axis && axis < rank); + + // re-sizing output shape + ir::Shape new_shape = + shape_inference::inferGatherShape(input.info().shape(), indices.info().shape(), axis, rank); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::If &op) +{ + // re-sizing input shapes of then/else subgraph + const std::vector<ir::OperandIndex> inputs{op.getInputs().begin() + 1, op.getInputs().end()}; + + std::vector<ir::OperandInfo> inputs_info; + const auto &graph = _lowered_subg->graph(); + for (size_t i = 0; i < inputs.size(); ++i) + { + const auto &operand_info = graph.operands().at(inputs.at(i)).info(); + inputs_info.emplace_back(operand_info); + } + _subg_input_observers.at(op.param().then_subg_index)->updateShapes(inputs_info); + _child_inferers.at(op.param().then_subg_index)->infer(); + + _subg_input_observers.at(op.param().else_subg_index)->updateShapes(inputs_info); + _child_inferers.at(op.param().else_subg_index)->infer(); +} + +void StaticShapeInferer::visit(const ir::operation::L2Normalization &op) +{ + handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT)); +} + +void StaticShapeInferer::visit(const ir::operation::Loss &) +{ + // TODO Consider SparseCategoricalCrossentropy case + + // TODO Consider output shape in case of reduction option +} + +void StaticShapeInferer::visit(const ir::operation::LSTM &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; + auto &output = operands.at(output_index); + + const auto output_state_out_index{ + op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; + + const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; + + const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; + + if (output.info().isDynamic() || + (operands.exist(output_state_out_index) && + operands.at(output_state_out_index).info().isDynamic()) || + (operands.exist(cell_state_out_index) && + operands.at(cell_state_out_index).info().isDynamic()) || + (operands.exist(scratch_buffer_index) && + operands.at(scratch_buffer_index).info().isDynamic())) + return; + + const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)}; + const auto &input = operands.at(input_index); + + const auto input_to_output_weights_index{ + op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; + const auto &input_to_output_weights = operands.at(input_to_output_weights_index); + + const auto recurrent_to_output_weights_index{ + op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; + const auto &recurrent_to_output_weights = operands.at(recurrent_to_output_weights_index); + + // re-sizing outputs + const int n_batch = (input.shape().rank() == 3 && op.param().time_major) ? 
input.shape().dim(1) + : input.shape().dim(0); + const int n_cell = input_to_output_weights.shape().dim(0); + const int n_output = recurrent_to_output_weights.shape().dim(1); + if (input.shape().rank() == 3) + { + if (op.param().time_major) + output.info().shape(ir::Shape{input.shape().dim(0), n_batch, n_output}); + else + output.info().shape(ir::Shape{n_batch, input.shape().dim(1), n_output}); + } + else + { + assert(input.shape().rank() == 2); + output.info().shape(ir::Shape{n_batch, n_output}); + } + + if (operands.exist(output_state_out_index)) + { + auto &output_state_out = operands.at(output_state_out_index); + output_state_out.info().shape(ir::Shape{n_batch, n_output}); + } + + if (operands.exist(cell_state_out_index)) + { + auto &cell_state_out = operands.at(cell_state_out_index); + cell_state_out.info().shape(ir::Shape{n_batch, n_cell}); + } + + if (operands.exist(scratch_buffer_index)) + { + auto &scratch_buffer = operands.at(scratch_buffer_index); + + const auto input_to_input_weights_index{ + op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; + const auto recurrent_to_input_weights_index{ + op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; + + bool has_input_to_input_weights = + operands.at(input_to_input_weights_index).shape().dim(0) != 0 && + operands.at(input_to_input_weights_index).shape().dim(1) != 0; + bool has_recurrent_to_input_weights = + operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0; + + // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG). + // true: no CIFG + // false: CIFG + bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; + if (has_cifg_param) + { + scratch_buffer.info().shape(ir::Shape{n_batch, n_cell * 4}); + } + else + { + scratch_buffer.info().shape(ir::Shape{n_batch, n_cell * 3}); + } + } +} + +void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op) +{ + handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)); +} + +void StaticShapeInferer::visit(const ir::operation::OneHot &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)}; + const auto &indice = operands.at(indice_idx); + const auto depth_idx{op.getInputs().at(ir::operation::OneHot::Input::DEPTH)}; + const auto &depth = operands.at(depth_idx); + + const auto axis = op.param().axis; + + auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + if (!depth.isConstant()) + { + output.info().setDynamic(); + return; + } + + const auto *depth_buf = reinterpret_cast<const int32_t *>(depth.data()->base()); + assert(depth_buf); + // re-sizing output shape + ir::Shape new_shape = shape_inference::inferOnehotShape(indice.info().shape(), *depth_buf, axis); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Pack &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(0)}; + const auto &input = operands.at(input_idx); + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + const auto rank = input.shape().rank() + 1; + const auto axis = ((op.param().axis < 0) ? 
rank + op.param().axis : op.param().axis); + const auto num = op.param().num; + + assert(0 <= axis && axis < rank); + + // re-sizing output shape + ir::Shape new_shape = shape_inference::inferPackShape(input.info().shape(), axis, rank, num); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Pad &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::Pad::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + const auto pad_idx{op.getInputs().at(ir::operation::Pad::Input::PAD)}; + const auto &pad = operands.at(pad_idx); + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + // if pad is not constant, output also becomes dynamic + if (!pad.isConstant()) + { + output.info().setDynamic(); + return; + } + + // re-sizing output shape + const auto new_shape = shape_inference::inferPadShape( + input.shape(), reinterpret_cast<const int32_t *>(pad.data()->base()), + pad.shape().num_elements()); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Permute &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(0)}; + const auto &input = operands.at(input_idx); + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + // re-sizing output shape + // Permute is a special operation that layouts of input/output may be different on backend + // However, it is not applied here, so input/output have the same layout as the frontend, because + // "ExecutorFactory" would convert shape of input/output according to the layouts when registering + // operand info to "TensorBuilder" after calling "StaticShapeInferer" + const auto &new_shape = input.info().shape(); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Pow &op) +{ + handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Pow::Input::LHS), + op.getInputs().at(ir::operation::Pow::Input::RHS)); +} + +void StaticShapeInferer::visit(const ir::operation::Range &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto start_idx{op.getInputs().at(ir::operation::Range::Input::START)}; + const auto limit_idx{op.getInputs().at(ir::operation::Range::Input::LIMIT)}; + const auto delta_idx{op.getInputs().at(ir::operation::Range::Input::DELTA)}; + const auto &start_op = operands.at(start_idx); + const auto &limit_op = operands.at(limit_idx); + const auto &delta_op = operands.at(delta_idx); + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + ir::Shape new_shape; + if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant()) + { + assert(start_op.typeInfo().type() == limit_op.typeInfo().type() && + start_op.typeInfo().type() == delta_op.typeInfo().type()); + if (output.typeInfo().type() == ir::DataType::FLOAT32) + { + new_shape = shape_inference::inferRangeShape<float>( + start_op.asScalar<float>(), limit_op.asScalar<float>(), delta_op.asScalar<float>()); + } + else if (output.typeInfo().type() == ir::DataType::INT32) + { + new_shape = shape_inference::inferRangeShape<int32_t>( + start_op.asScalar<int32_t>(), limit_op.asScalar<int32_t>(), delta_op.asScalar<int32_t>()); + } + assert(output.shape() == new_shape); + } + else + { + output.info().setDynamic(); + } +} + +void 
StaticShapeInferer::visit(const ir::operation::Reduce &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)}; + const auto &axes = operands.at(axes_idx); + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + std::vector<int32_t> axes_vec; + for (size_t i = 0; i < axes.shape().num_elements(); ++i) + { + switch (axes.typeInfo().type()) + { + case ir::DataType::INT32: + { + axes_vec.emplace_back(reinterpret_cast<const int32_t *>(axes.data()->base())[i]); + break; + } + case ir::DataType::INT64: + { + axes_vec.emplace_back(reinterpret_cast<const int64_t *>(axes.data()->base())[i]); + break; + } + default: + throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported data type"); + break; + } + } + const auto keep_dims = op.param().keep_dims; + + // re-sizing output shape + ir::Shape new_shape = + shape_inference::inferReduceShape(input.info().shape(), axes_vec, keep_dims); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Reshape &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + // New shape is given by second input tensor + if (op.getInputs().size() == 2) + { + // Let's check the second input + const auto shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)}; + const auto &shape = operands.at(shape_idx); + + if (shape.isConstant()) + { + const auto *shape_buf = reinterpret_cast<const int32_t *>(shape.data()->base()); + assert(shape_buf); + + ir::Shape new_shape = shape_inference::inferReshapeShape( + shape_buf, shape.shape().num_elements(), input.shape().num_elements()); + + // if shape is from Const, TFLC put the shape of output into tensor + if (new_shape != output.shape()) + { + // change on output shape + output.info().shape(new_shape); + } + } + else + { + // if shape is NOT Const, set output shape to be dynamic_ + output.info().setDynamic(); + } + } + // New shape is given by option + else if (op.param().new_shape.size() != 0) + { + // Let's check the new_shape option + auto shape = op.param().new_shape; + ir::Shape new_shape = + shape_inference::inferReshapeShape(shape.data(), shape.size(), input.shape().num_elements()); + + if (new_shape != output.shape()) + { + // change on output shape + output.info().shape(new_shape); + } + } + else + { + throw std::runtime_error("Reshape: new shape is missing"); + } +} + +void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + int32_t height_out, width_out; + if (op.getInputs().size() == 2) + { + auto &size = operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE)); + if (!size.isConstant()) + { + output.info().setDynamic(); + return; + } + const auto size_v 
= size.asVector<std::int32_t>(); + height_out = size_v[0]; + width_out = size_v[1]; + } + else + { + height_out = op.param().height_out; + width_out = op.param().width_out; + } + + // Shape inferencing logic based on Params + ir::Shape new_shape = + shape_inference::inferResizeBilinearShape(input.shape(), height_out, width_out); + + // if size_op is from Const, TFLC put the shape of output into tensor + if (new_shape != output.shape()) + { + // change on output shape + output.info().shape(new_shape); + } +} + +void StaticShapeInferer::visit(const ir::operation::Reverse &op) +{ + handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT)); +} + +void StaticShapeInferer::visit(const ir::operation::Select &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)}; + const auto &input_cond = operands.at(input_cond_idx); + + const auto input_true_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)}; + const auto &input_true = operands.at(input_true_idx); + + const auto input_false_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)}; + const auto &input_false = operands.at(input_false_idx); + + auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + // Select output shape + ir::Shape new_shape = shape_inference::inferSelectShape( + input_cond.info().shape(), input_true.info().shape(), input_false.info().shape()); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Shape &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(0)}; + const auto &input = operands.at(input_idx); + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + // re-sizing output shape + ir::Shape output_shape; + output_shape.append(input.info().shape().rank()); + + output.info().shape(output_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Slice &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)}; + const auto &input = operands.at(input_index); + const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)}; + const auto &begins = operands.at(begins_index); + const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)}; + const auto &sizes = operands.at(sizes_index); + const auto output_index = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_index); + + // Whether input is constant or not does not affect whether output is dynamic or not + if (!(begins.isConstant() && sizes.isConstant())) + { + output.info().setDynamic(); + return; + } + + auto begins_buf = begins.data()->base(); + auto sizes_buf = sizes.data()->base(); + + const auto begins_type = begins.typeInfo().type(); + assert(begins_type == ir::DataType::INT32 || begins_type == ir::DataType::INT64); + assert(begins_type == sizes.typeInfo().type()); + + ir::Shape new_shape = + (begins_type == ir::DataType::INT32) + ? 
shape_inference::inferSliceShape<int32_t>(input.info().shape(), + reinterpret_cast<const int32_t *>(begins_buf), + reinterpret_cast<const int32_t *>(sizes_buf)) + : shape_inference::inferSliceShape<int64_t>(input.info().shape(), + reinterpret_cast<const int64_t *>(begins_buf), + reinterpret_cast<const int64_t *>(sizes_buf)); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Softmax &op) +{ + handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Softmax::Input::INPUT)); +} + +void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto output_index = op.getOutputs().at(0); + const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; + const auto &block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; + const auto &padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; + + ir::Operand &output = operands.at(output_index); + const auto &input = operands.at(input_idx); + const auto &block_shape = operands.at(block_shape_idx); + const auto &padding = operands.at(padding_idx); + + // Whether input is constant or not does not affect whether output is dynamic or not + if (!(block_shape.isConstant() && padding.isConstant())) + { + output.info().setDynamic(); + return; + } + + const auto &input_shape = input.info().shape(); + const auto &block_shape_shape = block_shape.info().shape(); + const auto &padding_shape = padding.info().shape(); + + auto block_shape_data = reinterpret_cast<const int32_t *>(block_shape.data()->base()); + auto padding_data = reinterpret_cast<const int32_t *>(padding.data()->base()); + + ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape( + input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data); + + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Split &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)}; + const auto &axis = operands.at(axis_idx); + + auto outputs = op.getOutputs(); + if (!axis.isConstant()) + { + for (auto &&output_idx : outputs) + { + ir::Operand &output = operands.at(output_idx); + output.info().setDynamic(); + } + return; + } + + const auto num_splits = op.param().num_splits; + + const auto rank = input.info().shape().rank(); + auto axis_value = axis.asScalar<int32_t>(); + axis_value = axis_value < 0 ? 
axis_value + rank : axis_value; + + assert(0 <= axis_value && axis_value < rank); + + ir::Shape new_shape = + shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits); + for (auto &&output_idx : outputs) + { + ir::Operand &output = operands.at(output_idx); + output.info().shape(new_shape); + } +} + +void StaticShapeInferer::visit(const ir::operation::SquaredDifference &op) +{ + handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::SquaredDifference::Input::LHS), + op.getInputs().at(ir::operation::SquaredDifference::Input::RHS)); +} + +void StaticShapeInferer::visit(const ir::operation::Squeeze &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + // Squeeze output shpae + ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param()); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::StridedSlice &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)}; + const auto &input = operands.at(input_index); + const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)}; + const auto &starts = operands.at(starts_index); + const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; + const auto &ends = operands.at(ends_index); + const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; + const auto &strides = operands.at(strides_index); + const auto output_index = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_index); + + if (!(starts.isConstant() && ends.isConstant() && strides.isConstant())) + { + output.info().setDynamic(); + return; + } + + const auto begin_mask = op.param().begin_mask; + const auto end_mask = op.param().end_mask; + const auto shrink_axis_mask = op.param().shrink_axis_mask; + const auto rank = input.info().shape().rank(); + + auto starts_buf = reinterpret_cast<const uint32_t *>(starts.data()->base()); + auto ends_buf = reinterpret_cast<const uint32_t *>(ends.data()->base()); + auto strides_buf = reinterpret_cast<const uint32_t *>(strides.data()->base()); + + auto op_params = shape_inference::buildStridedSliceParams( + starts_buf, ends_buf, strides_buf, begin_mask, end_mask, shrink_axis_mask, rank); + + ir::Shape new_shape = + shape_inference::inferStridedSliceShape(input.info().shape(), op_params, rank); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Tile &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + const auto multiplier_idx{op.getInputs().at(ir::operation::Tile::Input::MULTIPLES)}; + const auto &multiplier = operands.at(multiplier_idx); + + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + if (!multiplier.isConstant()) + { + output.info().setDynamic(); + return; + } + + auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier.data()->base()); + assert(multiplier_buffer); + + // re-sizing output shape + auto new_shape = shape_inference::inferTileShape(input.info().shape(), multiplier_buffer, + 
multiplier.shape().num_elements()); + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Transpose &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)}; + const auto &input = operands.at(input_idx); + + const auto perm_idx{op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)}; + const auto &perm = operands.at(perm_idx); + + // perm.shape() != ir::Shape{0} means that perm is (n-1...0) + // TODO This condition changes to perm.num_elements() == 0 + const auto is_regular_transpose = perm.shape() == ir::Shape{0}; + + // get mutable output operand + const auto output_idx = op.getOutputs().at(0); + auto &output = operands.at(output_idx); + if (!perm.isConstant() && !is_regular_transpose) + { + output.info().setDynamic(); + return; + } + + ir::Shape new_shape; + if (is_regular_transpose) + { + // Call by (n-1...0) + new_shape = shape_inference::inferTransposeShape(input.info().shape(), nullptr, 0); + } + else + { + // Check rank + if (input.info().shape().rank() != static_cast<int>(perm.info().shape().num_elements())) + { + throw std::runtime_error("StaticShapeInferer failed, bad rank size: " + + std::to_string(perm.info().shape().num_elements())); + } + + // set output shape, based on input and params + const auto perm_buf = reinterpret_cast<const int32_t *>(perm.data()->base()); + new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm_buf, + perm.shape().num_elements()); + } + output.info().shape(new_shape); +} + +void StaticShapeInferer::visit(const ir::operation::Unpack &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + const auto input_idx{op.getInputs().at(0)}; + const auto &input = operands.at(input_idx); + const auto num = op.param().num; + const auto rank = input.shape().rank(); + const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis); + + assert(axis < rank); + if (axis < 0) + { + for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++) + { + const auto output_idx = op.getOutputs().at(out_tensor_idx); + ir::Operand &output = operands.at(output_idx); + output.info().setDynamic(); + } + return; + } + + ir::Shape new_shape = shape_inference::inferUnpackShape(input.info().shape(), axis, rank); + + // re-sizing output shape + for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++) + { + const auto output_idx = op.getOutputs().at(out_tensor_idx); + ir::Operand &output = operands.at(output_idx); + output.info().shape(new_shape); + } +} + +void StaticShapeInferer::visit(const ir::operation::While &op) +{ + auto body_input_observer = _subg_input_observers.at(op.param().body_subg_index).get(); + auto cond_input_observer = _subg_input_observers.at(op.param().cond_subg_index).get(); + // re-sizing input shapes of body subgraph + const auto &inputs = op.getInputs(); + std::vector<ir::OperandInfo> inputs_info; + const auto &graph = _lowered_subg->graph(); + for (size_t i = 0; i < inputs.size(); ++i) + { + const auto &operand_info = graph.operands().at(inputs.at(i)).info(); + inputs_info.emplace_back(operand_info); + } + + body_input_observer->updateShapes(inputs_info); + _child_inferers.at(op.param().body_subg_index)->infer(); + + // Check whether while operation's shapes are predictable + // This while op's outputs are also updated in the above function + // "_child_inferers.at(op.param().body_subg_index)->update()". 
That means that body's outputs and + this op's outputs must have the same shape. So we can predict whether body subgraphs will + change at every step by comparing the shapes of inputs/outputs. If any shapes of the body outputs + and inputs differ, non-constant operands will be set to dynamic. + bool check_unpredictable_dynamic = false; + const auto &updated_outputs = op.getOutputs(); + assert(inputs_info.size() == updated_outputs.size()); + for (size_t i = 0; i < updated_outputs.size(); ++i) + { + const auto &input_info = inputs_info.at(i); + const auto &output_info = graph.operands().at(updated_outputs.at(i)).info(); + if (input_info.isDynamic() != output_info.isDynamic() || + input_info.shape() != output_info.shape()) + { + check_unpredictable_dynamic = true; + break; + } + } + + if (check_unpredictable_dynamic) + { + body_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic); + _child_inferers.at(op.param().body_subg_index)->infer(); + } + cond_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic); + _child_inferers.at(op.param().cond_subg_index)->infer(); +} + +void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op) +{ + // TODO: NMS supports very limited input/output size. + ir::operation::DetectionPostProcess::Param param = op.param(); + + auto &operands = _lowered_subg->graph().operands(); + const int num_detected_boxes = param.max_detections * param.max_classes_per_detection; + + const auto output_idx1 = op.getOutputs().at(0); + auto &output1 = operands.at(output_idx1); + output1.info().shape({1, num_detected_boxes, 4}); + + const auto output_idx2 = op.getOutputs().at(1); + auto &output2 = operands.at(output_idx2); + output2.info().shape({1, num_detected_boxes}); + + const auto output_idx3 = op.getOutputs().at(2); + auto &output3 = operands.at(output_idx3); + output3.info().shape({1, num_detected_boxes}); + + const auto output_idx4 = op.getOutputs().at(3); + auto &output4 = operands.at(output_idx4); + output4.info().shape({1}); +} +void StaticShapeInferer::visit(const ir::operation::Bulk &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + // TODO: support multiple inputs/outputs + const auto input_idx{op.getInputs().at(0)}; + const auto &input = operands.at(input_idx); + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + const auto &cur_input_shape = input.info().shape(); + auto origin_output_shape = op.param().origin_output_shapes[0]; + + // TODO: more checks for a valid batch request + if ((cur_input_shape.dim(0) < origin_output_shape.dim(0)) || + (cur_input_shape.dim(0) % origin_output_shape.dim(0) != 0)) + { + throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported batch size"); + } + size_t batch_multiplier = cur_input_shape.dim(0) / origin_output_shape.dim(0); + + ir::Shape new_shape; + new_shape.append(origin_output_shape.dim(0) * batch_multiplier); + for (int32_t d = 1; d < origin_output_shape.rank(); ++d) + new_shape.append(origin_output_shape.dim(d)); + + output.info().shape(new_shape); +} + +} // namespace compiler + +} // namespace onert diff --git a/runtime/onert/core/src/compiler/TensorBuilders.h b/runtime/onert/core/src/compiler/TensorBuilders.h deleted file mode 100644 index 3b0360b4b..000000000 --- a/runtime/onert/core/src/compiler/TensorBuilders.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd.
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_COMPILER_TENSOR_BUILDERS_H__ -#define __ONERT_COMPILER_TENSOR_BUILDERS_H__ - -#include <unordered_set> -#include <memory> -#include "backend/BackendContext.h" -#include "backend/Backend.h" -#include "backend/controlflow/Config.h" -#include "backend/controlflow/TensorBuilder.h" -#include "util/logging.h" - -namespace onert -{ -namespace compiler -{ - -class TensorBuilders -{ -public: - TensorBuilders() = default; - - TensorBuilders(const onert::backend::BackendContexts &backend_contexts, bool include_controlflow) - { - for (const auto &e : backend_contexts) - { - if (e.first->config()->id() == backend::controlflow::Config::ID) - { - _cf_tensor_builder = std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>( - e.second->tensor_builder); - if (include_controlflow) - _tensor_builders.insert(e.second->tensor_builder); - } - else - { - _tensor_builders.insert(e.second->tensor_builder); - } - } - } - - std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator begin() const - { - return _tensor_builders.cbegin(); - } - std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator end() const - { - return _tensor_builders.cend(); - } - - std::shared_ptr<backend::controlflow::TensorBuilder> getControlflowTensorBuilder() const - { - return _cf_tensor_builder; - } - -private: - std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders; - std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder; -}; - -} // namespace compiler -} // namespace onert - -#endif // __ONERT_COMPILER_TENSOR_BUILDERS_H__ diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h index 8be87b081..c7e06e84c 100644 --- a/runtime/onert/core/src/compiler/TensorRegistries.h +++ b/runtime/onert/core/src/compiler/TensorRegistries.h @@ -17,13 +17,14 @@ #ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__ #define __ONERT_COMPILER_TENSOR_REGISTRIES_H__ -#include <unordered_set> -#include <memory> -#include "backend/BackendContext.h" +#include "../backend/builtin/Config.h" +#include "../backend/builtin/TensorRegistry.h" + #include "backend/Backend.h" -#include "backend/controlflow/Config.h" -#include "backend/controlflow/TensorBuilder.h" -#include "backend/controlflow/TensorRegistry.h" +#include "backend/BackendContext.h" + +#include <memory> +#include <unordered_set> namespace onert { @@ -35,17 +36,16 @@ class TensorRegistries public: TensorRegistries() = default; - TensorRegistries(const onert::backend::BackendContexts &backend_contexts, - bool include_controlflow) + TensorRegistries(const onert::backend::BackendContexts &backend_contexts, bool include_builtin) { for (const auto &e : backend_contexts) { auto tensor_reg = e.second->tensor_registry; - if (e.first->config()->id() == backend::controlflow::Config::ID) + if (e.first->config()->id() == backend::builtin::Config::ID) { - _cf_tensor_reg = - 
std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(tensor_reg); - if (include_controlflow) + _builtin_tensor_reg = + std::dynamic_pointer_cast<backend::builtin::TensorRegistry>(tensor_reg); + if (include_builtin) _tensor_regs.insert(tensor_reg); } else @@ -64,14 +64,14 @@ public: return _tensor_regs.cend(); } - std::shared_ptr<backend::controlflow::TensorRegistry> getControlflowTensorRegistry() const + std::shared_ptr<backend::builtin::TensorRegistry> getBuiltinTensorRegistry() const { - return _cf_tensor_reg; + return _builtin_tensor_reg; } - std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) const + backend::ITensor *getITensor(ir::OperandIndex ind) const { - for (auto &tensor_reg : _tensor_regs) + for (auto &&tensor_reg : _tensor_regs) { auto tensor = tensor_reg->getITensor(ind); if (tensor) @@ -82,7 +82,7 @@ public: private: std::unordered_set<std::shared_ptr<backend::ITensorRegistry>> _tensor_regs; - std::shared_ptr<backend::controlflow::TensorRegistry> _cf_tensor_reg; + std::shared_ptr<backend::builtin::TensorRegistry> _builtin_tensor_reg; }; } // namespace compiler diff --git a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc index 647669e46..a6590b13f 100644 --- a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc @@ -17,8 +17,9 @@ #include "ConstantInsertionPass.h" #include "backend/Backend.h" -#include <ir/Graph.h> -#include <util/Utils.h> +#include "ir/Graph.h" +#include "util/Utils.h" +#include "util/logging.h" namespace onert { @@ -27,15 +28,14 @@ namespace compiler namespace pass { -void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node) +void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::IOperation &node) { - const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index); - const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index); - const auto backend = op_seq_lower_info->backend(); - const auto layout = op_seq_lower_info->layout(); - const auto factor = ir::operand::PermuteFactor{backend, layout}; + const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index); + const auto backend = op_lower_info->backend(); + const auto layout = op_lower_info->layout(); + const auto factor = PermuteFactor{backend, layout}; - for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto &input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { auto &object = _graph.operands().at(input); @@ -44,22 +44,13 @@ void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::O const auto key = ReplaceKey{input, factor}; if (_replace_operands_map.count(key) == 0) { - auto new_object = object; - new_object.unsetDef(); - // TODO Remove const_case - const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear(); + ir::Operand new_object(object); + new_object.clearDefUse(); const auto new_index = _graph.operands().emplace(new_object); _replace_operands_map[key] = new_index; } const auto replaced_input = _replace_operands_map[key]; - // Update op_seq - if (_lowered_graph.op_seqs().at(op_sequence_index).getInputs().contains(input)) - { - // All inputs of op_seq have the same PermuteFactor because those inputs are inputs of first - // operation - 
_lowered_graph.op_seqs().at(op_sequence_index).replaceInputs(input, replaced_input); - } // Update the same inputs of a node at once because inputs of an operation have the same // PermuteFactor @@ -69,6 +60,8 @@ void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::O auto &replaced_object = _graph.operands().at(replaced_input); replaced_object.insertUse(node_index); + VERBOSE(ConstInsertPass) << "New operand " << replaced_input << " added(copy of " << input + << ") for " << factor << std::endl; // Remove this node from uses of origin operand // Constant operand has no def. assert(!object.getDef().valid()); @@ -76,12 +69,16 @@ void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::O // Remove origin operand if (object.getUses().size() == 0) + { _graph.removeOperand(input); + VERBOSE(ConstInsertPass) << "Original operand " << input << " removed - no uses" + << std::endl; + } } } // Now this runtime does not support the node making output as constant - for (const auto &output : node.getOutputs()) + for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { UNUSED_RELEASE(output); assert(!_graph.operands().at(output).isConstant()); diff --git a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h index 052883c92..d5b9aa14e 100644 --- a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h +++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h @@ -17,7 +17,7 @@ #ifndef __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__ #define __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__ -#include <ir/operand/PermuteFactor.h> +#include <compiler/PermuteFactor.h> #include <ir/Index.h> #include "LoweredOperationPass.h" #include <unordered_map> @@ -39,13 +39,13 @@ public: std::string id() final { return "ConstantInsertionPass"; } public: - void callback(const ir::OperationIndex &index, ir::Operation &node) final; + void callback(const ir::OperationIndex &index, ir::IOperation &node) final; private: struct ReplaceKey { ir::OperandIndex index; - ir::operand::PermuteFactor factor; + PermuteFactor factor; bool operator==(const ReplaceKey &other) const { @@ -61,8 +61,7 @@ private: std::size_t operator()(const ReplaceKey &key) const noexcept { using std::hash; - return hash<ir::OperandIndex>()(key.index) ^ - (hash<ir::operand::PermuteFactor>()(key.factor) << 1); + return hash<ir::OperandIndex>()(key.index) ^ (hash<PermuteFactor>()(key.factor) << 1); } }; diff --git a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc index 1c1dbe0ee..32e32d0ef 100644 --- a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc +++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc @@ -18,8 +18,9 @@ #include "backend/Backend.h" #include <ir/Graph.h> -#include <ir/operand/PermuteFactor.h> +#include <compiler/PermuteFactor.h> #include <util/Utils.h> +#include "util/logging.h" namespace onert { @@ -28,25 +29,25 @@ namespace compiler namespace pass { -void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node) +void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::IOperation &node) { - const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index); - const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index); - const auto backend = op_seq_lower_info->backend(); - const auto 
layout = op_seq_lower_info->layout(); - const auto factor = ir::operand::PermuteFactor{backend, layout}; + const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index); + const auto backend = op_lower_info->backend(); + const auto layout = op_lower_info->layout(); + const auto factor = PermuteFactor{backend, layout}; // Now this runtime does not support the node making output of operation as constant - for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto &input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { auto &object = _graph.operands().at(input); if (object.isConstant()) { // All constant operands are already assigned to each backend by ConstantInsertionPass. So a // constant has `def` and `use` with the same PermuteFactor - _lowered_graph.setLowerInfo(input, std::make_unique<ir::operand::LowerInfo>()); - _lowered_graph.getLowerInfo(input)->addDefPermuteFactor(factor); - _lowered_graph.getLowerInfo(input)->addUsePermuteFactor(factor); + auto operand_li = std::make_unique<compiler::OperandLowerInfo>(); + operand_li->addDefPermuteFactor(factor); + operand_li->addUsePermuteFactor(factor); + _lowered_graph.lower_info().operand.set(input, std::move(operand_li)); } } } diff --git a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h index e17d776d1..d60a1033f 100644 --- a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h +++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h @@ -36,7 +36,7 @@ public: std::string id() final { return "ConstantLoweringPass"; } public: - void callback(const ir::OperationIndex &index, ir::Operation &node) final; + void callback(const ir::OperationIndex &index, ir::IOperation &node) final; }; } // namespace pass diff --git a/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc new file mode 100644 index 000000000..1448de473 --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "ConstantOutputPass.h" + +#include "ir/Graph.h" +#include "ir/operation/Permute.h" +#include "util/logging.h" + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +void ConstantOutputPass::callback(const ir::OperandIndex &ind, ir::Operand &obj) +{ + if (!_graph.getOutputs().contains(ind) || !obj.isConstant()) + return; + + auto permute_input_ind = _graph.addOperand(obj.shape(), obj.typeInfo()); + auto &permute_input_obj = _graph.operands().at(permute_input_ind); + + // Move the const data + permute_input_obj.data(obj.shareData()); + obj.releaseData(); + obj.info().setAsNonConst(); + + using ir::operation::Permute; + auto permute_obj = std::make_unique<Permute>(permute_input_ind, ind, Permute::Type::COPY); + auto permute_ind = _graph.operations().push(std::move(permute_obj)); + + permute_input_obj.insertUse(permute_ind); + obj.setDef(permute_ind); + + // Make the operations that use this operand use the generated operand instead + auto orig_uses = obj.getUses(); + for (auto &&use : orig_uses) + { + permute_input_obj.insertUse(use); + obj.removeUse(use); + _graph.operations().at(use).replaceInputs(ind, permute_input_ind); + } + + VERBOSE(ConstantOutputPass) << "Permute Op inserted for a constant output, node index : " + << permute_ind << std::endl; + VERBOSE(ConstantOutputPass) << " - Input (inserted) Operand : " << permute_input_ind + << std::endl; + VERBOSE(ConstantOutputPass) << " - Output(original) Operand : " << ind << std::endl; +} + +} // namespace pass +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h new file mode 100644 index 000000000..193dd3a68 --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__ +#define __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__ + +#include "OperandPass.h" + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +/** + * @brief Pass to specially handle constant model outputs + * + * As an output buffer is given right before execution but constant initialization is done at + * the prepare phase, the current runtime structure cannot handle the case where an output is constant. + * To resolve this problem, this pass inserts a Permute layer with a const input and makes the model + * output tensor its output. + * + * e.g.) + * + * ((Const Output)) + * + * becomes + * + * (Const) -> [Permute] -> ((Output)) + * + * Note that this is a mandatory pass for Graph.
+ */ +class ConstantOutputPass : public OperandPass +{ +public: + using OperandPass::OperandPass; + +public: + std::string id() final { return "ConstantOutputPass"; } + +public: + void callback(const ir::OperandIndex &i, ir::Operand &o) final; +}; + +} // namespace pass +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__ diff --git a/runtime/onert/core/src/compiler/pass/IPass.h b/runtime/onert/core/src/compiler/pass/IPass.h new file mode 100644 index 000000000..77f5916fd --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/IPass.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_PASS_IPASS_H__ +#define __ONERT_COMPILER_PASS_IPASS_H__ + +#include <string> + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +struct IPass +{ + virtual ~IPass() = default; + + virtual std::string id() = 0; + virtual void run() = 0; +}; + +} // namespace pass +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_PASS_IPASS_H__ diff --git a/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h index 0c5f7d745..64831a0ac 100644 --- a/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h +++ b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h @@ -18,7 +18,7 @@ #define __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__ #include "OperandPass.h" -#include "compiler/LoweredGraph.h" +#include "compiler/ILoweredGraph.h" namespace onert { @@ -30,8 +30,8 @@ namespace pass class LoweredOperandPass : public OperandPass { public: - LoweredOperandPass(compiler::LoweredGraph &lowered_graph) - : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph} + LoweredOperandPass(compiler::ILoweredGraph &lowered_graph) + : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph} { // DO NOTHING } @@ -42,7 +42,7 @@ public: void callback(const ir::OperandIndex &i, ir::Operand &o) override = 0; protected: - compiler::LoweredGraph &_lowered_graph; + compiler::ILoweredGraph &_lowered_graph; }; } // namespace pass diff --git a/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h index 5c8569be2..27ca77c91 100644 --- a/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h +++ b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h @@ -18,7 +18,7 @@ #define __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__ #include "OperationPass.h" -#include "compiler/LoweredGraph.h" +#include "compiler/ILoweredGraph.h" namespace onert { @@ -30,8 +30,8 @@ namespace pass class LoweredOperationPass : public OperationPass { public: - LoweredOperationPass(LoweredGraph &lowered_graph) - : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph} + LoweredOperationPass(ILoweredGraph &lowered_graph) + : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph} {
// DO NOTHING } @@ -39,10 +39,10 @@ public: virtual ~LoweredOperationPass() = default; std::string id() override = 0; - void callback(const ir::OperationIndex &i, ir::Operation &o) override = 0; + void callback(const ir::OperationIndex &i, ir::IOperation &o) override = 0; protected: - LoweredGraph &_lowered_graph; + ILoweredGraph &_lowered_graph; }; } // namespace pass diff --git a/runtime/onert/core/src/compiler/pass/OddOutputPass.cc b/runtime/onert/core/src/compiler/pass/OddOutputPass.cc new file mode 100644 index 000000000..e2b3f6111 --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/OddOutputPass.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OddOutputPass.h" + +#include "ir/Graph.h" +#include "ir/operation/Permute.h" +#include "util/logging.h" +#include "util/Utils.h" + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +void OddOutputPass::run() +{ + auto &outputs = _graph.getOutputs(); + + VERBOSE(OddOutputPass) << "Case 1 : An operand which is a model output and a model input" + << std::endl; + for (const auto &ind : outputs) + { + if (_graph.getInputs().contains(ind)) + { + auto permute_output_ind = insertPermute(ind); + // Update the output to be newly added operand + _graph.getOutputs().replace(ind, permute_output_ind); + } + } + + VERBOSE(OddOutputPass) << "Case 2 : Two or more duplicated outputs" << std::endl; + std::unordered_set<ir::OperandIndex> occurence; + for (auto &&ind : outputs) + { + auto &obj = _graph.operands().at(ind); + if (occurence.count(ind) == 0) + { + occurence.insert(ind); + continue; + } + + // Panic when it is const, it must have been handled earlier in another pass + UNUSED_RELEASE(obj); + assert(!obj.isConstant()); + + auto permute_output_ind = insertPermute(ind); + ind = permute_output_ind; // Replace output index to fix output duplication + } +} + +ir::OperandIndex OddOutputPass::insertPermute(ir::OperandIndex ind) +{ + auto &obj = _graph.operands().at(ind); + auto output_ind = _graph.addOperand(obj.shape(), obj.typeInfo()); + auto &output_obj = _graph.operands().at(output_ind); + + using ir::operation::Permute; + auto permute_obj = std::make_unique<Permute>(ind, output_ind, Permute::Type::COPY); + auto permute_ind = _graph.operations().push(std::move(permute_obj)); + + output_obj.setDef(permute_ind); + obj.insertUse(permute_ind); + + VERBOSE(OddOutputPass) << "Permute Op inserted for a constant output, node index : " + << permute_ind << std::endl; + VERBOSE(OddOutputPass) << " - Input (original) Operand : " << ind << std::endl; + VERBOSE(OddOutputPass) << " - Output(inserted) Operand : " << output_ind << std::endl; + + return output_ind; +} + +} // namespace pass +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/pass/OddOutputPass.h b/runtime/onert/core/src/compiler/pass/OddOutputPass.h new file mode 100644 index 000000000..2accbac60 --- /dev/null +++ 
b/runtime/onert/core/src/compiler/pass/OddOutputPass.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__ +#define __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__ + +#include <unordered_set> + +#include "Pass.h" +#include "ir/Index.h" + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +/** + * @brief Pass to specially handle odd outputs in a subgraph + * + * Runtime Graph IR requires that every input or output have a distinct tensor index; this is onert's + * restriction. However, we allow duplicated indices in the models (or API), so we should + * transform the graph after model loading. + * + * This is necessary since our API lets users set different buffers for each input and output, so + * it is unavoidable that we must copy the value at runtime. + * + * Note that this is a mandatory pass for Graph. + * + * Case 1 : An operand which is a model output and a model input + * + * Create an operand, insert a Permute(copy) op between them, and change the output to be the + * newly generated operand. + * + * e.g.) + * + * ``` + * ((#0 Input0 and also Output0)) + * becomes + * ((#0 Input0)) -> [#0 Permute] -> ((#1 Output0)) + * ``` + * + * Case 2 : Two or more duplicated outputs + * + * Do the same as in Case 1, but between two outputs of the same tensor index. + * + * e.g.)
+ * + * ``` + * ((#0 Input0)) -> [#0 Some Operation] -> ((#1 Output0 and also Output1)) + * becomes + * ((#0 Input0)) -> [#0 Some Operation] -> ((#1 Output0)) [#1 Permute] -> ((#2 Output1)) + * ``` + * + */ +class OddOutputPass : public Pass +{ +public: + using Pass::Pass; + +public: + std::string id() final { return "OddOutputPass"; } + +public: + void run() override; + +private: + ir::OperandIndex insertPermute(ir::OperandIndex input); +}; + +} // namespace pass +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__ diff --git a/runtime/onert/core/src/compiler/pass/OperandPass.cc b/runtime/onert/core/src/compiler/pass/OperandPass.cc index 50c001c30..db8ebedcd 100644 --- a/runtime/onert/core/src/compiler/pass/OperandPass.cc +++ b/runtime/onert/core/src/compiler/pass/OperandPass.cc @@ -28,7 +28,7 @@ namespace pass void OperandPass::run() { _graph.operands().iterate( - [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); }); + [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); }); } } // namespace pass diff --git a/runtime/onert/core/src/compiler/pass/OperationPass.cc b/runtime/onert/core/src/compiler/pass/OperationPass.cc index d7a55cb22..bd9bcb4a4 100644 --- a/runtime/onert/core/src/compiler/pass/OperationPass.cc +++ b/runtime/onert/core/src/compiler/pass/OperationPass.cc @@ -17,7 +17,7 @@ #include "OperationPass.h" #include "ir/Index.h" -#include "ir/Operation.h" +#include "ir/IOperation.h" #include "ir/Graph.h" namespace onert @@ -30,7 +30,7 @@ namespace pass void OperationPass::run() { _graph.operations().iterate( - [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); }); + [&](const ir::OperationIndex &index, ir::IOperation &node) { callback(index, node); }); } } // namespace pass diff --git a/runtime/onert/core/src/compiler/pass/OperationPass.h b/runtime/onert/core/src/compiler/pass/OperationPass.h index ac4d818a2..0a00b11d1 100644 --- a/runtime/onert/core/src/compiler/pass/OperationPass.h +++ b/runtime/onert/core/src/compiler/pass/OperationPass.h @@ -29,7 +29,7 @@ namespace onert { namespace ir { -class Operation; +struct IOperation; } // namespace ir } // namespace onert @@ -62,7 +62,7 @@ public: * @param index is the index of a node in graph * @param node is the node in graph */ - virtual void callback(const ir::OperationIndex &index, ir::Operation &node) = 0; + virtual void callback(const ir::OperationIndex &index, ir::IOperation &node) = 0; /** * @brief Run the pass diff --git a/runtime/onert/core/src/compiler/pass/Pass.h b/runtime/onert/core/src/compiler/pass/Pass.h index 3f356c337..b34695c97 100644 --- a/runtime/onert/core/src/compiler/pass/Pass.h +++ b/runtime/onert/core/src/compiler/pass/Pass.h @@ -17,6 +17,8 @@ #ifndef __ONERT_COMPILER_PASS_PASS_H__ #define __ONERT_COMPILER_PASS_PASS_H__ +#include "IPass.h" + #include <string> namespace onert @@ -24,7 +26,7 @@ namespace onert namespace ir { class Graph; -} // namespace compiler +} // namespace ir } // namespace onert namespace onert @@ -34,7 +36,7 @@ namespace compiler namespace pass { -class Pass +class Pass : public IPass { public: Pass(ir::Graph &graph) : _graph{graph} {} diff --git a/runtime/onert/core/src/compiler/pass/PassRunner.cc b/runtime/onert/core/src/compiler/pass/PassRunner.cc new file mode 100644 index 000000000..cd1b82bb2 --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/PassRunner.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "PassRunner.h" + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +PassRunner &PassRunner::append(std::unique_ptr<IPass> pass) +{ + _passes.emplace_back(std::move(pass)); + return *this; +} + +void PassRunner::run() +{ + for (auto &&pass : _passes) + { + VERBOSE(PassRunner) << "Start running '" << pass->id() << "'" << std::endl; + pass->run(); + VERBOSE(PassRunner) << "Finished running '" << pass->id() << "'" << std::endl; + // TODO Dump graph? + } +} + +} // namespace pass +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/pass/PassRunner.h b/runtime/onert/core/src/compiler/pass/PassRunner.h new file mode 100644 index 000000000..03bfbe220 --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/PassRunner.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_COMPILER_PASS_PASS_RUNNER_H__ +#define __ONERT_COMPILER_PASS_PASS_RUNNER_H__ + +#include <initializer_list> +#include <memory> +#include <vector> + +#include "IPass.h" +#include "util/logging.h" + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +/** + * @brief Composite passes with logging + */ +class PassRunner +{ +public: + PassRunner() = default; + PassRunner &append(std::unique_ptr<IPass> pass); + + void run(); + +private: + std::vector<std::unique_ptr<IPass>> _passes; +}; + +} // namespace pass +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_PASS_PASS_RUNNER_H__ diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc index f01697034..d9452c7f9 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc @@ -15,8 +15,8 @@ */ #include "PermutationEliminationPass.h" -#include "backend/controlflow/Config.h" +#include "backend/Backend.h" #include "util/logging.h" namespace onert @@ -26,7 +26,7 @@ namespace compiler namespace pass { -void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::Operation &node) +void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::IOperation &node) { _op_ind = ind; node.accept(*this); @@ -39,8 +39,9 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node) // Check if the two tensors are both portable; if not, we can't eliminate the node { - auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement(); - auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement(); + auto &operand_li_map = _lowered_graph.lower_info().operand; + auto in_def_factor = operand_li_map.getRawPtr(in_operand)->def_factors().getOnlyElement(); + auto out_def_factor = operand_li_map.getRawPtr(out_operand)->def_factors().getOnlyElement(); + auto in_config = in_def_factor.backend()->config(); auto out_config = out_def_factor.backend()->config(); @@ -53,59 +54,50 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node) if (_graph.getOutputs().contains(out_operand)) { + // If the input is a const, we cannot remove it since we cannot put the constant data in the + // output buffer during the prepare phase. + auto permute_input = node.getInputs().at(0); + if (_graph.operands().at(permute_input).isConstant()) + return; + // If the input is a model input, we cannot remove it since our API lets users set different + // buffers for inputs and outputs even though one tensor is both at the same time. + auto permute_output = node.getOutputs().at(0); + if (_graph.getInputs().contains(permute_input) && _graph.getOutputs().contains(permute_output)) + return; + // Likewise, if copying from one model output to another model output, keep it.
+ if (_graph.getOutputs().contains(permute_input) && _graph.getOutputs().contains(permute_output)) + return; + // Exceptional case : When the output operand is a model output // In this case we keep the output and remove the input auto &out_operand_obj = _graph.operands().at(out_operand); assert(out_operand_obj.getDef() == _op_ind); out_operand_obj.unsetDef(); - _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) { - if (!op_seq.getOutputs().contains(in_operand)) + _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) { + if (!op.getOutputs().contains(in_operand)) return; - - // Update OpSequence/ir::Operation edges and ir::Operand edges - op_seq.replaceOutputs(in_operand, out_operand); - for (auto op : op_seq.operations()) - { - auto &operation_obj = _graph.operations().at(op); - if (operation_obj.getOutputs().contains(in_operand)) - { - operation_obj.replaceOutputs(in_operand, out_operand); - out_operand_obj.setDef(op); - } - } + // Update Operation and Operand edges + op.replaceOutputs(in_operand, out_operand); + out_operand_obj.setDef(op_ind); }); - // Remove Permute operation, enclosing OpSequence and the operand + // Remove Permute operation and the operand { _graph.removeOperand(in_operand); - - auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind); - // Assumes enclosing OpSequence contatins just this Permute operation - assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1); - _lowered_graph.op_seqs().remove(op_seq_ind); _graph.operations().remove(_op_ind); } - _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) { - if (!op_seq.getInputs().contains(in_operand)) + _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) { + if (!op.getInputs().contains(in_operand)) return; - - op_seq.replaceInputs(in_operand, out_operand); - for (auto op : op_seq.operations()) - { - auto &operation_obj = _graph.operations().at(op); - if (operation_obj.getInputs().contains(in_operand)) - { - operation_obj.replaceInputs(in_operand, out_operand); - out_operand_obj.insertUse(op); - } - } + op.replaceInputs(in_operand, out_operand); + out_operand_obj.insertUse(op_ind); }); VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl; - VERBOSE(removePermute) << " - Input (removed) ir::Operand : " << in_operand << std::endl; - VERBOSE(removePermute) << " - Output(kept) ir::Operand : " << out_operand << std::endl; + VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl; + VERBOSE(removePermute) << " - Output(kept) Operand : " << out_operand << std::endl; } else { @@ -114,37 +106,23 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node) auto &in_operand_obj = _graph.operands().at(in_operand); in_operand_obj.removeUse(_op_ind); - // Make OpSequences(that use the output) use the input - _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) { - if (!op_seq.getInputs().contains(out_operand)) + // Make operations(that use the output) use the input + _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) { + if (!op.getInputs().contains(out_operand)) return; - - op_seq.replaceInputs(out_operand, in_operand); - for (auto op : op_seq.operations()) - { - auto &operation_obj = _graph.operations().at(op); - if (operation_obj.getInputs().contains(out_operand)) - { - operation_obj.replaceInputs(out_operand, in_operand); - 
in_operand_obj.insertUse(op); - } - } + op.replaceInputs(out_operand, in_operand); + in_operand_obj.insertUse(op_ind); }); - // Remove Permute operation, enclosing OpSequence and the operand + // Remove the Permute operation and out_operand { _graph.removeOperand(out_operand); - - auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind); - // Assumes enclosing OpSequence contatins just this Permute operation - assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1); - _lowered_graph.op_seqs().remove(op_seq_ind); _graph.operations().remove(_op_ind); } - VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl; - VERBOSE(removePermute) << " - Input (kept) ir::Operand : " << in_operand << std::endl; - VERBOSE(removePermute) << " - Output(removed) ir::Operand : " << out_operand << std::endl; + VERBOSE(removePermute) << "Permute Op removed : " << _op_ind << std::endl; + VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl; + VERBOSE(removePermute) << " - Output(removed) Operand : " << out_operand << std::endl; } } diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h index 29daf1a82..18ba99804 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h +++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h @@ -35,7 +35,7 @@ namespace pass * are compatible and layouts match. * * Permute input tensor is kept and the output is removed for all the cases, except model outputs. - * As all output tensors have to be controlflow backend, so the output is kept. + * As all output tensors have to be builtin backend, so the output is kept. * * @note This is an optimization pass which means that everything should work fine even if this pass * was skipped. @@ -49,7 +49,7 @@ public: std::string id() final { return "PermutationEliminationPass"; } public: - void callback(const ir::OperationIndex &i, ir::Operation &n) final; + void callback(const ir::OperationIndex &i, ir::IOperation &n) final; private: void visit(const ir::operation::Permute &) final; diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc index c83a72ada..11c22778e 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc @@ -9,6 +9,7 @@ * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
@@ -16,18 +17,16 @@ #include "PermutationInsertionPass.h" -#include <cassert> -#include <utility> -#include <unordered_map> +#include "../../backend/builtin/Config.h" -#include "backend/controlflow/Config.h" -#include "ir/Operand.h" -#include "ir/operation/LowerInfo.h" -#include "ir/Graph.h" -#include "backend/IConfig.h" +#include "compiler/OperationLowerInfo.h" +#include "ir/operation/Permute.h" #include "util/logging.h" + +#include <cassert> #include <memory> -#include "ir/operation/Permute.h" +#include <unordered_map> +#include <utility> namespace onert { @@ -38,7 +37,8 @@ namespace pass void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Operand &object) { - auto &&operand_li = _lowered_graph.getLowerInfo(index); + auto &operand_li_map = _lowered_graph.lower_info().operand; + auto &&operand_li = operand_li_map.getRawPtr(index); assert(operand_li); // NOTE Later, constants also will have Def @@ -51,16 +51,16 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera std::list<ir::OperationIndex> permute_indexes; // Build a map for all necessary type of operands - std::unordered_map<ir::operand::PermuteFactor, ir::OperandIndex> factor_to_index; + std::unordered_map<PermuteFactor, ir::OperandIndex> factor_to_index; { assert(operand_li->def_factors().size() == 1); - for (auto factor : operand_li->def_factors()) + for (auto &&factor : operand_li->def_factors()) { factor_to_index.emplace(factor, index); } auto insert_set = operand_li->use_factors() - operand_li->def_factors(); - for (auto factor : insert_set) + for (auto &&factor : insert_set) { const auto permute_operation_index = insertPermute(index, factor); permute_indexes.push_back(permute_operation_index); @@ -75,33 +75,22 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera std::list<ir::OperationIndex> remove_list; auto uses = object.getUses(); - for (auto use : uses) + for (auto &&use : uses) { // If permute operation, ignore it if (std::find(permute_indexes.begin(), permute_indexes.end(), use) != permute_indexes.end()) continue; auto &operation = _graph.operations().at(use); - assert(_lowered_graph.op_seqs().containsOperation(use)); - auto op_seq_index = _lowered_graph.op_seqs().getOperation(use); - auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index); - assert(op_seq_li); - const auto op_seq_layout = op_seq_li->layout(); - const backend::Backend *backend = op_seq_li->backend(); + auto op_li = _lowered_graph.lower_info().operation.getRawPtr(use); + assert(op_li); + const auto op_layout = op_li->layout(); + const backend::Backend *backend = op_li->backend(); assert(backend); - auto use_node_inputs = operation.getInputs(); - assert(use_node_inputs.contains(index)); - auto new_index = factor_to_index.at({backend, op_seq_layout}); + auto new_index = factor_to_index.at({backend, op_layout}); if (index != new_index) { - // Update from op_seq - // Replace the same inputs of an OpSequence at once for the following reasons: - // 1. An OpSequence's inputs are the same inputs of first operation - // 2. An OpSequence may have inputs as the same operand (2 or more). - // 3. The same inputs of OpSequence have the same PermuteFactor. - _lowered_graph.op_seqs().at(op_seq_index).replaceInputs(index, new_index); - // Update from operation // Replace the same inputs of an operation at once for the following reasons: // No. 
2 and 3 above @@ -109,63 +98,69 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera // Update from operand remove_list.push_back( - use); // Removal should be done in another loop since we are in the loop + use); // Removal should be done in another loop since we are in the loop _graph.operands().at(new_index).insertUse(use); } } - for (auto &operation : remove_list) + for (const auto &operation_index : remove_list) { - object.removeUse(operation); + object.removeUse(operation_index); } } } ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandIndex &operand_index, - const ir::operand::PermuteFactor &factor) + const PermuteFactor &factor) { - assert(!_graph.isBuildingPhase()); - auto &operand = _graph.operands().at(operand_index); // Generate output operand and permute operation auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo()); - // change model output if operand_index is model output index + // change model output if operand_index is model output index and the out operand is builtin + // backend auto &model_outputs = _graph.getOutputs(); - if (model_outputs.contains(operand_index)) + const backend::Backend *builtin_backend = compiler::BackendManager::get().getBuiltin(); + assert(builtin_backend->config()->id() == onert::backend::builtin::Config::ID); + + if (model_outputs.contains(operand_index) && factor.backend() == builtin_backend) { model_outputs.replace(operand_index, out_operand_index); } + auto &operand_li_map = _lowered_graph.lower_info().operand; + // Find Permute information - auto input_factor = _lowered_graph.getLowerInfo(operand_index)->def_factors().getOnlyElement(); + auto input_factor = operand_li_map.getRawPtr(operand_index)->def_factors().getOnlyElement(); auto input_backend = input_factor.backend(); auto output_backend = factor.backend(); // NOTE Permute may not have specific layout because the layout of input and output may be // different. const auto permute_node_layout = ir::Layout::UNKNOWN; // NOTE If one backend supports several layout, the backend must support Permute operation - const backend::Backend *permute_node_backend = compiler::BackendManager::get().getControlflow(); + const backend::Backend *permute_node_backend = compiler::BackendManager::get().getBuiltin(); + assert(permute_node_backend->config()->id() == onert::backend::builtin::Config::ID); + if (input_backend == output_backend) { permute_node_backend = input_backend; } - const ir::operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout}; + const PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout}; // Update LowerInfo of input operand - auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index); + auto operand_lower_info = operand_li_map.getRawPtr(operand_index); operand_lower_info->removeUsePermuteFactor(factor); operand_lower_info->addUsePermuteFactor(permute_node_factor); // Update LowerInfo of output operand - auto out_operand_li = std::make_unique<ir::operand::LowerInfo>(); + auto out_operand_li = std::make_unique<compiler::OperandLowerInfo>(); // The input and output factors of all nodes will be the same except Permute. So Tensor's // allocators allocates memory using only the information of def permutation factor now. 
// TODO Change param to permute_node_factor out_operand_li->addDefPermuteFactor(factor); out_operand_li->addUsePermuteFactor(factor); - _lowered_graph.setLowerInfo(out_operand_index, std::move(out_operand_li)); + operand_li_map.set(out_operand_index, std::move(out_operand_li)); // Insert permute operation to the graph const auto input_layout = input_factor.layout(); @@ -188,20 +183,18 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde auto insert_node = std::make_unique<Permute>(operand_index, out_operand_index, permute_type); auto node_index = _graph.operations().push(std::move(insert_node)); - const auto &node = _graph.operations().at(node_index); VERBOSE_F() << "Permute Op inserted, node index : " << node_index << std::endl; - VERBOSE_F() << " - Input (original) Operand : " << operand_index << std::endl; - VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << std::endl; + VERBOSE_F() << " - Input (original) Operand : " << operand_index << "(" + << input_factor.backend()->config()->id() << ")" << std::endl; + VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << "(" + << factor.backend()->config()->id() << ")" << std::endl; - // OpSequence + // Operation LowerInfo { - auto op_seq_index = _lowered_graph.op_seqs().emplace(node_index, permute_node_layout); - auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index); - op_seq.setInputs(node.getInputs()); - op_seq.setOutputs(node.getOutputs()); - _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<ir::operation::LowerInfo>( - permute_node_backend, permute_node_layout)); + auto &operation_li_map = _lowered_graph.lower_info().operation; + operation_li_map.set(node_index, std::make_unique<compiler::OperationLowerInfo>( + permute_node_backend, permute_node_layout)); } // Update Use/Def info diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h index 758515385..ee0a1464c 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h +++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h @@ -20,7 +20,7 @@ #include "LoweredOperandPass.h" #include "compiler/BackendManager.h" #include "ir/Operand.h" -#include "ir/operand/PermuteFactor.h" +#include "compiler/PermuteFactor.h" namespace onert { @@ -48,7 +48,7 @@ private: * @return ir::OperationIndex */ ir::OperationIndex insertPermute(const ir::OperandIndex &operand_index, - const ir::operand::PermuteFactor &factor); + const PermuteFactor &factor); }; } // namespace pass diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc index c5c95c726..f014d29d3 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc @@ -30,10 +30,10 @@ namespace pass using namespace ir; -void PermutationOperationPass::callback(const OperationIndex &, Operation &node) +void PermutationOperationPass::callback(const OperationIndex &, IOperation &node) { node.accept(*this); -}; +} // TODO Remove this. 
Expanding ranks of Operand is dangerous void PermutationOperationPass::applyExpandRanks(const Operation &node) @@ -43,9 +43,8 @@ void PermutationOperationPass::applyExpandRanks(const Operation &node) assert(output.getDef().valid()); const auto node_index = output.getDef(); - const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index); - const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout(); - const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout(); + const auto frontend_layout = _graph.layout(); + const auto backend_layout = _lowered_graph.lower_info().operation.getRawPtr(node_index)->layout(); if (frontend_layout == backend_layout) { @@ -84,10 +83,11 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node) assert(output_obj.getDef().valid()); const auto node_index = output_obj.getDef(); - const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index); - const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout(); - const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout(); + auto &operation_li_map = _lowered_graph.lower_info().operation; + auto &operand_li_map = _lowered_graph.lower_info().operand; + const auto frontend_layout = _graph.layout(); + const auto backend_layout = operation_li_map.getRawPtr(node_index)->layout(); if (frontend_layout == backend_layout) { @@ -97,96 +97,27 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node) // Permutation changing layout beyond 4-D is not supported yet assert(output_obj.shape().rank() <= 4); - // Divide op_seq based on target operation - { - auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index); - auto &operations = _lowered_graph.graph().operations(); - - // Create new op_seq and move information from existing op_seq to new op_seq if target - // node is the end of op_seq - auto it = prev_op_seq.begin(); - // Find iterator of target node in op_seq - while (*(it++) != node_index) - ; - if (it != prev_op_seq.end()) - { - const auto &target_op_idx = *it; - const auto &target_node = operations.at(target_op_idx); - const auto &next_op_seq_index = - _lowered_graph.op_seqs().emplace(target_op_idx, prev_op_seq.getLayout()); - auto &next_op_seq = _lowered_graph.op_seqs().at(next_op_seq_index); - next_op_seq.setInputs(target_node.getInputs()); - next_op_seq.setOutputs(target_node.getOutputs()); - - std::vector<OperationIndex> remove_list; - remove_list.emplace_back(target_op_idx); - while (++it != prev_op_seq.end()) - { - next_op_seq.appendOperation(target_op_idx); - next_op_seq.setOutputs(target_node.getOutputs()); - remove_list.emplace_back(target_op_idx); - } - - prev_op_seq.setOutputs(node.getOutputs()); - for (const auto &index : remove_list) - { - prev_op_seq.remove(index); - } - - const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index); - _lowered_graph.setLowerInfo( - next_op_seq_index, - std::make_unique<ir::operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout())); - } - } - - // Remove target operation from op_seq and insert the target operation to new op_seq + // Change PermuteFactors of operands and the operation of target node { - const auto backend = _lowered_graph.getLowerInfo(op_seq_index)->backend(); + const auto op_li = operation_li_map.getRawPtr(node_index); + const auto backend = op_li->backend(); - // Remove target operation from op_sequence - _lowered_graph.op_seqs().removeFromOpSequence(node_index); + operation_li_map.set(node_index, + 
std::make_unique<compiler::OperationLowerInfo>(backend, frontend_layout)); - if (!_lowered_graph.op_seqs().exist(op_seq_index)) - { - // Remove lowerinfo for op_seq of target operation if the op_seq does not exist - _lowered_graph.removeLowerInfo(op_seq_index); - } - else - { - // Update op_seq of target operation if the op_seq exists - auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index); - const auto &last_node_idx = *(--prev_op_seq.end()); - const auto &last_node = _lowered_graph.graph().operations().at(last_node_idx); - prev_op_seq.setOutputs(last_node.getOutputs()); - } - - // Create new op_seq and set information to the op_seq - auto new_op_seq_index = _lowered_graph.op_seqs().emplace(node_index, frontend_layout); - auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index); - new_op_seq.setInputs(node.getInputs()); - new_op_seq.setOutputs(node.getOutputs()); - _lowered_graph.setLowerInfo( - new_op_seq_index, std::make_unique<ir::operation::LowerInfo>(backend, frontend_layout)); - } - - // Change PermuteFactors of operands of target node - { - const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index); - const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index); - const auto backend = op_seq_li->backend(); - const operand::PermuteFactor removed_factor{backend, backend_layout}; - const operand::PermuteFactor new_factor{backend, frontend_layout}; + const PermuteFactor removed_factor{backend, backend_layout}; + const PermuteFactor new_factor{backend, frontend_layout}; for (const auto &input : node.getInputs() | Remove::DUPLICATED | Remove::UNDEFINED) { + // Check if it can be removed by checking if the operand is used by another operation and + // it uses the same backend and layout bool canRemove = true; for (const auto &use : _graph.operands().at(input).getUses()) { if (use != node_index) { - const auto &use_op_seq_index = _lowered_graph.op_seqs().getOperation(use); - auto use_op_seq_li = _lowered_graph.getLowerInfo(use_op_seq_index); - if (use_op_seq_li->backend() == backend && use_op_seq_li->layout() == backend_layout) + auto use_op_li = operation_li_map.getRawPtr(use); + if (use_op_li->backend() == backend && use_op_li->layout() == backend_layout) { canRemove = false; break; @@ -194,27 +125,27 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node) } } - auto lower_info = _lowered_graph.getLowerInfo(input); + auto input_li = operand_li_map.getRawPtr(input); if (canRemove) { - lower_info->removeUsePermuteFactor(removed_factor); + input_li->removeUsePermuteFactor(removed_factor); } - lower_info->addUsePermuteFactor(new_factor); + input_li->addUsePermuteFactor(new_factor); // Whether if node's input is an input of model or a constant if (!_graph.operands().at(input).getDef().valid() && - (lower_info->def_factors().size() == 1 && - lower_info->def_factors().getOnlyElement() == removed_factor)) + (input_li->def_factors().size() == 1 && + input_li->def_factors().getOnlyElement() == removed_factor)) { assert(_graph.getInputs().contains(input) || _graph.operands().at(input).isConstant()); - lower_info->removeDefPermuteFactor(removed_factor); - lower_info->addDefPermuteFactor(new_factor); + input_li->removeDefPermuteFactor(removed_factor); + input_li->addDefPermuteFactor(new_factor); } } - for (const auto &output : node.getOutputs() | Remove::DUPLICATED) + for (const auto &output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED) { - auto lower_info = _lowered_graph.getLowerInfo(output); + auto lower_info = 
operand_li_map.getRawPtr(output); lower_info->removeDefPermuteFactor(removed_factor); lower_info->addDefPermuteFactor(new_factor); @@ -279,6 +210,18 @@ void PermutationOperationPass::visit(const ir::operation::Gather &node) } } +void PermutationOperationPass::visit(const ir::operation::OneHot &node) +{ + const auto &output_ind = node.getOutputs().at(0); + const auto &output_obj = _graph.operands().at(output_ind); + const auto &output_shape = output_obj.shape(); + + if (output_shape.rank() >= 4) + { + changeToKeepLayout(node); + } +} + void PermutationOperationPass::visit(const ir::operation::Pack &node) { const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT); diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h index 2dd76b971..e253a77ad 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h +++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h @@ -36,7 +36,7 @@ public: std::string id() final { return "PermutationOperationPass"; } public: - void callback(const ir::OperationIndex &i, ir::Operation &n) final; + void callback(const ir::OperationIndex &i, ir::IOperation &n) final; public: void visit(const ir::operation::BinaryArithmetic &) final; @@ -44,6 +44,7 @@ public: void visit(const ir::operation::Concat &) final; void visit(const ir::operation::ElementwiseBinary &) final; void visit(const ir::operation::ElementwiseUnary &) final; + void visit(const ir::operation::OneHot &) final; void visit(const ir::operation::Pack &) final; void visit(const ir::operation::PReLU &) final; void visit(const ir::operation::SquaredDifference &) final; diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc new file mode 100644 index 000000000..162c4e7ef --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Pass.h" + +#include "UnusedOperandEliminationPass.h" +#include "ir/Index.h" +#include "util/Set.h" +#include "ir/Graph.h" + +/** + * @file UnusedOperandEliminationPass.cc + * @brief This file contains UnusedOperandEliminationPass class implementation + */ + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +void UnusedOperandEliminationPass::run() +{ + util::Set<ir::OperandIndex> used; + + _graph.operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &node) { + for (auto &&ind : (node.getInputs() + node.getOutputs()) | ir::Remove::UNDEFINED) + { + used.add(ind); + } + }); + + // Graph's inputs/outputs are always considered as used + for (auto &&ind : (_graph.getInputs() + _graph.getOutputs()) | ir::Remove::UNDEFINED) + { + used.add(ind); + } + + _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { + if (!used.contains(ind)) + { + VERBOSE() << "Remove unused operand " << ind << std::endl; + _graph.operands().remove(ind); + } + }); +} + +} // namespace pass +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.h b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.h new file mode 100644 index 000000000..8078f4246 --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file UnusedOperandEliminationPass.h + * @brief This file contains UnusedOperandEliminationPass class + */ + +#ifndef __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__ +#define __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__ + +#include "Pass.h" + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +/** + * @brief A pass to eliminate unused operands from the graph + * + * Remove operands that are not used by any operations, except Graph inputs/outputs. + * + */ +class UnusedOperandEliminationPass : public Pass +{ +public: + using Pass::Pass; + +public: + std::string id() override { return "UnusedOperandEliminationPass"; } + void run() final; +}; + +} // namespace pass +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__ diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc new file mode 100644 index 000000000..572b4df24 --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "UnusedOperandEliminationPass.h" + +#include "ir/Graph.h" + +#include <gtest/gtest.h> + +using namespace onert::ir; +using namespace onert::compiler::pass; + +TEST(UnusedOperandEliminationPass, Simple) +{ + Graph graph; + + // Add tensors + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + auto in = graph.addOperand(shape, type); + auto out = graph.addOperand(shape, type); + + auto unused = graph.addOperand(shape, type); + + // Set model inputs/outputs + graph.addInput(in); + graph.addOutput(out); + + UnusedOperandEliminationPass{graph}.run(); + + ASSERT_TRUE(graph.operands().exist(in)); + ASSERT_TRUE(graph.operands().exist(out)); + ASSERT_FALSE(graph.operands().exist(unused)); +} diff --git a/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc b/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc new file mode 100644 index 000000000..490c648cd --- /dev/null +++ b/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compiler/train/LoweredTrainableGraph.h" + +#include "../ManualScheduler.h" +#include "../pass/ConstantInsertionPass.h" +#include "../pass/ConstantLoweringPass.h" +#include "../pass/PassRunner.h" +#include "../pass/PermutationEliminationPass.h" +#include "../pass/PermutationInsertionPass.h" +#include "../pass/PermutationOperationPass.h" +#include "../../backend/builtin/Config.h" +#include "../../dumper/text/GraphDumper.h" +#include "../../ir/verifier/Verifier.h" +#include "TrainableOperationConverter.h" + +#include "backend/Backend.h" +#include "backend/train/ITrainableBackend.h" +#include "compiler/BackendResolver.h" +#include "util/logging.h" + +#include <cassert> +#include <sstream> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +LoweredTrainableGraph::LoweredTrainableGraph(ir::train::TrainableGraph &graph, + const CompilerOptions &options) + : _trainable_graph{graph} +{ + lowerGraph(options); +} + +void LoweredTrainableGraph::lowerGraph(const CompilerOptions &options) +{ + // Build backend contexts + auto &backend_manager = BackendManager::get(); + // Create contexts for other backends + for (auto &&backend_str : options.backend_list) + { + backend_manager.loadBackend(backend_str); + auto backend = backend_manager.get(backend_str); + + // TODO As the default value of backend list contains "cpu", "acl_cl" and "acl_neon", and some + // are not available on x64 or some other platforms. 
So this may be a workaround for x64 and + // we should change it back(throw if backend is not loaded) later. + if (!backend) + { + VERBOSE(LoweredTrainableGraph) << "Cannot load backend - " << backend_str << std::endl; + continue; + } + } + if (backend_manager.num_backends() == 0) + throw std::runtime_error{"No available backends loaded."}; + + // TODO Move "schedule" phase out of here + // TODO Scheduling + std::unique_ptr<BackendResolver> backend_resolver; + auto all_backends = backend_manager.getAll(); + + auto scheduler = ManualScheduler(all_backends, options); + backend_resolver = scheduler.schedule(_trainable_graph.graph()); + + // Check if backends are trainable + _trainable_graph.operations().iterate( + [&](const ir::OperationIndex &op_ind, const ir::IOperation &) { + const auto backend = backend_resolver->getBackend(op_ind); + + // TODO Remove dynamic_cast + if (dynamic_cast<const backend::train::ITrainableBackend *>(backend) == nullptr) + { + throw std::runtime_error(backend->config()->id() + "backend does not support training"); + } + }); + + makeLowerInfo(*backend_resolver); + VERBOSE(LoweredTrainableGraph) << "dump before mandatory passes" << std::endl; + dumper::text::dumpLoweredGraph(*this); + + // Mandatory passes - kind of legalization(?) + compiler::pass::PassRunner{} + .append(std::make_unique<compiler::pass::ConstantInsertionPass>(*this)) + .append(std::make_unique<compiler::pass::ConstantLoweringPass>(*this)) + .append(std::make_unique<compiler::pass::PermutationOperationPass>(*this)) + .append(std::make_unique<compiler::pass::PermutationInsertionPass>(*this)) + .run(); + + // TODO Move converting Permute op into PermutationInsertionPass + auto op_converter = TrainableOperationConverter{_trainable_graph, nullptr}; + _trainable_graph.operations().iterate( + [&](const onert::ir::OperationIndex &index, const onert::ir::IOperation &op) { + if (op.opcode() == ir::OpCode::Permute) + { + auto trainable_op = op_converter(op); + auto gen_index = _trainable_graph.replaceOperation(index, std::move(trainable_op)); + UNUSED_RELEASE(gen_index); + assert(gen_index == index); + } + }); + + dumpLowerInfo(); + + // Optimization passes (optional) + compiler::pass::PassRunner{} + .append(std::make_unique<compiler::pass::PermutationEliminationPass>(*this)) + .run(); + + // TODO Update LowerInfo for training + + VERBOSE(LoweredTrainableGraph) << "Dump after all the passes" << std::endl; + for (auto &&operand : _trainable_graph.getInputs()) + VERBOSE(LoweredTrainableGraph) << "Graph Input : " << operand << std::endl; + for (auto &&operand : _trainable_graph.getOutputs()) + VERBOSE(LoweredTrainableGraph) << "Graph Output : " << operand << std::endl; + dumper::text::dumpLoweredGraph(*this); + + // Graph verifications + { + assert(ir::verifier::InputOutputChecker().verify(_trainable_graph.graph())); + assert(ir::verifier::DAGChecker().verify(_trainable_graph.graph())); + assert(ir::verifier::EdgeChecker().verify(_trainable_graph.graph())); + } +} + +void LoweredTrainableGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolver) +{ + _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) { + lower_info().operand.set(index, std::make_unique<OperandLowerInfo>()); + }); + + // Set operand lower info using assigned backends to operations + _trainable_graph.operations().iterate( + [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) { + auto backend = backend_resolver.getBackend(op_ind); + if (!backend) + { + throw std::runtime_error{"Fail 
to find backend for " + op.name() + " operation"}; + } + + auto frontend_layout = _trainable_graph.layout(); + + // The layout of each backend should be set at another place + // TODO Change setting layout of each backend at another place + auto backend_layout = backend->config()->supportLayout(op, frontend_layout); + + for (auto &&ind : op.getInputs() | ir::Remove::UNDEFINED) + { + auto &operand_li = lower_info().operand.at(ind); + operand_li.addUsePermuteFactor(PermuteFactor{backend, backend_layout}); + } + for (auto &&ind : op.getOutputs() | ir::Remove::UNDEFINED) + { + auto &operand_li = lower_info().operand.at(ind); + operand_li.addDefPermuteFactor(PermuteFactor{backend, backend_layout}); + } + lower_info().operation.set( + op_ind, std::make_unique<compiler::OperationLowerInfo>(backend, backend_layout)); + }); + + // Handle graph inputs and outputs + const auto builtin_backend = BackendManager::get().getBuiltin(); + auto factor = PermuteFactor{builtin_backend, _trainable_graph.layout()}; + for (auto &&index : _trainable_graph.getInputs() | ir::Remove::UNDEFINED) + { + auto &operand_li = lower_info().operand.at(index); + assert(operand_li.def_factors().empty()); + operand_li.addDefPermuteFactor(factor); + } + for (auto &&index : _trainable_graph.getOutputs() | ir::Remove::UNDEFINED) + { + auto &operand_li = lower_info().operand.at(index); + operand_li.addUsePermuteFactor(factor); + } + + // Handle variable tensors + _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &operand) { + // Some inputs of an operation could be non-constant, but not existed in graph inputs/outputs + // and not undefined operand - these are variable tensors. For example, + // UnidirectionalSequenceLSTM has such inputs. + if (operand.info().isVariable()) + { + // The variable operand with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + auto operand_li = lower_info().operand.at(index); + assert(operand_li.def_factors().empty()); + operand_li.addDefPermuteFactor(operand_li.use_factors().getOnlyElement()); + } + }); +} + +void LoweredTrainableGraph::dumpLowerInfo() +{ + if (::onert::util::logging::ctx.enabled() == false) + return; + + std::map<uint32_t, std::string> dumps; + + _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) { + const auto operand_lower_info = lower_info().operand.getRawPtr(index); + assert(operand_lower_info); + if (!operand_lower_info->def_factors().empty() || !operand_lower_info->use_factors().empty()) + { + auto shape_to_string = [](const ir::Shape &shape) { + std::stringstream sstream; + sstream << "{ "; + for (auto i = 0; i < shape.rank(); ++i) + sstream << (shape.dim(i)) << " "; + sstream << "}"; + return sstream.str(); + }; + + auto factors_to_string = [](const PermuteFactorSet &factors) { + std::string str; + for (auto &&factor : factors) + { + str += factor.backend()->config()->id(); + str += "(" + to_string(factor.layout()) + ")"; + str += " "; + } + return "{ " + str + "}"; + }; + + auto operation_index_set_to_string = [](const ir::OperationIndexSet &operations) { + std::stringstream sstream; + sstream << "{ "; + for (auto &&op : operations) + sstream << op << " "; + sstream << "}"; + return sstream.str(); + }; + + auto data_to_str = [](const ir::Data *data) { + return (data ? 
(std::to_string(data->size()) + " bytes") : "N/A"); + }; + + std::string shape_str = shape_to_string(object.shape()); + std::string def_op = operation_index_set_to_string({object.getDef()}); + std::string use_ops = operation_index_set_to_string(object.getUses()); + std::string def_factors = factors_to_string(operand_lower_info->def_factors()); + std::string use_factors = factors_to_string(operand_lower_info->use_factors()); + std::stringstream sstream; + sstream << "Operand " << index << " Info" << std::endl; + sstream << " - Shape : " << shape_str << std::endl; + sstream << " - Def/Uses : Def " << def_op << " Uses " << use_ops << std::endl; + sstream << " - Data : " << data_to_str(object.data()) << std::endl; + sstream << " - LowerInfo : Def " << def_factors << " Uses " << use_factors << std::endl; + dumps.emplace(index.value(), sstream.str()); + } + }); + + for (const auto &e : dumps) + { + if (!e.second.empty()) + { + std::istringstream iss(e.second); + std::string line; + while (std::getline(iss, line)) + VERBOSE(Lower) << line << std::endl; + } + } +} + +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc new file mode 100644 index 000000000..d2153296f --- /dev/null +++ b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "StaticDerivativeShapeInferer.h" +#include "util/ShapeInference.h" +#include "util/logging.h" + +#include <misc/polymorphic_downcast.h> + +#include <sstream> +#include <stdexcept> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +void StaticDerivativeShapeInferer::infer() +{ + // It is not determined to iterate in reverse order. 
+ auto sorted_ops = _lowered_subg->graph().topolSortOperations(); + for (auto it = sorted_ops.rbegin(); it != sorted_ops.rend(); ++it) + { + const auto op_idx = *it; + const auto &op = _lowered_subg->trainable_graph().operation(op_idx); + if (checkDynamicInput(op)) + { + std::stringstream msg; + msg << "StaticDerivativeShapeInferer does not support dynamic shape yet, "; + msg << op.name() << "(op index: " << op_idx << ") has dynamic shape."; + throw std::runtime_error(msg.str()); + } + + checkOutput(op); + + op.accept(*this); + } +} + +void StaticDerivativeShapeInferer::dump() +{ + // TODO dump +} + +bool StaticDerivativeShapeInferer::checkDynamicInput(const ir::IOperation &op) +{ + const auto &operands = _lowered_subg->graph().operands(); + for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED) + { + if (operands.at(input_idx).info().isDynamic()) + { + return true; + } + } + + return false; +} + +void StaticDerivativeShapeInferer::checkOutput(const ir::IOperation &op) +{ + const auto &derivatives = _lowered_subg->trainable_graph().derivatives(); + for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED) + { + if (!derivatives.exist(output_idx)) + { + std::stringstream msg; + msg << "StaticDerivativeShapeInferer : Invalid output, "; + msg << op.name() << "'s derivative output(index: " << output_idx << ") does not exist."; + throw std::runtime_error(msg.str()); + } + } +} + +void StaticDerivativeShapeInferer::setShape(const ir::OperandIndex &index, const ir::Shape &shape) +{ + auto &tgraph = _lowered_subg->trainable_graph(); + + if (tgraph.derivatives().exist(index)) + tgraph.changeDerivativeShape(index, shape); + else + { + // NOTE This code assumes the types are always the same, but I'm not sure. + const auto &type = tgraph.operands().at(index).typeInfo(); + const auto new_index = tgraph.addDerivative(index, std::make_unique<ir::Operand>(shape, type)); + assert(new_index == index); + UNUSED_RELEASE(new_index); + } +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Conv2D &) +{ + // NYI +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::ElementwiseActivation &) +{ + // NYI +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Loss &) +{ + // NYI +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Permute &op) +{ + const auto &derivatives = _lowered_subg->trainable_graph().derivatives(); + + const auto &output_idx = op.getOutputs().at(0); + const auto &output = derivatives.at(output_idx); + + // re-sizing input derivative shape + const auto &input_idx = op.getInputs().at(0); + const auto &new_shape = output.info().shape(); + setShape(input_idx, new_shape); +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Pool2D &) +{ + // NYI +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Reshape &) +{ + // NYI +} + +void StaticDerivativeShapeInferer::visit(const ir::train::operation::Softmax &) +{ + // NYI +} + +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h new file mode 100644 index 000000000..48b3172d2 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_STATIC_DERIVATIVE_SHAPE_INFERER_H__ +#define __ONERT_COMPILER_STATIC_DERIVATIVE_SHAPE_INFERER_H__ + +#include "ir/train/TrainableOperationVisitor.h" + +#include "compiler/train/LoweredTrainableGraph.h" +#include "ir/Index.h" + +#include <memory> +#include <unordered_map> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +/** + * @brief Class to infer shape before running kernels. It does the following: + * - re-calculate and set output shape at compile time (before running kernels) + * - if calculation cannot be done at compile time, mark the outputs to be dynamic, meaning + * shapes of outputs will be calculated during running kernels + */ +class StaticDerivativeShapeInferer : public ir::train::TrainableOperationVisitor +{ +public: + StaticDerivativeShapeInferer(compiler::train::LoweredTrainableGraph *lowered_subg) + : _lowered_subg{lowered_subg} + { + } + + /** + * @brief Infer shape of operands belonging to ops and set the output shape. + * If output shape cannot be known without running op, mark it so that it can be allocated + * when running kernel. + */ + void infer(void); + + void dump(); + +private: + bool checkDynamicInput(const ir::IOperation &op); + void checkOutput(const ir::IOperation &op); + void setShape(const ir::OperandIndex &index, const ir::Shape &shape); + +private: + void visit(const ir::train::operation::Conv2D &op) override; + void visit(const ir::train::operation::ElementwiseActivation &op) override; + void visit(const ir::train::operation::Loss &op) override; + void visit(const ir::train::operation::Permute &op) override; + void visit(const ir::train::operation::Pool2D &op) override; + void visit(const ir::train::operation::Reshape &op) override; + void visit(const ir::train::operation::Softmax &op) override; + +private: + compiler::train::LoweredTrainableGraph *_lowered_subg; +}; + +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_STATIC_DERIVATIVE_SHAPE_INFERER_H__ diff --git a/runtime/onert/core/src/compiler/train/TensorRegistries.h b/runtime/onert/core/src/compiler/train/TensorRegistries.h new file mode 100644 index 000000000..48eaf10a1 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/TensorRegistries.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__ +#define __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__ + +#include "../../backend/builtin/Config.h" +#include "../../backend/builtin/train/TensorRegistry.h" + +#include <backend/train/TrainableBackendContext.h> + +#include <memory> +#include <unordered_set> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +class TensorRegistries +{ +public: + TensorRegistries() = default; + + TensorRegistries(const backend::train::TrainableBackendContexts &backend_contexts, + bool include_builtin) + { + for (const auto &e : backend_contexts) + { + auto tensor_reg = e.second->tensor_registry(); + if (e.first->config()->id() == backend::builtin::Config::ID) + { + _builtin_tensor_reg = + std::dynamic_pointer_cast<backend::builtin::train::TensorRegistry>(tensor_reg); + if (include_builtin) + _tensor_regs.insert(tensor_reg); + } + else + { + _tensor_regs.insert(tensor_reg); + } + } + } + + std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>>::const_iterator begin() const + { + return _tensor_regs.cbegin(); + } + std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>>::const_iterator end() const + { + return _tensor_regs.cend(); + } + + std::shared_ptr<backend::builtin::train::TensorRegistry> getBuiltinTensorRegistry() const + { + return _builtin_tensor_reg; + } + + backend::ITensor *getITensor(ir::OperandIndex index) const + { + for (auto &&tensor_reg : _tensor_regs) + { + auto tensor = tensor_reg->getITensor(index); + if (tensor) + return tensor; + } + return nullptr; + } + + backend::ITensor *getDerivativeITensor(ir::OperandIndex index) const + { + for (auto &&tensor_reg : _tensor_regs) + { + auto tensor = tensor_reg->getDerivativeITensor(index); + if (tensor) + return tensor; + } + return nullptr; + } + +private: + std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>> _tensor_regs; + std::shared_ptr<backend::builtin::train::TensorRegistry> _builtin_tensor_reg; +}; + +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__ diff --git a/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc new file mode 100644 index 000000000..d20ae9fd3 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TrainableOperationConverter.h" + +#include "ir/train/Operations.Include.h" +#include "util/Utils.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +TrainableOperationConverter::TrainableOperationConverter( + ir::train::TrainableGraph &tgraph, const compiler::train::TrainingInfo *training_info) + : UntrainableOperationConverter{tgraph}, _training_info{training_info} +{ + // Avoid unused-private-field error + UNUSED_RELEASE(_training_info); +} + +void TrainableOperationConverter::visit(const ir::operation::Conv2D &node) +{ + _return_op = std::make_unique<ir::train::operation::Conv2D>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::ElementwiseActivation &node) +{ + if (node.param().op_type == ir::operation::ElementwiseActivation::Type::RELU) + { + _return_op = std::make_unique<ir::train::operation::ElementwiseActivation>(node); + } + else + { + UntrainableOperationConverter::visit(node); + } +} + +void TrainableOperationConverter::visit(const ir::operation::FullyConnected &node) +{ + _return_op = std::make_unique<ir::train::operation::FullyConnected>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::Loss &node) +{ + _return_op = std::make_unique<ir::train::operation::Loss>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::Permute &node) +{ + _return_op = std::make_unique<ir::train::operation::Permute>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::Pool2D &node) +{ + _return_op = std::make_unique<ir::train::operation::Pool2D>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::Reshape &node) +{ + _return_op = std::make_unique<ir::train::operation::Reshape>(node); +} + +void TrainableOperationConverter::visit(const ir::operation::Softmax &node) +{ + _return_op = std::make_unique<ir::train::operation::Softmax>(node); +} + +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h new file mode 100644 index 000000000..5f6fc10c3 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__ +#define __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__ + +#include "UntrainableOperationConverter.h" + +#include "compiler/train/TrainingInfo.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +class TrainableOperationConverter : public UntrainableOperationConverter +{ +public: + TrainableOperationConverter(ir::train::TrainableGraph &trainable_graph, + const compiler::train::TrainingInfo *training_info); + + using UntrainableOperationConverter::operator(); + +private: + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::ElementwiseActivation &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::Loss &node) override; + void visit(const ir::operation::Permute &node) override; + void visit(const ir::operation::Pool2D &node) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::Softmax &) override; + +private: + const compiler::train::TrainingInfo *_training_info; +}; + +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__ diff --git a/runtime/onert/core/src/compiler/train/TrainingCompiler.cc b/runtime/onert/core/src/compiler/train/TrainingCompiler.cc new file mode 100644 index 000000000..711af1651 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/TrainingCompiler.cc @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TrainingCompiler.h" + +#include "StaticDerivativeShapeInferer.h" +#include "TrainableOperationConverter.h" +#include "pass/LossInsertionPass.h" +#include "../CompilerHelpers.h" +#include "../ExecutorFactory.h" +#include "../pass/ConstantOutputPass.h" +#include "../pass/OddOutputPass.h" +#include "../pass/PassRunner.h" +#include "../pass/UnusedOperandEliminationPass.h" +#include "../ShapeValidator.h" +#include "../../dumper/dot/DotDumper.h" +#include "../../exec/train/TrainableExecutors.h" +#include "../../ir/OperationDumper.h" +#include "../../ir/verifier/Verifier.h" + +#include <compiler/StaticShapeInferer.h> +#include <compiler/train/LoweredTrainableGraph.h> +#include <ir/train/TrainableGraph.h> +#include <exec/train/optimizer/SGD.h> + +#include <misc/polymorphic_downcast.h> +#include <misc/string_helpers.h> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +TrainingCompiler::TrainingCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg, + std::vector<std::unique_ptr<CompilerOptions>> &copts, + const TrainingInfo &training_info) + : _model{nnpkg->primary_model()}, _options{copts[0].get()}, _training_info{training_info} +{ + if (nnpkg->model_count() > 1) + throw std::runtime_error("TrainingCompiler does not support multiple models yet"); + + if (nnpkg->primary_model()->subgraphs_count() > 1) + throw std::runtime_error("TrainingCompiler does not support multiple subgraphs yet"); +} + +std::shared_ptr<CompilerArtifact> TrainingCompiler::compile(void) +{ + /*************************************************** + * Prepare compilation phase + ***************************************************/ + if (!_options) + throw std::runtime_error{"Empty compile option"}; + + // Mode check + // TODO handle option for each model + if (_options->he_profiling_mode) + { + if (!_options->he_scheduler) + throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling."); + + if (_options->executor != "Dataflow") + throw std::runtime_error("Profiling mode works only with 'Dataflow' executor"); + } + + if (!_options->minmax_filepath.empty()) + { + if (_options->executor != "Linear") + throw std::runtime_error("Recording minmax works only with Linear executor"); + } + + _options->forceInternalOptions(); + _options->verboseOptions(); + + auto custom_kernel_builder = _model->getKernelBuilder(); + + _model->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) { + auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph); + // Mandatory passes + compiler::pass::PassRunner{} + .append(std::make_unique<compiler::pass::ConstantOutputPass>(subg)) + .append(std::make_unique<compiler::pass::OddOutputPass>(subg)) + .run(); + + // Optimizations + compiler::pass::PassRunner{} + .append(std::make_unique<compiler::pass::UnusedOperandEliminationPass>(subg)) + .run(); + }); + + std::unordered_map<ir::SubgraphIndex, std::shared_ptr<ir::train::TrainableGraph>> + trainable_subgraphs; + + if (_model->hasOnly<ir::Graph>()) + { + // Create trainable subgraphs by copy and converting inference model + _model->iterate([&](const ir::SubgraphIndex &subg_index, const ir::IGraph &graph) { + const auto &subg = nnfw::misc::polymorphic_downcast<const ir::Graph &>(graph); + // Create TrainableGraph by copying Graph + auto trainable_subg = std::make_shared<ir::train::TrainableGraph>(subg); + + // Convert operations to trainable operations + auto converter = TrainableOperationConverter{*trainable_subg, &_training_info}; + subg.operations().iterate( + [&](const onert::ir::OperationIndex 
&op_index, const onert::ir::IOperation &op) { + auto trainable_op = converter(op); + auto gen_index = trainable_subg->replaceOperation(op_index, std::move(trainable_op)); + UNUSED_RELEASE(gen_index); + assert(gen_index == op_index); + }); + + trainable_subgraphs[subg_index] = std::move(trainable_subg); + }); + } + else + { + // TODO Support models that have TrainableGraphs + throw std::runtime_error("TrainingCompiler: Invalid model"); + } + + // operation + _model.reset(); + + // Apply pass for trainable subgraphs + for (auto &&pair : trainable_subgraphs) + { + auto trainable_subg = pair.second; + auto subg_index = pair.first; + + compiler::pass::PassRunner{} + .append(std::make_unique<train::pass::LossInsertionPass>(*trainable_subg, &_training_info, + subg_index)) + .run(); + } + + // Change input shape according to batch_size + for (auto &&pair : trainable_subgraphs) + { + auto trainable_subg = pair.second; + + for (const auto &ind : trainable_subg->getInputs()) + { + auto &input = trainable_subg->operands().at(ind); + auto new_shape = input.info().shape(); + // TODO Consider batch size index + if (new_shape.dim(0) != 1) + throw std::runtime_error("the first dim is not 1. It is not supported yet."); + new_shape.dim(0) = _training_info.batchSize(); + input.info().shape(new_shape); + } + } + + /*************************************************** + * Backend independent analysis & optimization phase + ***************************************************/ + // TODO Handle dump level for each model + auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options->graph_dump_level); + onert::dumper::dot::DotDumper dot_dumper(dump_level); + + // Tracing context + auto tracing_ctx = std::make_unique<util::TracingCtx>(); + + // Lower: Assign backend + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::train::LoweredTrainableGraph>> + lowered_subgs; + { + for (auto &&pair : trainable_subgraphs) + { + auto &subg_index = pair.first; + auto trainable_subg = pair.second; + + // Lower: Assign backend + lowered_subgs[subg_index] = + std::make_unique<compiler::train::LoweredTrainableGraph>(*trainable_subg, *_options); + // Set tracing_ctx for copied graph + if (tracing_ctx != nullptr) + tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value()); + } + } + + for (const auto &pair : lowered_subgs) + { + const auto &subg_index = pair.first; + const auto &lowered_subg = pair.second; + dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value())); + } + + // Set derivatives as default tensor info + for (const auto &pair : lowered_subgs) + { + auto lowered_subg = pair.second.get(); + auto &tgraph = lowered_subg->trainable_graph(); + tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &obj) { + if (!obj.isConstant()) + { + auto deriv = std::make_unique<ir::Operand>(obj); + const auto gen_index = tgraph.addDerivative(index, std::move(deriv)); + assert(gen_index == index); + UNUSED_RELEASE(gen_index); + } + }); + } + + // Shape inference. + { + // Run the StaticShapeInfer of primary subg. 
All child StaticShapeInferers are called + // recursively + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers = + createStaticShapeInferers(lowered_subgs); + + const auto primary_subg_idx = ir::SubgraphIndex{0}; + inferers.at(primary_subg_idx)->infer(); + + for (const auto &pair_inferer : inferers) + { + const auto inferer = pair_inferer.second.get(); + inferer->dump(); + } + + // NOTE StaticDerivativeShapeInferer is allocated for each subgraph, + // so it does not support models that have controlflow operations yet. + for (auto &&pair : lowered_subgs) + { + auto &lowered_subg = pair.second; + auto inferer = std::make_unique<StaticDerivativeShapeInferer>(lowered_subg.get()); + inferer->infer(); + inferer->dump(); + } + } + + // Shape validation + for (const auto &pair : lowered_subgs) + { + auto &lowered_subg = pair.second; + compiler::ShapeValidator{lowered_subg->graph()}(); + } + + // TODO Validate shapes of derivative tensors + + // Create optimizer + // TODO Set properties of optimizer + std::shared_ptr<exec::train::optimizer::Optimizer> optimizer; + const auto &optim_info = _training_info.optimizerInfo(); + if (optim_info.optim_code == exec::train::optimizer::OptimizerCode::SGD) + optimizer = std::make_shared<exec::train::optimizer::SGD>(optim_info.learning_rate); + else + throw std::runtime_error("Invalid optimizer type, " + + exec::train::optimizer::toString(optim_info.optim_code)); + + /************************************************************* + * Backend independent analysis & optimization phase finished + *************************************************************/ + auto executors = std::make_shared<exec::train::TrainableExecutors>(); + for (auto &&pair : lowered_subgs) + { + auto const model_index = ir::ModelIndex{0}; + auto const subg_index = pair.first; + auto &lowered_subg = pair.second; + auto const indexed_ranks = lowered_subg->indexed_ranks(); + + ir::OperationDumper dumper("Executor generation of Subgraph " + + std::to_string(subg_index.value())); + lowered_subg->graph().operations().iterate( + [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); }); + + ExecutorFactoryArgs args; + args.tracing_ctx = tracing_ctx.get(); + args.options = _options; + args.model_index = model_index; + args.custom_kernel_builder = custom_kernel_builder; + auto executor = std::unique_ptr<exec::IExecutor>{ + ExecutorFactory::get().create(std::move(lowered_subg), executors, args, optimizer)}; + executor->setIndexedRanks(indexed_ranks); + executors->emplace(model_index, subg_index, std::move(executor)); + } + + /******************************** + * Code generation phase finished + ********************************/ + return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx)); +} + +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/TrainingCompiler.h b/runtime/onert/core/src/compiler/train/TrainingCompiler.h new file mode 100644 index 000000000..b93437217 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/TrainingCompiler.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file TrainingCompiler.h + * @brief This file contains TrainingCompiler class to define and run compilation phase + */ + +#ifndef __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H_ +#define __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H_ + +#include "compiler/CompilerOptions.h" +#include "compiler/ICompiler.h" +#include "compiler/train/TrainingInfo.h" +#include "ir/NNPkg.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +/** + * @brief Class to compile NN package + */ +class TrainingCompiler : public ICompiler +{ +public: + /** + * @brief Construct a new TrainingCompiler object for single model + * @param[in] model model to compile + * @param[in] inference_compiler Compiler for inference + * @param[in] coptions Compiler Options + * @param[in] training_info Training information + */ + explicit TrainingCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg, + std::vector<std::unique_ptr<CompilerOptions>> &copts, + const TrainingInfo &training_info); + + /** + * @brief Default Construct + * + */ + TrainingCompiler(void) = delete; + + /** + * @brief Destroy the TrainingCompiler object + */ + ~TrainingCompiler() = default; + +public: + /** + * @brief Do compilation with the options + * + * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation + */ + std::shared_ptr<CompilerArtifact> compile(void); + +private: + std::shared_ptr<ir::Model> _model; + CompilerOptions *_options; + const TrainingInfo _training_info; +}; + +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H_ diff --git a/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc new file mode 100644 index 000000000..6a5a052b6 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
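Given the constructor and compile() declared above, a hedged sketch of how the runtime might drive the new compiler; the include path and the way nnpkg/copts/training_info are obtained are assumptions, not part of this patch:

#include "compiler/train/TrainingCompiler.h" // internal header under core/src; path illustrative

#include <memory>
#include <vector>

std::shared_ptr<onert::compiler::CompilerArtifact>
compileForTraining(const std::shared_ptr<onert::ir::NNPkg> &nnpkg,
                   std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> &copts,
                   const onert::compiler::train::TrainingInfo &training_info)
{
  onert::compiler::train::TrainingCompiler compiler(nnpkg, copts, training_info);
  // Returns the trainable executors plus the tracing context, as in compile() above.
  return compiler.compile();
}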
+ */ + +#include "UntrainableOperationConverter.h" + +#include "ir/train/operation/UntrainableOperation.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +UntrainableOperationConverter::UntrainableOperationConverter(ir::train::TrainableGraph &tgraph) + : _tgraph{tgraph}, _return_op{nullptr} +{ +} + +std::unique_ptr<ir::train::ITrainableOperation> UntrainableOperationConverter:: +operator()(const ir::IOperation &op) +{ + op.accept(*this); + + return std::move(_return_op); +} + +#define OP(InternalName) \ + void UntrainableOperationConverter::visit(const ir::operation::InternalName &node) \ + { \ + _return_op = \ + std::make_unique<ir::train::operation::UntrainableOperation<ir::operation::InternalName>>( \ + node); \ + } +#include "ir/Operations.lst" +#undef OP + +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h new file mode 100644 index 000000000..e960b3831 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__ +#define __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__ + +#include "ir/Operations.Include.h" +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableGraph.h" + +#include <memory> + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +class UntrainableOperationConverter : public ir::OperationVisitor +{ +public: + UntrainableOperationConverter(ir::train::TrainableGraph &tgraph); + std::unique_ptr<ir::train::ITrainableOperation> operator()(const ir::IOperation &op); + +#define OP(InternalName) void visit(const ir::operation::InternalName &node); +#include "ir/Operations.lst" +#undef OP + +protected: + ir::train::TrainableGraph &_tgraph; + std::unique_ptr<ir::train::ITrainableOperation> _return_op; +}; + +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__ diff --git a/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc new file mode 100644 index 000000000..3e01a9739 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "LossInsertionPass.h" + +#include "ir/train/TrainableGraph.h" +#include "ir/train/operation/Loss.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ +namespace pass +{ + +void LossInsertionPass::run() +{ + const auto &loss_info = _training_info->lossInfo(); + + ir::operation::Loss::Param param; + param.op_type = loss_info.type; + + if (_trainable_graph.getOutputs().size() != 1) + { + throw std::runtime_error("LossInsertionPass: Not supported multiple outputs"); + } + + // TODO Consider SparseCategoricalCrossentropy y_true shape + // SparseCategoricalCrossentropy loss has a different y_true shape than y_pred. + + // TODO Implement Loop [0, getOutputs().size()) + // index: a loop index + const auto index = 0; + const auto &y_pred_index = _trainable_graph.getOutputs().at(index); + const auto &y_pred = _trainable_graph.operands().at(y_pred_index); + const auto &shape = y_pred.shape(); + const auto &type_info = y_pred.typeInfo(); + auto y_true_index = _trainable_graph.addOperand(shape, type_info); + ir::OperandIndexSequence inputs{y_pred_index, y_true_index}; + + // TODO Consider Reduction + // Some types of Reduction have the same shape y_true and output. + + const ir::TypeInfo float_op(ir::DataType::FLOAT32); + auto output_index = _trainable_graph.addOperand(ir::Shape{1}, float_op); + ir::OperandIndexSequence outputs{output_index}; + + auto loss_op = std::make_unique<ir::operation::Loss>(inputs, outputs, param); + auto trainable_loss_op = std::make_unique<ir::train::operation::Loss>(*loss_op); + + _trainable_graph.addOperation(std::move(trainable_loss_op)); + + _trainable_graph.addInput(y_true_index); + + // TODO Add loss as many as output size + _trainable_graph.addLoss(output_index, ir::IOIndex{index}); +} + +} // namespace pass +} // namespace train +} // namespace compiler +} // namespace onert diff --git a/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h new file mode 100644 index 000000000..ed4d60c96 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
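The pass above only wires the graph: y_pred is the existing subgraph output, a new y_true operand of the same shape and type becomes an extra graph input, and a FLOAT32 Shape{1} operand receives the scalar loss. What the inserted Loss node eventually computes depends on loss_info.type; if mean squared error were configured, for example, the reduction would look roughly like this standalone sketch (illustrative only; the real kernels live in the backends):

#include <cstddef>

// Scalar MSE over flat buffers, matching the Shape{1} output operand
// registered by LossInsertionPass.
float mse_loss(const float *y_pred, const float *y_true, std::size_t size)
{
  float sum = 0.0f;
  for (std::size_t i = 0; i < size; ++i)
  {
    const float diff = y_pred[i] - y_true[i];
    sum += diff * diff;
  }
  return sum / static_cast<float>(size);
}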
+ */ + +#ifndef __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__ +#define __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__ + +#include "Pass.h" + +#include "compiler/train/TrainingInfo.h" + +namespace onert +{ +namespace compiler +{ +namespace train +{ +namespace pass +{ + +class LossInsertionPass : public Pass +{ +public: + LossInsertionPass(ir::train::TrainableGraph &trainable_graph, const TrainingInfo *training_info, + const ir::SubgraphIndex &subg_index) + : Pass{trainable_graph, training_info}, _subg_index{subg_index} + { + } + +public: + std::string id() final { return "LossInsertionPass"; } + void run() final; + +private: + ir::SubgraphIndex _subg_index; +}; + +} // namespace pass +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__ diff --git a/runtime/onert/core/src/compiler/train/pass/Pass.h b/runtime/onert/core/src/compiler/train/pass/Pass.h new file mode 100644 index 000000000..d64c06cf4 --- /dev/null +++ b/runtime/onert/core/src/compiler/train/pass/Pass.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_TRAIN_PASS_PASS_H__ +#define __ONERT_COMPILER_TRAIN_PASS_PASS_H__ + +#include "../../pass/IPass.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +class TrainableGraph; +} // namespace train +} // namespace ir +} // namespace onert + +namespace onert +{ +namespace compiler +{ +namespace train +{ + +class TrainingInfo; + +namespace pass +{ + +class Pass : public compiler::pass::IPass +{ +public: + Pass(ir::train::TrainableGraph &trainable_graph, const TrainingInfo *training_info) + : _trainable_graph{trainable_graph}, _training_info{training_info} + { + } + virtual ~Pass() = default; + +protected: + ir::train::TrainableGraph &_trainable_graph; + const TrainingInfo *_training_info; +}; + +} // namespace pass +} // namespace train +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TRAIN_PASS_PASS_H__ diff --git a/runtime/onert/core/src/dumper/dot/DotBuilder.cc b/runtime/onert/core/src/dumper/dot/DotBuilder.cc index 38a69696e..9257434fa 100644 --- a/runtime/onert/core/src/dumper/dot/DotBuilder.cc +++ b/runtime/onert/core/src/dumper/dot/DotBuilder.cc @@ -29,31 +29,12 @@ DotBuilder::DotBuilder() {} void DotBuilder::update(const Node &node_info) { add(node_info); - for (auto edge : node_info.out_edges()) + for (auto &&edge : node_info.out_edges()) { addEdge(node_info, *edge); } } -void DotBuilder::addOpSequence(const DotSubgraphInfo &subgraph_info) -{ - _dot << "subgraph cluster_" << subgraph_info.index().value() << " {\n"; - _dot << " label=\"" << subgraph_info.label() << "\";\n"; - _dot << " style=filled;\n"; - _dot << " color=lightgrey;\n"; - _dot << " "; - for (auto op : subgraph_info.operations()) - { - _dot << "operation" << op.value() << "; "; - } - for (auto op : subgraph_info.operands()) - { - _dot << "operand" << 
op.value() << "; "; - } - _dot << "\n"; - _dot << "}\n"; -} - void DotBuilder::writeDot(std::ostream &os) { os << "digraph D {\n" @@ -66,7 +47,7 @@ void DotBuilder::add(const Node &node) _dot << node.id(); std::stringstream ss; _dot << "["; - for (auto attr : node.attributes()) + for (auto &&attr : node.attributes()) { _dot << attr.first << "=\"" << attr.second << "\" "; } diff --git a/runtime/onert/core/src/dumper/dot/DotBuilder.h b/runtime/onert/core/src/dumper/dot/DotBuilder.h index 681cbbf5d..30f32f8f9 100644 --- a/runtime/onert/core/src/dumper/dot/DotBuilder.h +++ b/runtime/onert/core/src/dumper/dot/DotBuilder.h @@ -25,7 +25,6 @@ #include "OperationNode.h" #include "OperandNode.h" -#include "DotSubgraphInfo.h" using Operation = onert::ir::Operation; using Object = onert::ir::Operand; @@ -44,7 +43,6 @@ public: public: void update(const Node &dotinfo); - void addOpSequence(const DotSubgraphInfo &subgraph_info); void writeDot(std::ostream &os); diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.cc b/runtime/onert/core/src/dumper/dot/DotDumper.cc index 118057f09..ab77a6c62 100644 --- a/runtime/onert/core/src/dumper/dot/DotDumper.cc +++ b/runtime/onert/core/src/dumper/dot/DotDumper.cc @@ -19,8 +19,7 @@ #include "DotDumper.h" #include "DotBuilder.h" -#include "DotSubgraphInfo.h" -#include "ir/OpSequence.h" +#include "ir/OperandIndexMap.h" #include "ir/OperationIndexMap.h" #include "backend/Backend.h" #include "backend/IConfig.h" @@ -33,151 +32,153 @@ namespace dumper namespace dot { -void DotDumper::dump(const std::string &tag) +namespace { - if (_level == Level::OFF) - { - return; - } - - onert::dumper::dot::DotBuilder dot_builder; - - auto &operations = _graph.operations(); - auto &operands = _graph.operands(); - - ir::OperationIndexMap<std::unique_ptr<Operation>> operation_nodes; - std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> operand_nodes; - - auto backend_to_fillcolor = [](const backend::Backend *backend) { - static const auto map = []() { - std::unordered_map<const backend::Backend *, std::string> ret; - uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :( - for (const auto backend : compiler::BackendManager::get().getAll()) - { - ret.emplace(backend, Node::BG_COLORS[index]); - index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0])); - } - return ret; - }(); - - auto itr = map.find(backend); - if (itr == map.end()) - { - return Node::DEFAULT_FILLCOLOR; - } - else +std::string backend_to_fillcolor(const backend::Backend *backend) +{ + static const auto map = []() { + std::unordered_map<const backend::Backend *, std::string> ret; + uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :( + for (const auto backend : compiler::BackendManager::get().getAll()) { - return itr->second; + ret.emplace(backend, Node::BG_COLORS[index]); + index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0])); } - }; + return ret; + }(); + auto itr = map.find(backend); + if (itr == map.end()) + { + return Node::DEFAULT_FILLCOLOR; + } + else + { + return itr->second; + } +} - util::Set<ir::OperandIndex> shown_operand_set; +std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> +generate_dot_operands(const ir::Graph &graph, const DotDumper::Level level) +{ + std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> dot_operands; + const auto &operands = graph.operands(); operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &object) { - bool showing_cond = false; - if (_level == 
Level::ALL) - { - showing_cond = true; - } - else - { - showing_cond = !object.isConstant(); - } - if (object.isConstant() || _graph.getInputs().contains(index)) - { - showing_cond = showing_cond && (object.getUses().size() > 0); - } + bool showing_cond = + level == DotDumper::Level::ALL + ? true + : !object.isConstant() || (graph.getInputs() + graph.getOutputs()).contains(index); if (showing_cond) { - shown_operand_set.add(index); - auto type = [&]() { using onert::dumper::dot::Operand; - if (_graph.getInputs().contains(index)) + if (graph.getInputs().contains(index)) return Operand::Type::MODEL_INPUT; - if (_graph.getOutputs().contains(index)) + if (graph.getOutputs().contains(index)) return Operand::Type::MODEL_OUTPUT; return Operand::Type::INTERNAL; }(); auto node = std::make_unique<Operand>(index, type); + std::string label = std::to_string(index.value()); + std::string fillcolor = ""; + node->setAttribute("label", label); + node->setAttribute("fillcolor", fillcolor); - { - // Display LowerInfo attributes - std::string label = std::to_string(index.value()); - std::string fillcolor = ""; - if (_lowered_graph) - { - auto lower_info = _lowered_graph->getLowerInfo(index); - const auto &def_factors = lower_info->def_factors(); - if (def_factors.size() > 0) - { - label += "\\n["; - label += def_factors.getOnlyElement().backend()->config()->id(); - label += "]"; - - fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend()); - } - } - node->setAttribute("label", label); - node->setAttribute("fillcolor", fillcolor); - } - - operand_nodes.emplace(index, std::move(node)); + dot_operands.emplace(index, std::move(node)); } }); - operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) { + return dot_operands; +} + +ir::OperationIndexMap<std::unique_ptr<Operation>> +generate_dot_operations(const ir::Graph &graph, + const ir::OperandIndexMap<std::unique_ptr<Operand>> &dot_operands) +{ + ir::OperationIndexMap<std::unique_ptr<Operation>> dot_operations; + const auto &operations = graph.operations(); + operations.iterate([&](const ir::OperationIndex &index, const ir::IOperation &op) { auto node = std::make_unique<Operation>(index, op); - for (auto input : op.getInputs()) + for (auto &&input : op.getInputs()) { using onert::dumper::dot::Operand; // Constant input and dump level is ALL_BUT_CONSTANTS - if (operand_nodes.find(input) == operand_nodes.end()) + if (dot_operands.find(input) == dot_operands.end()) continue; - auto &input_node = operand_nodes.at(input); + auto &input_node = dot_operands.at(input); input_node->addOutEdge(node.get()); } - for (auto output : op.getOutputs()) + for (auto &&output : op.getOutputs() | ir::Remove::UNDEFINED) { using onert::dumper::dot::Operand; - auto &output_node = operand_nodes.at(output); + auto &output_node = dot_operands.at(output); node->addOutEdge(output_node.get()); } - operation_nodes.emplace(index, std::move(node)); + dot_operations.emplace(index, std::move(node)); }); - if (_lowered_graph) - { - const auto &op_seqs = _lowered_graph->op_seqs(); - op_seqs.iterate([&](const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq) { - const auto lower_info = _lowered_graph->getLowerInfo(index); + return dot_operations; +} + +void update_lower_info(const compiler::ILoweredGraph &lowered_graph, + ir::OperandIndexMap<std::unique_ptr<Operand>> *dot_operands) +{ + const auto &operands = lowered_graph.graph().operands(); + operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &) { + auto itr = 
dot_operands->find(index); + if (itr != dot_operands->end()) + { + auto &node = itr->second; + // Display LowerInfo attributes + std::string label = node->getAttribute("label"); + std::string fillcolor = node->getAttribute("fillcolor"); + auto lower_info = lowered_graph.lower_info().operand.getRawPtr(index); + const auto &def_factors = lower_info->def_factors(); + if (def_factors.size() > 0) + { + label += "\\n["; + label += def_factors.getOnlyElement().backend()->config()->id(); + label += "]"; + fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend()); + } + node->setAttribute("label", label); + node->setAttribute("fillcolor", fillcolor); + } + }); +} + +void update_lower_info(const compiler::ILoweredGraph &lowered_graph, + ir::OperationIndexMap<std::unique_ptr<Operation>> *dot_operations) +{ + const auto &operations = lowered_graph.graph().operations(); + operations.iterate([&](const ir::OperationIndex &index, const ir::IOperation &) { + const auto lower_info = lowered_graph.lower_info().operation.getRawPtr(index); + if (lower_info) + { auto fillcolor = backend_to_fillcolor(lower_info->backend()); - std::string label = - std::to_string(index.value()) + " [" + lower_info->backend()->config()->id() + "]"; - DotSubgraphInfo subgraph_info{index, op_seq, shown_operand_set, _graph.operations()}; - subgraph_info.label(label); - subgraph_info.fillcolor(fillcolor); - dot_builder.addOpSequence(subgraph_info); - - // Set fillcolor of all operations in the op_seq - for (const auto &op_idx : op_seq.operations()) + std::string backend_label = "[" + lower_info->backend()->config()->id() + "]"; + auto itr = dot_operations->find(index); + if (itr != dot_operations->end()) { - auto found = operation_nodes.find(op_idx); - if (found != operation_nodes.end()) - { - auto &&op = found->second; - op->setAttribute("fillcolor", fillcolor); - } + auto &node = itr->second; + node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label); + node->setAttribute("fillcolor", fillcolor); } - }); - } + } + }); +} +void dump_to_file(const ir::OperandIndexMap<std::unique_ptr<Operand>> &operand_nodes, + const ir::OperationIndexMap<std::unique_ptr<Operation>> &operation_nodes, + const std::string &tag) +{ + onert::dumper::dot::DotBuilder dot_builder; for (const auto &e : operation_nodes) dot_builder.update(*e.second); for (const auto &e : operand_nodes) @@ -198,6 +199,34 @@ void DotDumper::dump(const std::string &tag) fb.close(); } } +} // namespace + +void DotDumper::dump(const ir::Graph &graph, const std::string &tag) +{ + if (_level == Level::OFF) + { + return; + } + + const auto dot_operands = generate_dot_operands(graph, _level); + const auto dot_operations = generate_dot_operations(graph, dot_operands); + dump_to_file(dot_operands, dot_operations, tag); +} + +// TODO Support derivative tensors +void DotDumper::dump(const compiler::ILoweredGraph &lowered_graph, const std::string &tag) +{ + if (_level == Level::OFF) + { + return; + } + + auto dot_operands = generate_dot_operands(lowered_graph.graph(), _level); + auto dot_operations = generate_dot_operations(lowered_graph.graph(), dot_operands); + update_lower_info(lowered_graph, &dot_operands); + update_lower_info(lowered_graph, &dot_operations); + dump_to_file(dot_operands, dot_operations, tag); +} } // namespace dot } // namespace dumper diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h index fdbca1642..fca5f356c 100644 --- 
a/runtime/onert/core/src/dumper/dot/DotDumper.h +++ b/runtime/onert/core/src/dumper/dot/DotDumper.h @@ -15,7 +15,7 @@ */ #include "ir/Graph.h" -#include "compiler/LoweredGraph.h" +#include "compiler/ILoweredGraph.h" #ifndef __ONERT_DUMPER_DOT_DOT_DUMPER_H__ #define __ONERT_DUMPER_DOT_DOT_DUMPER_H__ @@ -38,27 +38,28 @@ public: }; public: - DotDumper(const ir::Graph &graph, Level level) - : _lowered_graph{nullptr}, _graph(graph), _level{level} - { - } - DotDumper(const compiler::LoweredGraph *lowered_graph, Level level) - : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level} - { - } + DotDumper(Level level) : _level{level} {} public: /** - * @brief Dump to dot file as tag name if "GRAPH_DOT_DUMP" is set + * @brief Dump graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set + * + * @param[in] graph The graph that would be used to get operations and operands + * @param[in] tag The name of dot file that would be created + * @return N/A + */ + void dump(const ir::Graph &graph, const std::string &tag); + + /** + * @brief Dump lowered graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set * + * @param[in] graph The graph that would be used to get operations and operands * @param[in] tag The name of dot file that would be created * @return N/A */ - void dump(const std::string &tag); + void dump(const compiler::ILoweredGraph &lowered_graph, const std::string &tag); private: - const compiler::LoweredGraph *_lowered_graph; - const ir::Graph &_graph; Level _level; }; diff --git a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.cc b/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.cc deleted file mode 100644 index 52e9c758d..000000000 --- a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.cc +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
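With the refactored interface above, the dumper no longer owns a graph; it is constructed with only a level, and the graph (or lowered graph) is passed per call. A hedged usage sketch, assuming the doxygen note that output is only produced when GRAPH_DOT_DUMP is set (include path illustrative):

#include "dumper/dot/DotDumper.h"

void dumpBoth(const onert::ir::Graph &graph, const onert::compiler::ILoweredGraph &lowered)
{
  onert::dumper::dot::DotDumper dumper(onert::dumper::dot::DotDumper::Level::ALL);
  dumper.dump(graph, "graph");           // operands and operations only
  dumper.dump(lowered, "lowered_graph"); // additionally annotates backend labels/colors
}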
- */ - -#include "DotSubgraphInfo.h" - -#include <sstream> - -namespace onert -{ -namespace dumper -{ -namespace dot -{ - -DotSubgraphInfo::DotSubgraphInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq, - const util::Set<ir::OperandIndex> &shown_operands, - const ir::Operations &operations_ctx) - : _index{index} -{ - for (const auto &op_idx : op_seq.operations()) - { - _operations.insert(op_idx); - const auto &node = operations_ctx.at(op_idx); - for (auto o : node.getInputs()) - { - // Must be a shown operand, not op_seq's inputs - if (shown_operands.contains(o) && !op_seq.getInputs().contains(o)) - { - _operands.insert(o); - } - } - for (auto o : node.getOutputs()) - { - // Must be a shown operand, not op_seq's inputs - if (shown_operands.contains(o) && !op_seq.getOutputs().contains(o)) - { - _operands.insert(o); - } - } - } -} - -} // namespace dot -} // namespace dumper -} // namespace onert diff --git a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.h b/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.h deleted file mode 100644 index 95ba8953e..000000000 --- a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__ -#define __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__ - -#include <unordered_set> - -#include "ir/Index.h" -#include <ir/Operations.h> -#include "ir/OpSequence.h" -#include "util/Set.h" - -namespace onert -{ -namespace dumper -{ -namespace dot -{ - -class DotSubgraphInfo -{ -public: - DotSubgraphInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq, - const util::Set<ir::OperandIndex> &shown_operands, - const ir::Operations &operations_ctx); - - ir::OpSequenceIndex index() const { return _index; } - std::string label() const { return _label; } - void label(const std::string &val) { _label = val; } - std::string fillcolor() const { return _fillcolor; } - void fillcolor(const std::string &val) { _fillcolor = val; } - const std::unordered_set<ir::OperationIndex> &operations() const { return _operations; } - const std::unordered_set<ir::OperandIndex> &operands() const { return _operands; } - -private: - ir::OpSequenceIndex _index; - std::string _label; - std::string _fillcolor; - std::unordered_set<ir::OperationIndex> _operations; - std::unordered_set<ir::OperandIndex> _operands; -}; - -} // namespace dot -} // namespace dumper -} // namespace onert - -#endif // __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__ diff --git a/runtime/onert/core/src/dumper/dot/OperandNode.cc b/runtime/onert/core/src/dumper/dot/OperandNode.cc index 5a6015ca9..49319d595 100644 --- a/runtime/onert/core/src/dumper/dot/OperandNode.cc +++ b/runtime/onert/core/src/dumper/dot/OperandNode.cc @@ -18,7 +18,6 @@ #include "OperandNode.h" #include "ir/Graph.h" -#include "ir/operand/LowerInfo.h" namespace onert { @@ -33,10 +32,10 @@ const std::string Operand::OPERAND_SHAPE = "ellipse"; const std::string Operand::BG_COLOR_SCHEME = "set18"; Operand::Operand(const ir::OperandIndex &index, Type type) - : Node{"operand" + std::to_string(index.value())} + : Node{"operand" + std::to_string(index.value())} { { - auto type_to_shape = [](Type type) { + auto type_to_shape = [](Type type) -> const auto & { switch (type) { case Type::MODEL_INPUT: diff --git a/runtime/onert/core/src/dumper/dot/OperandNode.h b/runtime/onert/core/src/dumper/dot/OperandNode.h index 2e7cc5861..f2aea80ad 100644 --- a/runtime/onert/core/src/dumper/dot/OperandNode.h +++ b/runtime/onert/core/src/dumper/dot/OperandNode.h @@ -64,7 +64,6 @@ public: * * @param[in] index Operand index * @param[in] type Operand type - * @param[in] lower_info Operand LowerInfo */ Operand(const ir::OperandIndex &index, Type type); diff --git a/runtime/onert/core/src/dumper/dot/OperationNode.cc b/runtime/onert/core/src/dumper/dot/OperationNode.cc index bee137e7c..2ef08c9c6 100644 --- a/runtime/onert/core/src/dumper/dot/OperationNode.cc +++ b/runtime/onert/core/src/dumper/dot/OperationNode.cc @@ -18,7 +18,6 @@ #include "OperationNode.h" #include "ir/Graph.h" -#include "ir/operation/LowerInfo.h" #include "backend/IConfig.h" #include "backend/Backend.h" @@ -32,8 +31,8 @@ namespace dot const std::string Operation::OPERATION_SHAPE = "rect"; const std::string Operation::BG_COLOR_SCHEME = "pastel18"; -Operation::Operation(const ir::OperationIndex &index, const ir::Operation &node) - : Node{"operation" + std::to_string(index.value())} +Operation::Operation(const ir::OperationIndex &index, const ir::IOperation &node) + : Node{"operation" + std::to_string(index.value())} { setAttribute("label", std::to_string(index.value()) + " : " + node.name()); setAttribute("shape", OPERATION_SHAPE); diff --git 
a/runtime/onert/core/src/dumper/dot/OperationNode.h b/runtime/onert/core/src/dumper/dot/OperationNode.h index 74a37d3fb..d9292ad0c 100644 --- a/runtime/onert/core/src/dumper/dot/OperationNode.h +++ b/runtime/onert/core/src/dumper/dot/OperationNode.h @@ -25,7 +25,7 @@ #define __ONERT_DUMPER_DOT_DOT_NODE_INFO_H__ #include "Node.h" -#include "ir/Operation.h" +#include "ir/IOperation.h" #include "ir/Index.h" namespace onert @@ -52,7 +52,7 @@ public: * @param[in] index operation index * @param[in] node operation object */ - Operation(const ir::OperationIndex &index, const ir::Operation &node); + Operation(const ir::OperationIndex &index, const ir::IOperation &node); }; } // namespace dot diff --git a/runtime/onert/core/src/compiler/ParamChecker.cc b/runtime/onert/core/src/dumper/h5/Dumper.cc index c4f80f087..5e12c2dbb 100644 --- a/runtime/onert/core/src/compiler/ParamChecker.cc +++ b/runtime/onert/core/src/dumper/h5/Dumper.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,20 +14,21 @@ * limitations under the License. */ -#include "ParamChecker.h" +#include "Dumper.h" -#include "ir/Graph.h" +#include <iostream> +#include <sstream> +#include <stdexcept> namespace onert { -namespace compiler +namespace dumper { - -void ParamChecker::operator()() +namespace h5 { - _model->operations().iterate( - [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); }); -} -} // namespace compiler +Dumper::Dumper(const std::string &filepath) : _file{filepath, H5F_ACC_CREAT | H5F_ACC_RDWR} {} + +} // namespace h5 +} // namespace dumper } // namespace onert diff --git a/runtime/onert/core/src/dumper/h5/Dumper.h b/runtime/onert/core/src/dumper/h5/Dumper.h new file mode 100644 index 000000000..53d0e0332 --- /dev/null +++ b/runtime/onert/core/src/dumper/h5/Dumper.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_DUMPER_H5_DUMPER_H__ +#define __ONERT_DUMPER_H5_DUMPER_H__ + +#include "exec/MinMaxMap.h" + +#include <H5Cpp.h> +#include <string> + +namespace onert +{ +namespace dumper +{ +namespace h5 +{ + +class Dumper +{ +public: + /** + * @brief Construct dumper + * + * @param[in] path filepath to dump + * @throw H5::FileIException on error during file open/create + */ + Dumper(const std::string &filepath); + +protected: + H5::H5File _file; +}; + +} // namespace h5 +} // namespace dumper +} // namespace onert + +#endif // __ONERT_DUMPER_H5_DUMPER_H__ diff --git a/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc b/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc new file mode 100644 index 000000000..8a9de9f95 --- /dev/null +++ b/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MinMaxDumper.h" + +#include <iostream> +#include <sstream> +#include <stdexcept> + +namespace onert +{ +namespace dumper +{ +namespace h5 +{ + +static const char *h5_value_grpname = "value"; + +/* + * ensure grp_name exists in parent + */ +H5::Group ensureGroup(H5::Group parent, const char *child) +{ + H5::Exception::dontPrint(); + try + { + return parent.openGroup(child); + } + catch (H5::Exception &e) + { + return parent.createGroup(child); + } +} + +MinMaxDumper::MinMaxDumper(const std::string &filepath) : Dumper(filepath) +{ + auto root_grp = _file.openGroup("/"); + ensureGroup(root_grp, h5_value_grpname); +} + +void MinMaxDumper::dump(const exec::SMMinMaxMap &mmmap) const +{ + auto val_grp = _file.openGroup(h5_value_grpname); + auto num_run = val_grp.getNumObjs(); + auto num_grp = val_grp.createGroup(std::to_string(num_run)); + auto model_grp = ensureGroup(num_grp, "0"); + hsize_t dims[] = {2}; + H5::DataSpace dspace(1, dims); // rank=1, dim(0)=2, {min, max} + for (auto &&e : mmmap) + { + // key = {subg_idx, op_idx} = e.first + const auto subg_idx = e.first.first.value(); + const auto op_idx = e.first.second.value(); + auto subg_grp = ensureGroup(model_grp, std::to_string(subg_idx).c_str()); + auto op_dset = subg_grp.createDataSet(std::to_string(op_idx), H5::PredType::IEEE_F32BE, dspace); + op_dset.write(e.second.data, H5::PredType::NATIVE_FLOAT); + } +} + +} // namespace h5 +} // namespace dumper +} // namespace onert diff --git a/runtime/onert/core/src/dumper/h5/MinMaxDumper.h b/runtime/onert/core/src/dumper/h5/MinMaxDumper.h new file mode 100644 index 000000000..1f1b27c6e --- /dev/null +++ b/runtime/onert/core/src/dumper/h5/MinMaxDumper.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_DUMPER_H5_MINMAX_DUMPER_H__ +#define __ONERT_DUMPER_H5_MINMAX_DUMPER_H__ + +#include "exec/MinMaxMap.h" +#include "Dumper.h" + +#include <H5Cpp.h> +#include <string> + +namespace onert +{ +namespace dumper +{ +namespace h5 +{ + +// The hierachy of single model minmax h5 file +// +// GROUP / +// GROUP value +// └── GROUP run_idx +// └── GROUP model_idx +// └── GROUP subg_idx +// └── DATASET op_idx +// DATATYPE Float32 +// DATASPACE (2) +// DATA { min, max } +// GROUP name (optional, for debug) +// └── GROUP model_idx +// └── GROUP subg_idx +// └── ATTRIBUTE op_idx +// DATATYPE String +// DATA { "model/your/op/name"} +// +class MinMaxDumper : private Dumper +{ +public: + MinMaxDumper(const std::string &filepath); + /** + * @brief Dump minmax map + * + * @param[in] map single model minmax map + */ + void dump(const exec::SMMinMaxMap &map) const; + +private: + H5::Group _val_grp; +}; + +} // namespace h5 +} // namespace dumper +} // namespace onert + +#endif // __ONERT_DUMPER_H5_MINMAX_DUMPER_H__ diff --git a/runtime/onert/core/src/dumper/text/GraphDumper.cc b/runtime/onert/core/src/dumper/text/GraphDumper.cc new file mode 100644 index 000000000..6bd7904aa --- /dev/null +++ b/runtime/onert/core/src/dumper/text/GraphDumper.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
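A sketch of reading one entry back with the HDF5 C++ API, following the /value/<run>/<model>/<subg>/<op> layout documented above; the hard-coded indices and the lack of error handling are placeholders, not part of this patch:

#include <H5Cpp.h>

#include <iostream>
#include <string>

void readMinMax(const std::string &filepath)
{
  H5::H5File file(filepath, H5F_ACC_RDONLY);
  // run 0, model 0, subgraph 0, operation 0
  auto dset = file.openDataSet("value/0/0/0/0");
  float minmax[2] = {0.f, 0.f}; // stored as {min, max}
  dset.read(minmax, H5::PredType::NATIVE_FLOAT);
  std::cout << "min=" << minmax[0] << " max=" << minmax[1] << std::endl;
}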
+ */ + +#include "GraphDumper.h" + +#include "ir/Graph.h" +#include "compiler/LoweredGraph.h" +#ifdef ONERT_TRAIN +#include "compiler/train/LoweredTrainableGraph.h" +#endif // ONERT_TRAIN +#include "util/logging.h" +#include "misc/string_helpers.h" + +namespace onert +{ +namespace dumper +{ +namespace text +{ + +namespace +{ + +std::string formatOperandIndexSequence(const ir::OperandIndexSequence &seq) +{ + std::vector<std::string> strs; + for (auto &&ind : seq) + strs.push_back(dumper::text::formatOperandBrief(ind)); + return nnfw::misc::join(strs.begin(), strs.end(), ", "); +} + +} // namespace + +std::string formatOperandBrief(ir::OperandIndex ind) +{ + std::stringstream ss; + ss << ind; + return ss.str(); +} + +std::string formatOperand(const ir::Graph &, ir::OperandIndex ind) +{ + std::stringstream ss; + ss << ind; + // TODO Print shape, type and maybe more + return ss.str(); +} + +std::string formatOperation(const ir::IOperation &op, ir::OperationIndex ind) +{ + std::stringstream ss; + + ss << formatOperandIndexSequence(op.getOutputs()); + ss << " = "; + ss << ind << "_" << op.name() << "("; + ss << formatOperandIndexSequence(op.getInputs()); + ss << ")"; + return ss.str(); +} + +std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind) +{ + std::stringstream ss; + const auto &op = graph.operations().at(ind); + return formatOperation(op, ind); +} + +void dumpGraph(const ir::Graph &graph) +{ + VERBOSE(GraphDumper) << "{\n"; + auto ops_topol = graph.topolSortOperations(); + for (auto &&op_ind : ops_topol) + { + const auto &op = graph.operations().at(op_ind); + VERBOSE(GraphDumper) << " " << formatOperation(op, op_ind) << "\n"; + } + VERBOSE(GraphDumper) << "}\n"; + VERBOSE(GraphDumper) << std::endl; +} + +void dumpLoweredGraph(const compiler::LoweredGraph &lgraph) +{ + // TODO Graph dump with backend info + dumpGraph(lgraph.graph()); +} + +#ifdef ONERT_TRAIN +void dumpLoweredGraph(const compiler::train::LoweredTrainableGraph &lgraph) +{ + // TODO Graph dump with backend info + dumpGraph(lgraph.graph()); +} +#endif // ONERT_TRAIN + +} // namespace text +} // namespace dumper +} // namespace onert diff --git a/runtime/onert/core/src/dumper/text/GraphDumper.h b/runtime/onert/core/src/dumper/text/GraphDumper.h new file mode 100644 index 000000000..ab0061465 --- /dev/null +++ b/runtime/onert/core/src/dumper/text/GraphDumper.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__ +#define __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__ + +#include <ir/Index.h> + +namespace onert +{ +namespace ir +{ +class Graph; +struct IOperation; +} // namespace ir +} // namespace onert + +namespace onert +{ +namespace compiler +{ +class LoweredGraph; + +#ifdef ONERT_TRAIN +namespace train +{ +class LoweredTrainableGraph; +} // namespace train +#endif // ONERT_TRAIN +} // namespace compiler +} // namespace onert + +namespace onert +{ +namespace dumper +{ +namespace text +{ + +std::string formatOperandBrief(ir::OperandIndex ind); +std::string formatOperand(const ir::Graph &, ir::OperandIndex ind); +std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind); +void dumpGraph(const ir::Graph &graph); +void dumpLoweredGraph(const compiler::LoweredGraph &lgraph); +#ifdef ONERT_TRAIN +void dumpLoweredGraph(const compiler::train::LoweredTrainableGraph &lgraph); +#endif // ONERT_TRAIN + +} // namespace text +} // namespace dumper +} // namespace onert + +#endif // __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__ diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc index a69ae9cdb..e0b00077f 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.cc +++ b/runtime/onert/core/src/exec/DataflowExecutor.cc @@ -54,14 +54,13 @@ void DataflowExecutor::emplaceToReadyJobs(const uint32_t &id) { auto &job = _waiting_jobs[id]; assert(job != nullptr); - auto &op_seq = _lowered_graph->op_seqs().at(_job_to_op_seq[job->index()]); - auto rank = calculateRank(op_seq.operations()); + auto rank = calculateRank({_job_to_op[job->index()]}); _ready_jobs.emplace(rank, std::move(job)); } void DataflowExecutor::notify(uint32_t finished_job_id) { - for (auto id : _output_info[finished_job_id]) + for (auto &&id : _output_info[finished_job_id]) { assert(_input_info[id] > 0); auto count = --_input_info[id]; @@ -77,52 +76,49 @@ bool DataflowExecutor::noWaitingJobs() [](const std::unique_ptr<Job> &job) { return job == nullptr; }); } -DataflowExecutor::DataflowExecutor( - std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, - const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs, - compiler::CodeMap &&code_map) - : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, - std::move(tensor_mgrs)}, - _code_map{std::move(code_map)} +DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, + backend::BackendContexts &&backend_contexts, + const compiler::TensorRegistries &tensor_regs, + compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx) + : ExecutorBase{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, tracing_ctx}, + _code_map{std::move(code_map)} { VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl; - const auto &op_seqs = _lowered_graph->op_seqs(); - // Assign jobs convert OpSequenceIndex to job index(uint32_t) + // Assign jobs convert OperationIndex to job index(uint32_t) uint32_t next_job_index = 0; - std::unordered_map<ir::OpSequenceIndex, uint32_t> op_seq_to_job; - op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &) { - VERBOSE(DataflowExecutor) << "Create a job #" << next_job_index << " with OpSequenceIndex " - << op_seq_index.value() << std::endl; + std::unordered_map<ir::OperationIndex, 
uint32_t> op_to_job; + const auto &operations = _lowered_graph->graph().operations(); + operations.iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &) { + VERBOSE(DataflowExecutor) << "Create a job " << next_job_index << " with Operation " << op_ind + << std::endl; _finished_jobs.emplace_back( - std::make_unique<Job>(next_job_index, _code_map.at(op_seq_index).fn_seq.get())); - op_seq_to_job[op_seq_index] = next_job_index++; + std::make_unique<Job>(next_job_index, _code_map.at(op_ind).fn_seq.get())); + op_to_job[op_ind] = next_job_index++; }); _waiting_jobs.resize(next_job_index); _output_info.resize(next_job_index); _initial_input_info.resize(next_job_index, 0); - op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) { - auto job_index = op_seq_to_job[op_seq_index]; - for (auto output : op_seq.getOutputs()) + operations.iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &op) { + auto job_index = op_to_job[op_ind]; + for (auto &&output : op.getOutputs()) { // Update output and input info - op_seqs.iterate( - [&](const ir::OpSequenceIndex &op_seq_cur_index, const ir::OpSequence &op_seq_cur) { - if (op_seq_cur.getInputs().contains(output)) - { - auto dep_index = op_seq_to_job[op_seq_cur_index]; - ++_initial_input_info[dep_index]; - _output_info[job_index].push_back(dep_index); - } - }); + operations.iterate([&](const ir::OperationIndex &op_cur_ind, const ir::IOperation &op_cur) { + if (op_cur.getInputs().contains(output)) + { + auto dep_index = op_to_job[op_cur_ind]; + ++_initial_input_info[dep_index]; + _output_info[job_index].push_back(dep_index); + } + }); } }); - for (const auto &s : op_seq_to_job) - _job_to_op_seq.emplace(s.second, s.first); + for (const auto &s : op_to_job) + _job_to_op.emplace(s.second, s.first); _input_info = _initial_input_info; } @@ -145,35 +141,38 @@ void DataflowExecutor::executeImpl() } assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs - _subject.notifyModelBegin(this); + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); while (!_ready_jobs.empty()) { auto job = std::move((_ready_jobs.begin())->second); _ready_jobs.erase(_ready_jobs.begin()); auto job_index = job->index(); - VERBOSE(DataflowExecutor) << "Run job #" << job_index << std::endl; + VERBOSE(DataflowExecutor) << "Run job " << job_index << std::endl; + + auto op_ind = _job_to_op[job_index]; + const backend::Backend *backend = _lowered_graph->lower_info().operation.at(op_ind).backend(); - auto op_seq_index = _job_to_op_seq[job_index]; - auto op_seq = &_lowered_graph->op_seqs().at(op_seq_index); - const backend::Backend *backend = - _lowered_graph->getLowerInfo()->op_seq.at(op_seq_index)->backend(); + _subject.notifyJobBegin(this, profiling_subg_index, op_ind, backend); - _subject.notifyJobBegin(this, op_seq, backend); + job->fn_seq()->initRunning(); // check if FunctionSequence needs to handle dynamic tensor - bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists; + bool handle_dynamic_tensor = + _lowered_graph->getHasDynamicTensor(op_ind) || dynamic_input_exists; job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor); job->run(); - _subject.notifyJobEnd(this, op_seq, backend); + _subject.notifyJobEnd(this, profiling_subg_index, op_ind, backend); notify(job_index); _finished_jobs[job_index] = std::move(job); } assert(noWaitingJobs()); - _subject.notifyModelEnd(this); + 
_subject.notifySubgraphEnd(profiling_subg_index); // Reset input info for the next execution _input_info = _initial_input_info; diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h index 8d60e3e4b..1649be733 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.h +++ b/runtime/onert/core/src/exec/DataflowExecutor.h @@ -17,17 +17,17 @@ #ifndef __ONERT_EXEC_DATAFLOW_EXECUTOR_H__ #define __ONERT_EXEC_DATAFLOW_EXECUTOR_H__ -#include <list> -#include <map> -#include <unordered_map> - -#include "exec/FunctionSequence.h" +#include "ExecutorBase.h" #include "Job.h" + +#include "compiler/CodeMap.h" #include "ir/OperandIndexSequence.h" -#include "ir/Index.h" +#include "util/TracingCtx.h" + +#include <list> +#include <map> #include <memory> -#include "exec/ExecutorBase.h" -#include "compiler/CodeMap.h" +#include <unordered_map> namespace onert { @@ -47,13 +47,12 @@ public: * * @param lowered_graph LoweredGraph object * @param tensor_builders Tensor builders that are currently used - * @param code_map OpSequence and its code map + * @param code_map @c ir::Operation and its code map */ DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, - const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorRegistries &tensor_regs, - backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map); + backend::BackendContexts &&backend_contexts, + const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx); void executeImpl() override; @@ -88,7 +87,7 @@ protected: std::multimap<int64_t, std::unique_ptr<Job>, std::greater<int64_t>> _ready_jobs; /// @brief Which job runs which op and function. - std::unordered_map<uint32_t, ir::OpSequenceIndex> _job_to_op_seq; + std::unordered_map<uint32_t, ir::OperationIndex> _job_to_op; }; } // namespace exec diff --git a/runtime/onert/core/src/exec/DynamicShapeInference.cc b/runtime/onert/core/src/exec/DynamicShapeInferer.cc index 70bddfce4..4cbf2fe64 100644 --- a/runtime/onert/core/src/exec/DynamicShapeInference.cc +++ b/runtime/onert/core/src/exec/DynamicShapeInferer.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "exec/DynamicShapeInference.h" +#include "exec/DynamicShapeInferer.h" #include "util/ShapeInference.h" #include <assert.h> @@ -23,14 +23,6 @@ namespace onert namespace exec { -inline backend::IDynamicTensorManager * -dynamicTensorManagerOf(const std::shared_ptr<backend::ITensor> &tensor) -{ - if (!tensor->dynamic_tensor_manager()) - throw std::runtime_error{"Dynamic Tensor Manager is not available for this tensor."}; - return tensor->dynamic_tensor_manager(); -} - void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op, const ir::OperandIndex lhs_idx, const ir::OperandIndex rhs_idx) @@ -56,15 +48,15 @@ void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op, So, only when all inputs are static, we can skip dynamic shape inference. 
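The constructor above builds one Job per operation, records in _output_info which jobs consume each job's outputs, and counts waiting inputs in _initial_input_info; notify() decrements those counts, and a job becomes ready once its count reaches zero. A standalone sketch of that dependency-count scheduling (the real executor additionally orders ready jobs by rank; a plain FIFO is used here for brevity):

#include <cstdint>
#include <functional>
#include <queue>
#include <vector>

// deps[i]: indices of jobs that consume job i's outputs (mirrors _output_info).
// counts[i]: number of producers job i still waits on (mirrors _initial_input_info).
void runDataflow(const std::vector<std::vector<uint32_t>> &deps, std::vector<uint32_t> counts,
                 const std::function<void(uint32_t)> &run_job)
{
  std::queue<uint32_t> ready;
  for (uint32_t i = 0; i < counts.size(); ++i)
    if (counts[i] == 0)
      ready.push(i);

  while (!ready.empty())
  {
    const uint32_t job = ready.front();
    ready.pop();
    run_job(job);
    for (uint32_t consumer : deps[job]) // "notify" the consumers
      if (--counts[consumer] == 0)
        ready.push(consumer);
  }
}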
*/ - if ((!lhs->is_dynamic()) && (!rhs->is_dynamic())) - return; - auto output_idx = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_idx); + if ((currently_static(lhs) && currently_static(rhs)) && previously_static(output)) + return; + ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape); - dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -96,30 +88,32 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op, auto output_ind = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_ind); - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::ArgMax &op) +void DynamicShapeInferer::visit(const ir::operation::ArgMinMax &op) { - const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; - const auto &input = _tensor_registry->getITensor(input_idx); - auto input_shape = input->getShape(); + const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; + const auto input = _tensor_registry->getITensor(input_idx); - if (!input->is_dynamic()) - return; - - const auto rank = input_shape.rank(); - const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis); - - assert(0 <= axis && axis < rank); + const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; + const auto axis = _tensor_registry->getITensor(axis_idx); auto output_ind = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_ind); - ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis, rank); + if (!input->is_dynamic() && !output->is_dynamic()) + return; - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + auto input_shape = input->getShape(); + auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer()); + const auto rank = input_shape.rank(); + axis_value = axis_value < 0 ? 
axis_value + rank : axis_value; + + ir::Shape new_shape = shape_inference::inferArgMinMaxShape(input_shape, axis_value, rank); + + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -141,7 +135,68 @@ void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op) // TODO auto new_shape = shape_inference::inferBatchMatMulShape(lhs_shape, rhs_shape, op.param()); - dynamicTensorManagerOf(output)->applyShape(output_index, new_shape); + output->applyShape(new_shape); +} + +void DynamicShapeInferer::visit(const ir::operation::BCQFullyConnected &op) +{ + const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)}; + const auto &input = _tensor_registry->getITensor(input_idx); + + const auto cluster_idx{ + op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)}; + const auto &cluster = _tensor_registry->getITensor(cluster_idx); + assert(cluster->is_constant()); + + if (!input->is_dynamic()) + return; + + auto input_shape = input->getShape(); + auto cluster_shape = cluster->getShape(); + + auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer()); + assert(cluster_buf); + + ir::Shape new_shape = + shape_inference::inferBCQFullyConnectedShape(input_shape, cluster_shape, cluster_buf); + + auto output_ind = op.getOutputs().at(0); + auto output = _tensor_registry->getITensor(output_ind); + + output->applyShape(new_shape); + assert(output->buffer() != nullptr); +} + +void DynamicShapeInferer::visit(const ir::operation::BCQGather &op) +{ + const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)}; + const auto &indices = _tensor_registry->getITensor(indices_idx); + + const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)}; + const auto &input_binary = _tensor_registry->getITensor(input_binary_idx); + + const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)}; + const auto &cluster = _tensor_registry->getITensor(cluster_idx); + assert(cluster->is_constant()); + + if (!indices->is_dynamic()) + return; + + auto indices_shape = indices->getShape(); + auto cluster_shape = cluster->getShape(); + auto rank = input_binary->getShape().rank(); + + auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer()); + assert(cluster_buf); + + ir::Shape new_shape = shape_inference::inferBCQGatherShape(indices_shape, cluster_shape, + cluster_buf, rank, op.param()); + + auto output_ind = op.getOutputs().at(0); + auto output = _tensor_registry->getITensor(output_ind); + + output->applyShape(new_shape); + assert(output->buffer() != nullptr); } void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op) @@ -167,10 +222,10 @@ void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op) assert(shape); // It shouldn't be 0. auto output_shape = shape_inference::inferBroadcastToShape( - shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer())); + shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer())); // set output shape and output buffer - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); assert(output->buffer() != nullptr); } @@ -198,7 +253,7 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op) So, only when all inputs are static, we can skip dynamic shape inference. 
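The binary-arithmetic visitors above defer to shape_inference::inferEltwiseShape once any operand is dynamic; conceptually this is NumPy-style broadcasting of the two input shapes. A standalone sketch of that rule (illustrative, not the runtime's actual implementation):

#include <algorithm>
#include <cstddef>
#include <stdexcept>
#include <vector>

// Broadcast two shapes: align from the trailing dimension; each pair of dims
// must be equal or one of them must be 1.
std::vector<int> broadcastShape(const std::vector<int> &lhs, const std::vector<int> &rhs)
{
  const std::size_t rank = std::max(lhs.size(), rhs.size());
  std::vector<int> out(rank, 1);
  for (std::size_t i = 0; i < rank; ++i)
  {
    const int l = i < lhs.size() ? lhs[lhs.size() - 1 - i] : 1;
    const int r = i < rhs.size() ? rhs[rhs.size() - 1 - i] : 1;
    if (l != r && l != 1 && r != 1)
      throw std::runtime_error("shapes are not broadcastable");
    out[rank - 1 - i] = std::max(l, r);
  }
  return out;
}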
*/ bool all_static = true; - for (auto input_ind : op.getInputs()) + for (auto &&input_ind : op.getInputs()) { auto input = _tensor_registry->getITensor(input_ind); if (input->is_dynamic()) @@ -215,15 +270,17 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op) { auto isConcatible = [](const backend::ITensor *input1, const backend::ITensor *input2, int32_t axis) { - if (input1->num_dimensions() != input2->num_dimensions()) + auto shape1 = input1->getShape(); + auto shape2 = input2->getShape(); + if (shape1.rank() != shape2.rank()) return false; - for (size_t i = 0; i < input1->num_dimensions(); i++) + for (int i = 0; i < shape1.rank(); i++) { - auto positive_axis = (axis >= 0) ? axis : axis + input1->num_dimensions(); + auto positive_axis = (axis >= 0) ? axis : axis + input1->getShape().rank(); if (i != positive_axis) - if (input1->dimension(i) != input2->dimension(i)) + if (shape1.dim(i) != shape2.dim(i)) return false; } @@ -233,17 +290,17 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op) auto first_input_ind = op.getInputs().at(0); auto first_input = _tensor_registry->getITensor(first_input_ind); - for (auto input_ind : op.getInputs()) + for (auto &&input_ind : op.getInputs()) { auto input = _tensor_registry->getITensor(input_ind); - if (input != first_input && !isConcatible(first_input.get(), input.get(), op.param().axis)) + if (input != first_input && !isConcatible(first_input, input, op.param().axis)) throw std::runtime_error("input shapes does not matched for concat"); } } // getting output shape onert::shape_inference::Shapes in_shapes; - for (auto input_ind : op.getInputs()) + for (auto &&input_ind : op.getInputs()) { auto input = _tensor_registry->getITensor(input_ind); ir::Shape shape = input->getShape(); @@ -255,7 +312,7 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op) auto output = _tensor_registry->getITensor(output_ind); auto output_shape = shape_inference::inferConcatShape(in_shapes, op.param()); - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); } void DynamicShapeInferer::visit(const ir::operation::Conv2D &op) @@ -278,7 +335,7 @@ void DynamicShapeInferer::visit(const ir::operation::Conv2D &op) ir::Shape output_shape = shape_inference::inferConv2DShape(input_shape, ker_shape, op.param()); - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); assert(output->buffer() != nullptr); } @@ -333,12 +390,18 @@ void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op) auto axis_ind = op.getInputs().at(ir::operation::ExpandDims::AXIS); auto axis = _tensor_registry->getITensor(axis_ind); - auto axis_buf = reinterpret_cast<const int32_t *>(axis->buffer()); - assert(axis_buf); + auto axis_type = axis->data_type(); + assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64); + + assert(axis->buffer()); + int32_t axis_value = + (axis_type == ir::DataType::INT32) + ? 
reinterpret_cast<const int32_t *>(axis->buffer())[0] + : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]); - auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]); + auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_value); - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); assert(output->buffer() != nullptr); } @@ -347,21 +410,26 @@ void DynamicShapeInferer::visit(const ir::operation::Fill &op) // check if output is not dynamic auto output_ind = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_ind); - auto input_ind = op.getInputs().at(ir::operation::Fill::Input::INPUT); - auto input = _tensor_registry->getITensor(input_ind); - ir::Shape input_shape = input->getShape(); + auto shape_ind = op.getInputs().at(ir::operation::Fill::Input::SHAPE); + auto shape = _tensor_registry->getITensor(shape_ind); - if ((!input->is_dynamic()) && (!output->is_dynamic())) + if ((!shape->is_dynamic()) && (!output->is_dynamic())) return; - assert(input.get()->data_type() == ir::DataType::INT32); + const auto dims_type = shape->data_type(); + assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64); - auto input_buf = reinterpret_cast<const int32_t *>(input->buffer()); - assert(input_buf); + auto dims_buf = shape->buffer(); + assert(dims_buf); - auto output_shape = shape_inference::inferFillShape(input_shape, input_buf); + const auto &dims_shape = shape->getShape(); + const auto &output_shape = ((dims_type == ir::DataType::INT32) + ? shape_inference::inferFillShape<int32_t>( + dims_shape, reinterpret_cast<const int32_t *>(dims_buf)) + : shape_inference::inferFillShape<int64_t>( + dims_shape, reinterpret_cast<const int64_t *>(dims_buf))); - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); assert(output->buffer() != nullptr); } @@ -384,7 +452,7 @@ void DynamicShapeInferer::visit(const ir::operation::FullyConnected &op) auto output_ind = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_ind); - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -416,7 +484,7 @@ void DynamicShapeInferer::visit(const ir::operation::Gather &op) auto output_ind = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_ind); - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -425,11 +493,122 @@ void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op) handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT)); } +void DynamicShapeInferer::visit(const ir::operation::LSTM &op) +{ + const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; + auto output = _tensor_registry->getITensor(output_index); + + const auto output_state_out_index{ + op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; + + const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; + + const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; + + if (!output->is_dynamic() && + !(_tensor_registry->getITensor(output_state_out_index) != nullptr && + _tensor_registry->getITensor(output_state_out_index)->is_dynamic()) && + 
!(_tensor_registry->getITensor(cell_state_out_index) != nullptr && + _tensor_registry->getITensor(cell_state_out_index)->is_dynamic()) && + !(_tensor_registry->getITensor(scratch_buffer_index) != nullptr && + _tensor_registry->getITensor(cell_state_out_index)->is_dynamic())) + return; + + const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)}; + const auto input = _tensor_registry->getITensor(input_index); + const auto input_shape = input->getShape(); + + const auto input_to_output_weights_index{ + op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; + const auto input_to_output_weights = _tensor_registry->getITensor(input_to_output_weights_index); + const auto input_to_output_weights_shape = input_to_output_weights->getShape(); + + const auto recurrent_to_output_weights_index{ + op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; + const auto recurrent_to_output_weights = + _tensor_registry->getITensor(recurrent_to_output_weights_index); + const auto recurrent_to_output_weights_shape = recurrent_to_output_weights->getShape(); + + // re-sizing outputs + const int n_batch = + (input_shape.rank() == 3 && op.param().time_major) ? input_shape.dim(1) : input_shape.dim(0); + const int n_cell = input_to_output_weights_shape.dim(0); + const int n_output = recurrent_to_output_weights_shape.dim(1); + if (input_shape.rank() == 3) + { + if (op.param().time_major) + output->applyShape(ir::Shape{input_shape.dim(0), n_batch, n_output}); + else + output->applyShape(ir::Shape{n_batch, input_shape.dim(1), n_output}); + } + else + { + assert(input_shape.rank() == 2); + output->applyShape(ir::Shape{n_batch, n_output}); + } + assert(output->buffer() != nullptr); + + auto output_state_out = _tensor_registry->getITensor(output_state_out_index); + if (output_state_out != nullptr) + { + output_state_out->applyShape(ir::Shape{n_batch, n_output}); + assert(output_state_out->buffer() != nullptr); + } + + auto cell_state_out = _tensor_registry->getITensor(cell_state_out_index); + if (cell_state_out != nullptr) + { + cell_state_out->applyShape(ir::Shape{n_batch, n_cell}); + assert(cell_state_out->buffer() != nullptr); + } + + auto scratch_buffer = _tensor_registry->getITensor(scratch_buffer_index); + if (scratch_buffer != nullptr) + { + const auto input_to_input_weights_index{ + op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; + const auto recurrent_to_input_weights_index{ + op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; + + const auto input_to_input_weights_shape = + _tensor_registry->getITensor(input_to_input_weights_index)->getShape(); + bool has_input_to_input_weights = + input_to_input_weights_shape.dim(0) != 0 && input_to_input_weights_shape.dim(1) != 0; + + const auto recurrent_to_input_weights_shape = + _tensor_registry->getITensor(recurrent_to_input_weights_index)->getShape(); + bool has_recurrent_to_input_weights = + recurrent_to_input_weights_shape.dim(0) != 0 && recurrent_to_input_weights_shape.dim(1) != 0; + + // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG). 
+ // true: no CIFG + // false: CIFG + bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; + if (has_cifg_param) + { + scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 4}); + } + else + { + scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 3}); + } + assert(scratch_buffer->buffer() != nullptr); + } +} + void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op) { handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT)); } +void DynamicShapeInferer::visit(const ir::operation::DetectionPostProcess & /* op */) +{ + // NOTE DetectionPostProcess's undefined outputs' shape are decided on compile time + // by static shape inferer. + // DetectionPostProcess's outputs' shape are independent with input shape + // and decided by parameter value. +} + void DynamicShapeInferer::visit(const ir::operation::OneHot &op) { auto output_ind = op.getOutputs().at(0); @@ -452,7 +631,7 @@ void DynamicShapeInferer::visit(const ir::operation::OneHot &op) const auto axis_val = op.param().axis; ir::Shape new_shape = shape_inference::inferOnehotShape(indices_shape, *depth_buf, axis_val); - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -488,7 +667,7 @@ void DynamicShapeInferer::visit(const ir::operation::Pack &op) ir::Shape new_shape = shape_inference::inferPackShape(input_shape, axis, rank, num); - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -512,10 +691,10 @@ void DynamicShapeInferer::visit(const ir::operation::Pad &op) assert(pad_buf); auto output_shape = - shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements()); + shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements()); // change output shape and reallocate output tensor memory - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); assert(output->buffer() != nullptr); } @@ -556,18 +735,18 @@ void DynamicShapeInferer::visit(const ir::operation::Range &op) if (output->data_type() == ir::DataType::FLOAT32) { new_shape = - shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()), - *reinterpret_cast<float *>(limit_tensor->buffer()), - *reinterpret_cast<float *>(delta_tensor->buffer())); + shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()), + *reinterpret_cast<float *>(limit_tensor->buffer()), + *reinterpret_cast<float *>(delta_tensor->buffer())); } else if (output->data_type() == ir::DataType::INT32) { new_shape = shape_inference::inferRangeShape<int32_t>( - *reinterpret_cast<int32_t *>(start_tensor->buffer()), - *reinterpret_cast<int32_t *>(limit_tensor->buffer()), - *reinterpret_cast<int32_t *>(delta_tensor->buffer())); + *reinterpret_cast<int32_t *>(start_tensor->buffer()), + *reinterpret_cast<int32_t *>(limit_tensor->buffer()), + *reinterpret_cast<int32_t *>(delta_tensor->buffer())); } - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -611,7 +790,7 @@ void DynamicShapeInferer::visit(const ir::operation::Reduce &op) ir::Shape new_shape = shape_inference::inferReduceShape(input_shape, axes_vec, keep_dims); - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + output->applyShape(new_shape); 
assert(output->buffer() != nullptr); } @@ -659,13 +838,13 @@ void DynamicShapeInferer::visit(const ir::operation::Reshape &op) assert(new_shape_buf); auto output_shape = shape_inference::inferReshapeShape( - new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements()); + new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements()); // if shape is changed, change output shape and reallocate output tensor memory if (output_shape != output->getShape() || output->buffer() == nullptr) { // change on output shape - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); } assert(output->buffer() != nullptr); } @@ -681,7 +860,7 @@ void DynamicShapeInferer::visit(const ir::operation::Reshape &op) if (output_shape != output->getShape() || output->buffer() == nullptr) { // change on output shape - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); } assert(output->buffer() != nullptr); } @@ -705,14 +884,35 @@ void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op) return; // getting output shape from input shape and Params - auto output_shape = shape_inference::inferResizeBilinearShape( - input->getShape(), op.param().height_out, op.param().width_out); + int32_t height_out, width_out; + if (op.getInputs().size() == 2) + { + auto size_ind = op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE); + auto size = _tensor_registry->getITensor(size_ind); + if (size->data_type() == ir::DataType::INT32) + { + auto size_buf = reinterpret_cast<const int32_t *>(size->buffer()); + height_out = size_buf[0]; + width_out = size_buf[1]; + } + else + { + throw std::runtime_error("DynamicShapeInferer ResizeBilinear : Unsupported data type"); + } + } + else + { + height_out = op.param().height_out; + width_out = op.param().width_out; + } + auto output_shape = + shape_inference::inferResizeBilinearShape(input->getShape(), height_out, width_out); // if shape is changed, change output shape and reallocate output tensor memory if (output_shape != output->getShape() || output->buffer() == nullptr) { // change on output shape - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); } assert(output->buffer() != nullptr); } @@ -744,12 +944,12 @@ void DynamicShapeInferer::visit(const ir::operation::Select &op) // Select output shpae ir::Shape new_shape = - shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape); + shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape); auto output_ind = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_ind); - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -768,7 +968,7 @@ void DynamicShapeInferer::visit(const ir::operation::Shape &op) ir::Shape output_shape; output_shape.append(input_shape.rank()); - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); assert(output->buffer() != nullptr); } @@ -794,7 +994,7 @@ void DynamicShapeInferer::visit(const ir::operation::Slice &op) ir::Shape new_shape = shape_inference::inferSliceShape(input_shape, begins_buf, sizes_buf); - dynamicTensorManagerOf(output)->applyShape(output_index, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -829,9 +1029,9 @@ void 
DynamicShapeInferer::visit(const ir::operation::SpaceToBatchND &op) auto padding_data = reinterpret_cast<int32_t *>(padding->buffer()); ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape( - input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data); + input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data); - dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -840,27 +1040,37 @@ void DynamicShapeInferer::visit(const ir::operation::Split &op) const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)}; const auto &input = _tensor_registry->getITensor(input_idx); - if (!input->is_dynamic()) + // Return if all tensors are not dynamic + bool has_dynamic = false; + for (const auto &output_idx : op.getOutputs()) + { + auto output = _tensor_registry->getITensor(output_idx); + has_dynamic |= output->is_dynamic(); + } + if (!input->is_dynamic() && !has_dynamic) { return; } auto input_shape = input->getShape(); - const auto axis = op.param().axis; + const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)}; + const auto &axis = _tensor_registry->getITensor(axis_idx); + + auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer()); const auto num_splits = op.param().num_splits; const auto rank = input_shape.rank(); - auto axis_resolved = axis < 0 ? axis + rank : axis; + axis_value = axis_value < 0 ? axis_value + rank : axis_value; - assert(0 <= axis_resolved && axis_resolved < rank); + assert(0 <= axis_value && axis_value < rank); - ir::Shape new_shape = shape_inference::inferSplitShape(input_shape, axis_resolved, num_splits); + ir::Shape new_shape = shape_inference::inferSplitShape(input_shape, axis_value, num_splits); for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++) { auto output_ind = op.getOutputs().at(out_tensor_idx); auto output = _tensor_registry->getITensor(output_ind); - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } } @@ -889,7 +1099,7 @@ void DynamicShapeInferer::visit(const ir::operation::Squeeze &op) auto output_ind = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_ind); - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -920,17 +1130,16 @@ void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op) const auto rank = input_shape.rank(); auto op_params = shape_inference::buildStridedSliceParams( - reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()), - reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask, - rank); + reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()), + reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask, rank); auto output_index = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_index); ir::Shape output_shape = - onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank); + onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank); - dynamicTensorManagerOf(output)->applyShape(output_index, output_shape); + output->applyShape(output_shape); assert(output->buffer() != nullptr); } @@ -952,10 +1161,12 @@ void 
DynamicShapeInferer::visit(const ir::operation::Tile &op) auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier->buffer()); assert(multiplier_buffer); - auto output_shape = shape_inference::inferTileShape(input_shape, multiplier_buffer); + auto mult_shape = multiplier->getShape(); + auto output_shape = shape_inference::inferTileShape( + input_shape, multiplier_buffer, mult_shape.rank() == 0 ? 1 : mult_shape.dim(0)); // set output shape and output buffer - dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); + output->applyShape(output_shape); assert(output->buffer() != nullptr); } @@ -967,17 +1178,49 @@ void DynamicShapeInferer::visit(const ir::operation::Transpose &op) // from op, access the buffer of second input to read new shape auto input_ind = op.getInputs().at(ir::operation::Transpose::Input::INPUT); - auto input_tensor = _tensor_registry->getITensor(input_ind); - auto input_shape = input_tensor->getShape(); + auto input = _tensor_registry->getITensor(input_ind); + auto input_shape = input->getShape(); + + /* + Here, the state after compilation (static shape inference) could be one of the following: + + input perms output execution-time shape inf required + ------------------------------------ -------------------------------- + case 1) static const static X + case 2) static non-const dynamic O + case 3) dynamic const dynamic O + case 4) dynamic non-const dynamic O - if (!input_tensor->is_dynamic()) + So, only when both input1 and ouput are static, we can skip dynamic shape inference. + */ + if ((!input->is_dynamic()) && (!output->is_dynamic())) return; - const auto perm{op.param().perm}; - // set output shape, based on input and params - ir::Shape new_shape = shape_inference::inferTransposeShape(input_shape, perm); + auto perm_ind = op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION); + auto perm = _tensor_registry->getITensor(perm_ind); + + ir::Shape new_shape; + // TODO Change perm->dimension(0) == 0 to perm->num_elements() == 0 + if (perm->getShape().dim(0) == 0) // This condition means that perm is (n-1...0) + { + // Call by (n-1...0) + new_shape = shape_inference::inferTransposeShape(input_shape, nullptr, 0); + } + else + { + // Check rank + if (static_cast<size_t>(input->getShape().rank()) != perm->getShape().num_elements()) + { + throw std::runtime_error("DynamicShapeInferer failed, bad rank size: " + + std::to_string(perm->getShape().num_elements())); + } - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + // set output shape, based on input and params + const auto perm_buffer = reinterpret_cast<const int32_t *>(perm->buffer()); + new_shape = + shape_inference::inferTransposeShape(input_shape, perm_buffer, perm->getShape().dim(0)); + } + output->applyShape(new_shape); assert(output->buffer() != nullptr); } @@ -1005,7 +1248,7 @@ void DynamicShapeInferer::visit(const ir::operation::Unpack &op) auto output_ind = op.getOutputs().at(out_tensor_idx); auto output = _tensor_registry->getITensor(output_ind); - dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape); + output->applyShape(new_shape); assert(output->buffer() != nullptr); } diff --git a/runtime/onert/core/src/exec/ExecTime.cc b/runtime/onert/core/src/exec/ExecTime.cc index 6bf2744a9..4b82655b9 100644 --- a/runtime/onert/core/src/exec/ExecTime.cc +++ b/runtime/onert/core/src/exec/ExecTime.cc @@ -14,12 +14,10 @@ * limitations under the License. 
*/ -#include "exec/ExecTime.h" +#include "ExecTime.h" -#include <fstream> -#include <cassert> -#include <limits> #include <algorithm> +#include <cassert> namespace onert { diff --git a/runtime/onert/core/src/exec/ExecTime.h b/runtime/onert/core/src/exec/ExecTime.h index 846d0930b..95f460053 100644 --- a/runtime/onert/core/src/exec/ExecTime.h +++ b/runtime/onert/core/src/exec/ExecTime.h @@ -34,7 +34,7 @@ class ExecTime { public: explicit ExecTime(const std::vector<const backend::Backend *> &backends) - : _json(backends, _measurements) + : _json(backends, _measurements) { } @@ -94,7 +94,7 @@ public: /** * @brief Update metrics file with new data. */ - void uploadOperationsExecTime() const { _json.uploadOperationsExecTime(); } + void storeOperationsExecTime() const { _json.storeOperationsExecTime(); } static const int64_t NOT_FOUND = -1; private: diff --git a/runtime/onert/core/src/exec/ExecTime.test.cc b/runtime/onert/core/src/exec/ExecTime.test.cc new file mode 100644 index 000000000..939184e4e --- /dev/null +++ b/runtime/onert/core/src/exec/ExecTime.test.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ExecTime.h" + +#include "backend/IConfig.h" +#include "backend/Backend.h" + +#include <gtest/gtest.h> + +#include <string> + +namespace +{ +using namespace onert; +using namespace exec; +using namespace backend; + +struct MockConfig : public IConfig +{ + std::string id() override { return "b1"; } + bool initialize() override { return true; }; + bool supportPermutation() override { return false; } + ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override + { + return ir::Layout::UNKNOWN; + } + bool supportDynamicTensor() override { return false; } + bool supportFP16() override { return false; } +}; + +struct MockBackend : public ::onert::backend::Backend +{ + std::shared_ptr<onert::backend::IConfig> config() const override + { + return std::make_shared<MockConfig>(); + } + std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&) const override + { + return nullptr; + } +}; + +TEST(ExecTime, roundtrip_ok) +{ + const auto *b = new MockBackend(); + std::vector<const Backend *> bs = {b}; + { + ExecTime et(bs); + et.updateOperationExecTime(b, "op1", true, 100, 100); + et.updateOperationExecTime(b, "op1", true, 200, 200); + et.updateOperationExecTime(b, "op1", false, 100, 888); + et.storeOperationsExecTime(); + } + { + ExecTime et(bs); + auto time = et.getOperationExecTime(b, "op1", true, 100); + ASSERT_EQ(time, 100); + // Check interpolation + time = et.getOperationExecTime(b, "op1", true, 150); + ASSERT_EQ(time, 150); + time = et.getOperationExecTime(b, "op1", false, 100); + ASSERT_EQ(time, 888); + et.storeOperationsExecTime(); + } + // clean up + EXPECT_EQ(remove("exec_time.json"), 0); +} + +TEST(ExecTime, structure) +{ + + const auto *b = new MockBackend(); + std::vector<const Backend *> bs = {b}; + { + ExecTime et(bs); + 
et.updateOperationExecTime(b, "op1", true, 100, 100); + et.updateOperationExecTime(b, "op1", true, 200, 200); + et.storeOperationsExecTime(); + } + { + ExecTime et(bs); + auto time = et.getOperationExecTime(b, "op1", true, 100); + ASSERT_EQ(time, 100); + // Check interpolation + time = et.getOperationExecTime(b, "op1", true, 200); + ASSERT_EQ(time, 200); + et.storeOperationsExecTime(); + } + // clean up + EXPECT_EQ(remove("exec_time.json"), 0); +} +} // unnamed namespace diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc index 7feb3ab68..f51bed820 100644 --- a/runtime/onert/core/src/exec/Execution.cc +++ b/runtime/onert/core/src/exec/Execution.cc @@ -16,6 +16,8 @@ #include "exec/Execution.h" +#include "train/TrainableExecutors.h" + #include "util/logging.h" namespace onert @@ -23,33 +25,30 @@ namespace onert namespace exec { -Execution::Execution(const std::shared_ptr<ExecutorMap> &executors) : _executors{executors} +Execution::Execution(const std::shared_ptr<IExecutors> &executors) : _executors{executors} { assert(executors != nullptr); - assert(executors->at(ir::SubgraphIndex{0}) != nullptr); - const auto &primary_subg = primary_subgraph(); - _io_desc.inputs.resize(primary_subg.getInputs().size()); - _io_desc.outputs.resize(primary_subg.getOutputs().size()); + assert(executors->entryExecutor() != nullptr); + _io_desc.inputs.resize(_executors->inputSize()); + _io_desc.outputs.resize(_executors->outputSize()); } void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_shape) { - // This should be called BEFORE setInput. - if (_io_desc.inputs.at(index.value()) != 0) - throw std::runtime_error("Error in calling order"); - // This will be used later to set input tensor dynamic // Note that 'compiled' model will not be updated with new_shape // but new_shape will change model input shape while 'running' the model _io_desc.dynamic_input_shapes[index] = new_shape; + + VERBOSE(Execution) << "Model input shape will be changed at the start of execute()" + << "(index: " << index << ")" << std::endl; } // TODO Remove default parameter void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length, ir::Layout layout) { - const auto input_index = primary_subgraph().getInputs().at(index); - const auto info = primary_subgraph().operands().at(input_index).info(); + const auto &info = _executors->inputInfo(index); // TODO handle when (!buffer && length != 0) : setting the input as an optional tensor @@ -58,10 +57,10 @@ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t le // note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo() { auto input_shape_sig = _io_desc.dynamic_input_shapes.find(index); - auto size_required = (input_shape_sig != _io_desc.dynamic_input_shapes.end()) - ? input_shape_sig->second.num_elements() * - onert::ir::sizeOfDataType(info.typeInfo().type()) - : info.total_size(); + auto size_required = + (input_shape_sig != _io_desc.dynamic_input_shapes.end()) + ? 
input_shape_sig->second.num_elements() * onert::ir::sizeOfDataType(info.typeInfo().type()) + : info.total_size(); if (length < size_required) { @@ -89,8 +88,7 @@ void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, con // TODO Remove default parameter void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout) { - const auto output_index = primary_subgraph().getOutputs().at(index); - const auto info = primary_subgraph().operands().at(output_index).info(); + const auto &info = _executors->outputInfo(index); if (length < info.total_size()) { @@ -104,7 +102,7 @@ void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, void Execution::setOutput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape, void *buffer, size_t length, ir::Layout layout) { - auto info = ir::OperandInfo::createStaticInfo(shape, type); + const auto &info = ir::OperandInfo::createStaticInfo(shape, type); if (length < info.total_size()) { @@ -118,21 +116,21 @@ void Execution::setInputLayout(const ir::IOIndex &index, ir::Layout layout) { const auto &input_desc = _io_desc.inputs.at(index.value()); _io_desc.inputs.at(index.value()) = - std::make_unique<InputDesc>(input_desc->info, input_desc->buffer, input_desc->size, layout); + std::make_unique<InputDesc>(input_desc->info, input_desc->buffer, input_desc->size, layout); } void Execution::setOutputLayout(const ir::IOIndex &index, ir::Layout layout) { const auto &output_desc = _io_desc.outputs.at(index.value()); - _io_desc.outputs.at(index.value()) = std::make_unique<OutputDesc>( - output_desc->info, output_desc->buffer, output_desc->size, layout); + _io_desc.outputs.at(index.value()) = + std::make_unique<OutputDesc>(output_desc->info, output_desc->buffer, output_desc->size, layout); } void Execution::execute() { VERBOSE(Execution) << "Start execution" << std::endl; - primary_executor()->execute(_io_desc); + _executors->execute(_io_desc); finished = true; VERBOSE(Execution) << "Execution finished" << std::endl; @@ -155,13 +153,41 @@ void Execution::waitFinish() bool Execution::isFinished(void) const { return finished; } +#ifdef ONERT_TRAIN +void Execution::train(uint32_t training_step) +{ + auto execs = dynamic_cast<exec::train::TrainableExecutors *>(_executors.get()); + if (!execs) + { + throw std::runtime_error{"Supported only TrainableExecutors"}; + } + + VERBOSE(Execution) << "Start training" << std::endl; + + execs->train(_io_desc, training_step); + finished = true; + + VERBOSE(Execution) << "training finished" << std::endl; +} + +float Execution::getLoss(const ir::IOIndex &ind) +{ + auto execs = dynamic_cast<exec::train::TrainableExecutors *>(_executors.get()); + if (!execs) + { + throw std::runtime_error{"Supported only TrainableExecutors"}; + } + + return execs->getLoss(ind); +} +#endif // ONERT_TRAIN + ir::Shape Execution::getInputShape(ir::IOIndex ind) const { auto itr = _io_desc.dynamic_input_shapes.find(ind); if (itr == _io_desc.dynamic_input_shapes.end()) { - auto operand_idx = primary_subgraph().getInputs().at(ind.value()); - return primary_subgraph().operands().at(operand_idx).shape(); + return _executors->inputInfo(ind).shape(); } else { @@ -169,15 +195,32 @@ ir::Shape Execution::getInputShape(ir::IOIndex ind) const } } +// NNAPI return fail if ANeuralNetworksExecution_getOutputOperandRank or +// ANeuralNetworksExecution_getOutputOperandDimensions is called before execution. 
+// On the other hand, NNFW API return static shape inference result if nnfw_output_tensorinfo is +// called before execution. +// To handle both case, this method retun static shape inference result and fail will be handled on +// NNAPI frontend. ir::Shape Execution::getOutputShape(ir::IOIndex ind) const { if (!isFinished()) - throw std::runtime_error("Cannot get output shape before execution is finished"); + return _executors->outputInfo(ind).shape(); const auto &output_desc = _io_desc.outputs.at(ind.value()); return output_desc->info.shape(); } +size_t Execution::getInputTotalSize(ir::IOIndex ind) const +{ + // TODO Support dynamic shape + return _executors->inputInfo(ind).total_size(); +} + +size_t Execution::getOutputTotalSize(ir::IOIndex ind) const +{ + return _executors->outputInfo(ind).total_size(); +} + } // namespace exec } // namespace onert diff --git a/runtime/onert/core/src/exec/Execution.test.cc b/runtime/onert/core/src/exec/Execution.test.cc new file mode 100644 index 000000000..fefe8a332 --- /dev/null +++ b/runtime/onert/core/src/exec/Execution.test.cc @@ -0,0 +1,635 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "exec/Execution.h" + +#include "compiler/Compiler.h" +#include "compiler/CompilerFactory.h" +#include "ir/Graph.h" +#include "ir/operation/BinaryArithmetic.h" +#include "util/TracingCtx.h" + +#include <gtest/gtest.h> +#include <thread> + +namespace +{ + +using namespace onert::ir; + +class CompiledMockUpModel +{ +public: + CompiledMockUpModel() + { + // Model: two elementwise add operation + // model input: lhs, rhs1 + // model output: second add result (result2) + // constant: rhs2 + // result1 <= (lhs + rhs) + // result2 <= (result1 + rhs2) + // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1} + // activation: none (constant) + graph = std::make_shared<Graph>(); + // 1st add operands (result1 <= lhs + rhs1) + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + static float rhs2_data[4] = {3, 1, -1, 5}; + auto operand_lhs = graph->addOperand(shape, type); + auto operand_rhs1 = graph->addOperand(shape, type); + auto operand_result1 = graph->addOperand(shape, type); + auto operand_rhs2 = graph->addOperand(shape, type); + auto operand_result2 = graph->addOperand(shape, type); + graph->operands() + .at(operand_rhs2) + .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16)); + // 2nd add operations (result2 <= result1 + rhs2) + operation::BinaryArithmetic::Param param1; + param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param1.activation = Activation::NONE; + auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1}; + auto output_set1 = OperandIndexSequence{operand_result1}; + graph->addOperation( + std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1)); + operation::BinaryArithmetic::Param param2; + param2.arithmetic_type = 
operation::BinaryArithmetic::ArithmeticType::ADD; + param2.activation = Activation::NONE; + auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2}; + auto output_set2 = OperandIndexSequence{operand_result2}; + graph->addOperation( + std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2)); + // Identify model inputs and outputs + graph->addInput(operand_lhs); + graph->addInput(operand_rhs1); + graph->addOutput(operand_result2); + graph->verify(); + + // Compile + auto model = std::make_shared<onert::ir::Model>(); + model->push(onert::ir::SubgraphIndex{0}, graph); + coptions = onert::compiler::CompilerOptions::fromGlobalConfig(); + onert::compiler::Compiler compiler{model, *coptions}; + artifact = compiler.compile(); + } + +public: + std::shared_ptr<Graph> graph; + std::unique_ptr<onert::compiler::CompilerOptions> coptions; + std::shared_ptr<onert::compiler::CompilerArtifact> artifact; +}; + +class CompiledMockUpMultiModel +{ +public: + CompiledMockUpMultiModel() + { + // Model0: a float elementwise add operation + // Model0 input: lhs0, rhs0 + // Model0 output: add result (result0) + + // Model1: a qasymm8 elementwise add operation + // Model1 input: result0, rhs1 + // Model1 output: add result (result1) + + // Model2: a float elementwise add operation + // Model2 input: result0, result1 + // Model2 output: add result (result2) + + // constant: rhs2 + // result0 <= (lhs0 + rhs0) + // result1 <= (result0 + rhs1) + // result2 <= (result0 + result1) + // lhs0, rhs0, rh1, result0, result1, result2 shape: {1, 2, 2, 1} + // activation: none (constant) + + // Update edge information + edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}); + edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{1}); + edges.pkg_outputs.emplace_back(ModelIndex{2}, SubgraphIndex{0}, IOIndex{0}); + // From + const auto result0 = IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}}; + const auto result1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}}; + // To + const auto lhs1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}}; + const auto lhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{0}}; + const auto rhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{1}}; + edges.edges.insert({result0, lhs1}); + edges.edges.insert({result0, lhs2}); + edges.edges.insert({result1, rhs2}); + + for (size_t i = 0; i < 3; ++i) + { + graphs.emplace_back(std::make_shared<Graph>()); + } + Shape shape{1, 2, 2, 1}; + + // Model0's add operands (result1 <= lhs0 + rhs0) + DataType types[3] = {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::FLOAT32}; + auto operand_lhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]}); + auto operand_rhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]}); + auto operand_result0 = graphs[0]->addOperand(shape, TypeInfo{types[0]}); + + // Model0's add operation + operation::BinaryArithmetic::Param param0; + param0.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param0.activation = Activation::NONE; + auto input_set0 = OperandIndexSequence{operand_lhs0, operand_rhs0}; + auto output_set0 = OperandIndexSequence{operand_result0}; + graphs[0]->addOperation( + std::make_unique<operation::BinaryArithmetic>(input_set0, output_set0, param0)); + + // Model0's inputs/outputs + graphs[0]->addInput(operand_lhs0); + graphs[0]->addInput(operand_rhs0); + graphs[0]->addOutput(operand_result0); + graphs[0]->verify(); + + // Model1's add operands (result2 <= Model0 result + rhs1) + // static float 
rhs1_data[4] = {3, 1, -1, 5}; + static uint8_t rhs1_data[4] = {131, 129, 127, 133}; + const float scale = 1; + const int32_t zero_point = 128; + auto operand_lhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point}); + auto operand_rhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point}); + auto operand_result1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point}); + graphs[1] + ->operands() + .at(operand_rhs1) + .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs1_data), 4)); + + // Model1's add operation + operation::BinaryArithmetic::Param param1; + param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param1.activation = Activation::NONE; + auto input_set1 = OperandIndexSequence{operand_lhs1, operand_rhs1}; + auto output_set1 = OperandIndexSequence{operand_result1}; + graphs[1]->addOperation( + std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1)); + + // Model1's inputs/outputs + graphs[1]->addInput(operand_lhs1); + graphs[1]->addOutput(operand_result1); + graphs[1]->verify(); + + // Model2's additional operands (result3 <= Model0 result + Model1 result) + auto operand_lhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]}); + auto operand_rhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]}); + auto operand_result2 = graphs[2]->addOperand(shape, TypeInfo{types[2]}); + + // Model2's add operation + operation::BinaryArithmetic::Param param2; + param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param2.activation = Activation::NONE; + auto input_set2 = OperandIndexSequence{operand_lhs2, operand_rhs2}; + auto output_set2 = OperandIndexSequence{operand_result2}; + graphs[2]->addOperation( + std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2)); + + // Model1's inputs/outputs + graphs[2]->addInput(operand_lhs2); + graphs[2]->addInput(operand_rhs2); + graphs[2]->addOutput(operand_result2); + graphs[2]->verify(); + + // Compile + compile(); + } + +public: + void compile() + { + auto nnpkg = std::make_shared<onert::ir::NNPkg>(); + coptions.clear(); + for (uint16_t i = 0; i < graphs.size(); ++i) + { + coptions.emplace_back(onert::compiler::CompilerOptions::fromGlobalConfig()); + + auto model = std::make_shared<onert::ir::Model>(); + model->push(SubgraphIndex{0}, graphs[i]); + + nnpkg->push(onert::ir::ModelIndex{i}, std::move(model)); + } + for (const auto &pkg_input : edges.pkg_inputs) + { + nnpkg->addInput(pkg_input); + } + for (const auto &pkg_output : edges.pkg_outputs) + { + nnpkg->addOutput(pkg_output); + } + for (const auto &edge : edges.edges) + { + nnpkg->addEdge(edge.from, edge.to); + } + auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, coptions); + nnpkg.reset(); + artifact = compiler->compile(); + } + +public: + std::vector<std::shared_ptr<Graph>> graphs; + std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> coptions; + std::shared_ptr<onert::compiler::CompilerArtifact> artifact; + ModelEdges edges; +}; + +TEST(ExecInstance, simple) +{ + auto mockup = CompiledMockUpModel(); + auto graph = mockup.graph; + auto executors = mockup.artifact->_executors; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float input1_buffer[4] = {1, 0, -1, -2}; + const float input2_buffer[4] = {1, -3, 2, -4}; + float output_buffer[4] = {}; + const float output_expected[4] = {5, -2, 0, -1}; + + onert::exec::Execution execution{executors}; + + 
execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16); + execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16); + execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16); + execution.execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(output_buffer[i], output_expected[i]); + } +} + +TEST(ExecInstance, twoCompile) +{ + auto mockup = CompiledMockUpModel(); + auto graph = mockup.graph; + auto executors1 = mockup.artifact->_executors; + onert::exec::Execution execution1{executors1}; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float exe1_input1_buffer[4] = {1, 0, -1, -2}; + const float exe1_input2_buffer[4] = {1, -3, 2, -4}; + float exe1_output_buffer[4] = {}; + const float exe1_output_expected[4] = {5, -2, 0, -1}; + + execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16); + execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16); + execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16); + + // Make new executor: compile again + auto model = std::make_shared<onert::ir::Model>(); + model->push(onert::ir::SubgraphIndex{0}, graph); + auto coptions = onert::compiler::CompilerOptions::fromGlobalConfig(); + onert::compiler::Compiler compiler{model, *coptions}; + std::shared_ptr<onert::compiler::CompilerArtifact> artifact = compiler.compile(); + onert::exec::Execution execution2{artifact->_executors}; + + const float exe2_input1_buffer[4] = {2, 1, -2, 0}; + const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; + float exe2_output_buffer[4] = {}; + const float exe2_output_expected[4] = {2, 5, -2, 7}; + + execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16); + execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16); + execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16); + + execution1.execute(); + execution2.execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); + EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); + } +} + +// Support two initialized execution instance then ordered execution +TEST(ExecInstance, twoExecution) +{ + auto mockup = CompiledMockUpModel(); + auto executors = mockup.artifact->_executors; + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output1 = IOIndex{0}; + + const float exe1_input1_buffer[4] = {1, 0, -1, -2}; + const float exe1_input2_buffer[4] = {1, -3, 2, -4}; + float exe1_output_buffer[4] = {}; + const float exe1_output_expected[4] = {5, -2, 0, -1}; + const float exe2_output_expected[4] = {2, 5, -2, 7}; + + onert::exec::Execution execution1{executors}; + execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16); + execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16); + execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16); + + const float exe2_input1_buffer[4] = {2, 1, -2, 0}; + const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; + float exe2_output_buffer[4] = {}; + + // Make new execution + onert::exec::Execution execution2{executors}; + execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16); + execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16); + execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16); + + execution1.execute(); + 
execution2.execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); + EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); + } +} + +class Inference +{ +public: + Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4], + std::shared_ptr<onert::exec::IExecutors> &executors) + : _input1{input1}, _input2{input2}, _output{output}, _executors{executors} + { + // DO NOTHING + } + + void inference(void) + { + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output1 = IOIndex{0}; + + onert::exec::Execution execution{_executors}; + execution.setInput(input1, reinterpret_cast<const void *>(_input1), 16); + execution.setInput(input2, reinterpret_cast<const void *>(_input2), 16); + execution.setOutput(output1, reinterpret_cast<void *>(_output), 16); + + execution.execute(); + } + +private: + const float (&_input1)[4]; + const float (&_input2)[4]; + float (&_output)[4]; + std::shared_ptr<onert::exec::IExecutors> &_executors; +}; + +// Support multi-thread execution +TEST(ExecInstance, twoThreads) +{ + auto mockup = CompiledMockUpModel(); + auto executors = mockup.artifact->_executors; + + const float exe1_input1_buffer[4] = {1, 0, -1, -2}; + const float exe1_input2_buffer[4] = {1, -3, 2, -4}; + float exe1_output_buffer[4] = {}; + const float exe1_output_expected[4] = {5, -2, 0, -1}; + + Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executors}; + + const float exe2_input1_buffer[4] = {2, 1, -2, 0}; + const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; + float exe2_output_buffer[4] = {}; + const float exe2_output_expected[4] = {2, 5, -2, 7}; + + Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executors}; + + std::thread t1{&Inference::inference, &execution1}; + std::thread t2{&Inference::inference, &execution2}; + + t1.join(); + t2.join(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); + EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); + } +} + +// Support asynchronous execution +TEST(ExecInstance, async) +{ + auto mockup = CompiledMockUpModel(); + auto graph = mockup.graph; + auto executors = mockup.artifact->_executors; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float input1_buffer[4] = {1, 0, -1, -2}; + const float input2_buffer[4] = {1, -3, 2, -4}; + float output_buffer[4] = {}; + const float output_expected[4] = {5, -2, 0, -1}; + + onert::exec::Execution execution{executors}; + + execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16); + execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16); + execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16); + execution.startExecute(); + execution.waitFinish(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(output_buffer[i], output_expected[i]); + } +} + +TEST(ExecInstance, multi_model_simple) +{ + auto mockup = CompiledMockUpMultiModel(); + auto executors = mockup.artifact->_executors; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float input1_buffer[4] = {1, 0, -1, -2}; + const float input2_buffer[4] = {1, -3, 2, -4}; + float output_buffer[4] = {}; + const float output_expected[4] = {7, -5, 1, -7}; + + onert::exec::Execution execution{executors}; + + execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16); + execution.setInput(input2, 
reinterpret_cast<const void *>(input2_buffer), 16); + execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16); + execution.execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(output_buffer[i], output_expected[i]); + } +} + +TEST(ExecInstance, multi_model_twoCompile) +{ + auto mockup = CompiledMockUpMultiModel(); + auto executors1 = mockup.artifact->_executors; + onert::exec::Execution execution1{executors1}; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float exe1_input1_buffer[4] = {1, 0, -1, -2}; + const float exe1_input2_buffer[4] = {1, -3, 2, -4}; + float exe1_output_buffer[4] = {}; + const float exe1_output_expected[4] = {7, -5, 1, -7}; + + execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16); + execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16); + execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16); + + // Make new executor: compile again + mockup.compile(); + onert::exec::Execution execution2{mockup.artifact->_executors}; + + const float exe2_input1_buffer[4] = {2, 1, -2, 0}; + const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; + float exe2_output_buffer[4] = {}; + const float exe2_output_expected[4] = {1, 9, -3, 9}; + + execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16); + execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16); + execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16); + + execution1.execute(); + execution2.execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); + EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); + } +} + +// Support two initialized execution instance then ordered execution +TEST(ExecInstance, multi_model_twoExecution) +{ + auto mockup = CompiledMockUpMultiModel(); + auto executors = mockup.artifact->_executors; + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output1 = IOIndex{0}; + + const float exe1_input1_buffer[4] = {1, 0, -1, -2}; + const float exe1_input2_buffer[4] = {1, -3, 2, -4}; + float exe1_output_buffer[4] = {}; + const float exe1_output_expected[4] = {7, -5, 1, -7}; + const float exe2_output_expected[4] = {1, 9, -3, 9}; + + onert::exec::Execution execution1{executors}; + execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16); + execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16); + execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16); + + const float exe2_input1_buffer[4] = {2, 1, -2, 0}; + const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; + float exe2_output_buffer[4] = {}; + + // Make new execution + onert::exec::Execution execution2{executors}; + execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16); + execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16); + execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16); + + execution1.execute(); + execution1.execute(); + execution2.execute(); + execution2.execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); + EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); + } +} + +// Multi-model is not thread-safe yet + +// Support asynchronous execution +TEST(ExecInstance, multi_model_async) +{ + auto mockup = CompiledMockUpMultiModel(); + auto 
executors = mockup.artifact->_executors; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float input1_buffer[4] = {1, 0, -1, -2}; + const float input2_buffer[4] = {1, -3, 2, -4}; + float output_buffer[4] = {}; + const float output_expected[4] = {7, -5, 1, -7}; + + onert::exec::Execution execution{executors}; + + execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16); + execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16); + execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16); + execution.startExecute(); + execution.waitFinish(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(output_buffer[i], output_expected[i]); + } +} + +TEST(ExecInstance, multi_model_dequant_input_quant_output) +{ + auto mockup = CompiledMockUpMultiModel(); + auto executors = mockup.artifact->_executors; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const uint8_t input1_buffer[4] = {138, 128, 118, 108}; // {1, 0, -1, -2} + const uint8_t input2_buffer[4] = {138, 98, 148, 88}; // {1, -3, 2, -4} + uint8_t output_buffer[4] = {}; + const uint8_t output_expected[4] = {198, 78, 138, 58}; // {7, -5, 1, -7} + float scale = 0.1; + int32_t zero_point = 128; + + onert::exec::Execution execution{executors}; + + onert::ir::TypeInfo type_info{onert::ir::DataType::QUANT_UINT8_ASYMM, scale, zero_point}; + execution.setInput(input1, type_info, execution.getInputShape(input1), + reinterpret_cast<const void *>(input1_buffer), 4, onert::ir::Layout::NHWC); + execution.setInput(input2, type_info, execution.getInputShape(input2), + reinterpret_cast<const void *>(input2_buffer), 4, onert::ir::Layout::NHWC); + execution.setOutput(output, type_info, execution.getOutputShape(output), + reinterpret_cast<void *>(output_buffer), 4, onert::ir::Layout::NHWC); + execution.execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(output_buffer[i], output_expected[i]); + } +} + +// TODO Add an unittest multi_model_quant_input_dequant_output + +} // namespace diff --git a/runtime/onert/core/src/exec/ExecutionObservee.cc b/runtime/onert/core/src/exec/ExecutionObservee.cc index ddb1fb6a0..66610f0e0 100644 --- a/runtime/onert/core/src/exec/ExecutionObservee.cc +++ b/runtime/onert/core/src/exec/ExecutionObservee.cc @@ -26,37 +26,37 @@ void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer) _observers.emplace_back(std::move(observer)); } -void ExecutionObservee::notifyModelBegin(IExecutor *executor) +void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind) { - for (auto &o : _observers) + for (auto &&o : _observers) { - o->handleBegin(executor); + o->handleSubgraphBegin(ind); } } -void ExecutionObservee::notifyModelEnd(IExecutor *executor) +void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind) { - for (auto &o : _observers) + for (auto &&o : _observers) { - o->handleEnd(executor); + o->handleSubgraphEnd(ind); } } -void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind, + ir::OperationIndex op_ind, const backend::Backend *backend) { - for (auto &o : _observers) + for (auto &&o : _observers) { - o->handleBegin(executor, op_seq, backend); + o->handleJobBegin(executor, subg_ind, op_ind, backend); } } -void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, - const 
backend::Backend *backend) +void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind, + ir::OperationIndex op_ind, const backend::Backend *backend) { - for (auto &o : _observers) + for (auto &&o : _observers) { - o->handleEnd(executor, op_seq, backend); + o->handleJobEnd(executor, subg_ind, op_ind, backend); } } diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h index 49d409a3a..3ee1754c9 100644 --- a/runtime/onert/core/src/exec/ExecutionObservee.h +++ b/runtime/onert/core/src/exec/ExecutionObservee.h @@ -17,9 +17,11 @@ #ifndef __ONERT_EXEC_EXECUTION_OBSERVEE_H__ #define __ONERT_EXEC_EXECUTION_OBSERVEE_H__ -#include <list> +#include "ExecutionObservers.h" + +#include "ir/Index.h" -#include "exec/ExecutionObservers.h" +#include <list> namespace onert { @@ -39,11 +41,11 @@ public: * @param observer Observer to be added */ void add(std::unique_ptr<IExecutionObserver> observer); - void notifyModelBegin(IExecutor *executor); - void notifyModelEnd(IExecutor *executor); - void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, + void notifySubgraphBegin(ir::SubgraphIndex ind); + void notifySubgraphEnd(ir::SubgraphIndex ind); + void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind, ir::OperationIndex op_ind, const backend::Backend *backend); - void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, + void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind, ir::OperationIndex op_ind, const backend::Backend *backend); private: diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc index 060f874de..5245518a0 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.cc +++ b/runtime/onert/core/src/exec/ExecutionObservers.cc @@ -14,14 +14,58 @@ * limitations under the License. */ -#include "exec/ExecutionObservers.h" +#include "ExecutionObservers.h" -#include <string> +#include "../util/EventWriter.h" #include "util/logging.h" -#include "exec/IExecutor.h" -#include "misc/polymorphic_downcast.h" -#include "ir/OpSequence.h" + +#include <misc/polymorphic_downcast.h> + +#include <string> +#include <sstream> + +namespace +{ + +void setUserData(const onert::ir::Graph &g, const onert::ir::IOperation *op, + decltype(EventCollector::Event::userData) &data) +{ + // From a tensor of shape [a, b, c], this will return a string "shape(a b c)". 
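// For example, an input operand of shape [1, 299, 299, 3] (an illustrative value, not from this
// diff) would be recorded as the pair ("input_shape_0", "shape(1 299 299 3)") in the event's userData.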
+ // String like "[1, 2, 3]" looks better but this will be considered as a list in Json + // so text search (e.g., Ctrl-F in Chrome Tracing) could be difficult + auto build_shape_str = [&](onert::ir::OperandIndex operand_idx) { + std::string shape_str; + auto &shape = g.operands().at(operand_idx).info().shape(); + for (int i = 0; i < shape.rank(); i++) + { + if (i == 0) + shape_str = "shape(" + std::to_string(shape.dim(i)); + else + shape_str += " " + std::to_string(shape.dim(i)); + } + shape_str += ")"; + + return shape_str; + }; + + auto &inputs = op->getInputs(); + auto size = inputs.size(); + for (size_t i = 0; i < size; i++) + { + auto operand_idx = inputs.at(i); + if (operand_idx.undefined()) + continue; + + std::string key("input_shape_" + std::to_string(i)); + std::string value = build_shape_str(operand_idx); + data.emplace_back(std::make_pair(key, value)); + } + + // add other userData as needed +} + +} // namespace namespace onert { @@ -29,8 +73,8 @@ namespace onert namespace exec { -void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence *, - const onert::backend::Backend *backend) +void ProfileObserver::handleJobBegin(onert::exec::IExecutor *, ir::SubgraphIndex, + ir::OperationIndex, const onert::backend::Backend *backend) { _timer = backend->config()->timer(); if (_timer == nullptr) @@ -38,14 +82,14 @@ void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence _timer->handleBegin(); } -void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex, + const ir::OperationIndex op_ind, const backend::Backend *backend) { _timer->handleEnd(); const auto timer_res = _timer->getTime(); - // NOTE This assumes there is just one operation in a op_seq - const auto &node = _graph.operations().at(op_seq->operations().at(0)); + // NOTE This assumes there is just one operation in a op + const auto &node = _graph.operations().at(op_ind); auto node_name = node.name(); VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl; @@ -54,7 +98,7 @@ void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq, ir::DataType::QUANT_UINT8_ASYMM; uint32_t size = 0; - for (const auto &ind : node.getInputs() + node.getOutputs()) + for (const auto &ind : (node.getInputs() + node.getOutputs()) | ir::Remove::UNDEFINED) { size += exec->graph().operands().at(ind).info().total_size(); } @@ -69,64 +113,59 @@ void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq, } }; -ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph) - : _ofs{filepath, std::ofstream::out}, _recorder{}, _collector{&_recorder}, _graph{graph} +TracingObserver::TracingObserver(const std::string &filepath, const ir::Graph &graph, + const util::TracingCtx *tracing_ctx) + : _recorder{std::make_unique<EventRecorder>()}, _collector{_recorder.get()}, _graph{graph}, + _tracing_ctx{tracing_ctx} { + _event_writer = EventWriter::get(filepath); + _event_writer->startToUse(); } -ChromeTracingObserver::~ChromeTracingObserver() +TracingObserver::~TracingObserver() { try { - _recorder.writeToFile(_ofs); + _event_writer->readyToFlush(std::move(_recorder)); } catch (const std::exception &e) { - std::cerr << "E: Fail to record event in ChromeTracingObserver: " << e.what() << std::endl; + std::cerr << "E: Fail to record event in TracingObserver: " << e.what() << std::endl; } } -void 
ChromeTracingObserver::handleBegin(IExecutor *) +void TracingObserver::handleSubgraphBegin(ir::SubgraphIndex subg_ind) { - _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"}); + _collector.onEvent( + EventCollector::SubgEvent{_tracing_ctx, EventCollector::Edge::BEGIN, subg_ind.value()}); } -void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void TracingObserver::handleJobBegin(IExecutor *, ir::SubgraphIndex subg_ind, + ir::OperationIndex op_ind, const backend::Backend *backend) { std::string backend_id = backend->config()->id(); - _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, - opSequenceTag(op_seq, _graph.operations())}); + const auto &op = _graph.operations().at(op_ind); + auto ev = EventCollector::OpSeqEvent{_tracing_ctx, EventCollector::Edge::BEGIN, + subg_ind.value(), backend_id, + op_ind.value(), op.name()}; + // add shape of inputs + setUserData(_graph, &op, ev.userData); + _collector.onEvent(ev); } -void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void TracingObserver::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_ind, + ir::OperationIndex op_ind, const backend::Backend *backend) { std::string backend_id = backend->config()->id(); - _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, backend_id, - opSequenceTag(op_seq, _graph.operations())}); + _collector.onEvent(EventCollector::OpSeqEvent{_tracing_ctx, EventCollector::Edge::END, + subg_ind.value(), backend_id, op_ind.value(), + _graph.operations().at(op_ind).name()}); } -void ChromeTracingObserver::handleEnd(IExecutor *) +void TracingObserver::handleSubgraphEnd(ir::SubgraphIndex subg_ind) { - _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"}); -} - -std::string ChromeTracingObserver::opSequenceTag(const ir::OpSequence *op_seq, - const ir::Operations &operations) -{ - if (op_seq->size() == 0) - return "Empty OpSequence"; - - const auto &first_op_idx = op_seq->operations().at(0); - const auto &first_op_node = operations.at(first_op_idx); - std::string tag = "$" + std::to_string(first_op_idx.value()); - tag += " " + first_op_node.name(); - if (op_seq->size() > 1) - { - tag += " (+" + std::to_string(op_seq->size() - 1) + ")"; - } - return tag; + _collector.onEvent( + EventCollector::SubgEvent{_tracing_ctx, EventCollector::Edge::END, subg_ind.value()}); } } // namespace exec diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h index ac0076ed2..7e93ecf7c 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.h +++ b/runtime/onert/core/src/exec/ExecutionObservers.h @@ -17,13 +17,16 @@ #ifndef __ONERT_EXEC_OBSREVERS_H__ #define __ONERT_EXEC_OBSREVERS_H__ -#include "exec/IFunction.h" -#include "ir/OpSequence.h" #include "ExecTime.h" -#include "util/ITimer.h" +#include "../util/EventCollector.h" +#include "../util/EventRecorder.h" +#include "../util/EventWriter.h" + #include "exec/IExecutor.h" -#include "util/EventCollector.h" -#include "util/EventRecorder.h" +#include "ir/Index.h" +#include "ir/IOperation.h" +#include "util/ITimer.h" +#include "util/TracingCtx.h" namespace onert { @@ -33,13 +36,15 @@ class IExecutionObserver { public: /// @brief Invoked just before model (not individual operation) execution begins - virtual void handleBegin(IExecutor *) { return; } + virtual void 
handleSubgraphBegin(ir::SubgraphIndex) { return; } - virtual void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0; - virtual void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0; + virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex, + const backend::Backend *) = 0; + virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex, + const backend::Backend *) = 0; /// @brief Invoked just after model (not individual operation) execution ends - virtual void handleEnd(IExecutor *) { return; } + virtual void handleSubgraphEnd(ir::SubgraphIndex) { return; } virtual ~IExecutionObserver() = default; }; @@ -48,13 +53,15 @@ class ProfileObserver : public IExecutionObserver { public: explicit ProfileObserver(std::shared_ptr<ExecTime> et, const ir::Graph &graph) - : _et(std::move(et)), _graph(graph) + : _et(std::move(et)), _graph(graph) { } - void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; + void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex, + const backend::Backend *) override; + void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex, + const backend::Backend *) override; - void handleEnd(IExecutor *) override { _et->uploadOperationsExecTime(); } + void handleSubgraphEnd(ir::SubgraphIndex) override { _et->storeOperationsExecTime(); } private: std::unique_ptr<util::ITimer> _timer; @@ -62,24 +69,25 @@ private: const ir::Graph &_graph; }; -class ChromeTracingObserver : public IExecutionObserver +class TracingObserver : public IExecutionObserver { public: - ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph); - ~ChromeTracingObserver(); - void handleBegin(IExecutor *) override; - void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *) override; - -private: - static std::string opSequenceTag(const ir::OpSequence *op_seq, const ir::Operations &operations); + TracingObserver(const std::string &filepath, const ir::Graph &graph, + const util::TracingCtx *tracing_ctx); + ~TracingObserver(); + void handleSubgraphBegin(ir::SubgraphIndex) override; + void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex, + const backend::Backend *) override; + void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex, + const backend::Backend *) override; + void handleSubgraphEnd(ir::SubgraphIndex) override; private: - std::ofstream _ofs; - EventRecorder _recorder; + std::unique_ptr<EventRecorder> _recorder; EventCollector _collector; const ir::Graph &_graph; + EventWriter *_event_writer; + const util::TracingCtx *_tracing_ctx; }; } // namespace exec diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc index f835a9675..0bc088b02 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.cc +++ b/runtime/onert/core/src/exec/ExecutorBase.cc @@ -16,10 +16,9 @@ #include "ExecutorBase.h" -#include "backend/ITensor.h" -#include "backend/controlflow/UserTensor.h" -#include "backend/cpu_common/Tensor.h" -#include "util/logging.h" +#include "ShapeConverter.h" + +#include <misc/polymorphic_downcast.h> namespace onert { @@ -27,68 +26,29 @@ namespace exec { ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> 
&&lowered_graph, - const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, - const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, + backend::BackendContexts &&backend_contexts, const compiler::TensorRegistries &tensor_regs, - backend::TensorManagerSet &&tensor_mgrs) - : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, - _input_tensors{input_tensors}, _output_tensors{output_tensors}, - _tensor_mgrs{std::move(tensor_mgrs)}, _mutex() + const util::TracingCtx *tracing_ctx) + : _lowered_graph{std::move(lowered_graph)}, + _backend_contexts{std::move(backend_contexts)}, _graph{_lowered_graph->graph()}, _mutex(), + _tracing_ctx(tracing_ctx) { - // TODO Fix the way of knowing whether it is primary or not - bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty()); - if (!primary_executor) - { - auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) { - std::vector<std::shared_ptr<backend::ITensor>> list; - for (auto ind : ind_seq) - { - std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind); - assert(tensor != nullptr); - DynAllocInfo dyn_alloc_info{ind}; - _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); - list.push_back(tensor); - } - return list; - }; - auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) { - std::vector<std::shared_ptr<backend::ITensor>> list; - for (auto ind : ind_seq) - { - std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind); - assert(tensor != nullptr); - DynAllocInfo dyn_alloc_info{ind}; - _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); - list.push_back(tensor); - } - return list; - }; - _input_tensors = build_input_tensor_list(_graph.getInputs()); - _output_tensors = build_output_tensor_list(_graph.getOutputs()); - } - else - { - assert(input_tensors.size() == _graph.getInputs().size()); - assert(output_tensors.size() == _graph.getOutputs().size()); - for (uint32_t i = 0; i < input_tensors.size(); i++) + auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) { + assert(tensors.empty()); + for (auto &&ind : ind_seq) { - auto tensor = input_tensors[i]; - auto ind = _graph.getInputs().at(i); - DynAllocInfo dyn_alloc_info{ind}; - _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); + backend::ITensor *tensor = tensor_regs.getITensor(ind); + assert(tensor != nullptr); + auto io_tensor = nnfw::misc::polymorphic_downcast<backend::builtin::IOTensor *>(tensor); + tensors.push_back(io_tensor); } - for (uint32_t i = 0; i < output_tensors.size(); i++) - { - auto tensor = output_tensors[i]; - auto ind = _graph.getOutputs().at(i); - DynAllocInfo dyn_alloc_info{ind}; - _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); - } - } + }; + build_tensor_list(_graph.getInputs(), _input_tensors); + build_tensor_list(_graph.getOutputs(), _output_tensors); } -void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors, - const std::shared_ptr<IPermuteFunction> &pre_fn) +void ExecutorBase::execute(const std::vector<backend::IPortableTensor *> &inputs, + const std::vector<backend::IPortableTensor *> &outputs) { // For thread-safe, use mutex // TODO: if all used backends on this executor are thread-safe, @@ -96,41 +56,43 @@ void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> // Deadlock occurs when an Executor is called recursively. 
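// Note: std::mutex is not re-entrant. Conceptually,
//   std::lock_guard<std::mutex> outer(_mutex); // first call to execute() takes the lock
//   std::lock_guard<std::mutex> inner(_mutex); // a nested call on the same thread never returns
// which is why the recursive Executor call mentioned above would deadlock on the guard below.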
std::lock_guard<std::mutex> lock(_mutex); - assert(src_tensors.size() == _graph.getInputs().size()); - assert(src_tensors.size() == _input_tensors.size()); - for (uint32_t n = 0; n < _graph.getInputs().size(); ++n) + assert(inputs.size() == _graph.getInputs().size()); + assert(inputs.size() == _input_tensors.size()); + for (uint32_t n = 0; n < inputs.size(); ++n) { - // when user changes input shape, the input tensor is dynamic and its memory is not allocated. - // This code find the info to allocate dynamic tensor, and allocate memory based on the source - // tensor's shape set by caller. - const auto src_tensor = src_tensors[n]; + const auto input = inputs[n]; + assert(input->buffer() != nullptr); auto input_tensor = _input_tensors[n]; - // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors - if (src_tensor != nullptr && input_tensor != nullptr) + assert(input_tensor != nullptr); + if (input != nullptr) { - auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[n]); - const auto orig_input_shape = input_tensor->getShape(); - const auto changed_input_shape = - convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout()); + const auto &orig_input_shape = input_tensor->orig_info().shape(); + const auto &changed_input_shape = + convertShape(input->getShape(), input->layout(), input_tensor->orig_layout()); + if (input_tensor->get_info().shape() != changed_input_shape) + { + // TODO Fix this workaround that is introduced since cpu based kernels directly use `_info` + // rather than interface methods to avoid virtual function calls. + input_tensor->setShapeOfIPortableTensor(changed_input_shape); + } if (orig_input_shape != changed_input_shape) { - if (dyn_alloc_info == _input_to_dyn_alloc_info.end()) - { - // The input_tensor is a dynamic tensor of backend that doesn't support dynamic tensor - throw std::runtime_error("Unknown dim is found at execution time for a backend that " - "does not support dynamic tensor"); - } - else - { - input_tensor->set_dynamic(); - } + input_tensor->set_dynamic(); } } + input_tensor->setTensor(input); } - // TODO Move calling permute_fn.run() into executeImpl() - assert(pre_fn); - pre_fn->run(); + assert(outputs.size() == _graph.getOutputs().size()); + assert(outputs.size() == _output_tensors.size()); + for (uint32_t n = 0; n < outputs.size(); ++n) + { + const auto output = outputs[n]; + // assert(dst_tensor->buffer() != nullptr); + auto output_tensor = _output_tensors[n]; + assert(output_tensor != nullptr); + output_tensor->setTensor(output); + } executeImpl(); } @@ -146,32 +108,50 @@ void ExecutorBase::execute(const IODescription &desc) assert(_input_tensors.size() == desc.inputs.size()); for (uint32_t i = 0; i < _input_tensors.size(); ++i) { - // TODO Remove dynamic_cast - auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]); - assert(tensor); + auto tensor = _input_tensors[i]; + + // TODO Check if (desc.inputs[i] == nullptr) + // TODO Better design for ITensor? 
(we need const_cast as ITensor is writable) + tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)), + desc.inputs[i]->size); + auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i}); if (input_shape != desc.dynamic_input_shapes.end()) { tensor->set_dynamic(); tensor->setShape(input_shape->second); + /* + * Changes tensor shape and allocate memory since its shape was changed + * perhaps by nnfw_set_input_tensorinfo() + * + * Cases are: + * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute() + * (a) (b) + * + * at (a), operand is static, tensor is static - memory dealloc is not needed + * (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager) + * at (b), operand is static, tensor is dynamic - memory dealloc is needed + * + * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute() + * (a) (b) + * + * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed + * since it has not been allocated yet + * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed + */ + tensor->applyShape(input_shape->second); } - // TODO Better design for ITensor? (we need const_cast as ITensor is writable) - tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)), - desc.inputs[i]->size); - - handleDynamicInputTensor(ir::IOIndex{i}, desc); } assert(_output_tensors.size() == desc.outputs.size()); for (uint32_t i = 0; i < _output_tensors.size(); ++i) { - // TODO Remove dynamic_cast - auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_output_tensors[i]); - assert(tensor); + auto tensor = _output_tensors[i]; + + if (desc.outputs[i] == nullptr) + throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."}; + tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size); tensor->set_dynamic(); // It can't be resized but shape could change - // TODO Better design for ITensor? 
(we need const_cast as ITensor is writable) - tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.outputs[i]->buffer)), - desc.outputs[i]->size); } executeImpl(); @@ -190,51 +170,13 @@ void ExecutorBase::execute(const IODescription &desc) // set shape of outputDesc to tensor shape since tensor can be dynamic const auto output_tensor_shape = _output_tensors[n]->getShape(); output.info.shape( - convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout)); - } -} - -/** - * @brief Changes tensor shape and allocate memory - * if input shape was changed by nnfw_set_input_tensorinfo() - * - * @note Cases are: - * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute() - * (a) (b) - * - * at (a), operand is static, tensor is static - memory dealloc is not needed - * (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager) - * at (b), operand is static, tensor is dynamic - memory dealloc is needed - * - * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute() - * (a) (b) - * - * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed - * since it has not been allocated yet - * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed - */ -void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc) -{ - auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind); - if (shape_sig_found != desc.dynamic_input_shapes.end()) - { - auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]); - if (dyn_alloc_info == _input_to_dyn_alloc_info.end()) - throw std::runtime_error("Unknown dim is found at execution time for a backend that " - "does not support dynamic tensor"); - - auto changed_input_shape = shape_sig_found->second; - auto operand_ind = dyn_alloc_info->second.ind; - - auto dyn_tensor_manager = _input_tensors[io_ind.value()]->dynamic_tensor_manager(); - assert(dyn_tensor_manager); - dyn_tensor_manager->applyShape(operand_ind, changed_input_shape); + convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout)); } } bool ExecutorBase::hasDynamicInput() { - for (auto &tensor : _input_tensors) + for (auto &&tensor : _input_tensors) { if (tensor->is_dynamic()) return true; diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h index a13be7dbf..4f97de922 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.h +++ b/runtime/onert/core/src/exec/ExecutorBase.h @@ -17,25 +17,20 @@ #ifndef __ONERT_EXEC_EXECUTOR_BASE_H__ #define __ONERT_EXEC_EXECUTOR_BASE_H__ -#include <mutex> +#include "ExecutionObservee.h" +#include "../backend/builtin/IOTensor.h" +#include "../compiler/TensorRegistries.h" -#include "IPermuteFunction.h" -#include "Source.h" -#include "exec/ExecutionObservers.h" -#include "Sink.h" -#include "ShapeConverter.h" -#include "exec/IExecutor.h" #include "compiler/LoweredGraph.h" -#include "ir/LowerInfoMap.h" -#include "backend/IConfig.h" -#include "backend/Backend.h" -#include "exec/ExecTime.h" -#include "exec/IFunction.h" -#include "backend/IDynamicTensorManager.h" -#include "backend/ITensorManager.h" -#include "exec/ExecutionObservee.h" -#include "compiler/TensorRegistries.h" -#include <list> +#include "exec/IExecutor.h" +#include "exec/IODescription.h" +#include "ir/Graph.h" +#include "ir/OperationIndexMap.h" +#include "util/TracingCtx.h" + +#include <memory> +#include <mutex> +#include <vector> namespace onert { @@ -51,26 +46,18 @@ public: * 
@param tensor_builders Tensor builders that are currently used */ ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph, - const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, - const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorRegistries &tensor_regs, - backend::TensorManagerSet &&tensor_mgrs); + backend::BackendContexts &&backend_contexts, + const compiler::TensorRegistries &tensor_regs, const util::TracingCtx *tracing_ctx); virtual ~ExecutorBase() = default; - const ir::Graph &graph() final { return _graph; } - - /** - * @brief Execute without IODescription - * - * @param src_tensor Tensor list that will be copied to input tensors of this - * @param pre_fn The permutation function that copy from src_tensor to input tensors of this - */ - void execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors, - const std::shared_ptr<IPermuteFunction> &pre_fn); + const ir::Graph &graph() const final { return _graph; } void execute(const IODescription &desc) final; + void execute(const std::vector<backend::IPortableTensor *> &inputs, + const std::vector<backend::IPortableTensor *> &outputs) override; + // Used only in Dataflow and Parallel Executors void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final { @@ -81,17 +68,16 @@ public: void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); }; - const std::vector<std::shared_ptr<backend::ITensor>> &getInputTensors() const + const std::vector<backend::builtin::IOTensor *> &getInputTensors() const override { return _input_tensors; } - const std::vector<std::shared_ptr<backend::ITensor>> &getOutputTensors() const + const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override { return _output_tensors; } - - const DynAllocInfoMap &getInputsDynamicAllocInfo() const { return _input_to_dyn_alloc_info; } + backend::BackendContexts &getBackendContexts() { return _backend_contexts; } protected: /** @@ -103,16 +89,12 @@ protected: ExecutionObservee _subject; std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; std::unique_ptr<compiler::LoweredGraph> _lowered_graph; + backend::BackendContexts _backend_contexts; const ir::Graph &_graph; - std::vector<std::shared_ptr<backend::ITensor>> _input_tensors; - std::vector<std::shared_ptr<backend::ITensor>> _output_tensors; - DynAllocInfoMap _input_to_dyn_alloc_info; - DynAllocInfoMap _output_to_dyn_alloc_info; - backend::TensorManagerSet _tensor_mgrs; + std::vector<backend::builtin::IOTensor *> _input_tensors; + std::vector<backend::builtin::IOTensor *> _output_tensors; std::mutex _mutex; - -private: - void handleDynamicInputTensor(ir::IOIndex input_index, const IODescription &desc); + const util::TracingCtx *_tracing_ctx; }; } // namespace exec diff --git a/runtime/onert/core/src/exec/Executors.cc b/runtime/onert/core/src/exec/Executors.cc new file mode 100644 index 000000000..8a1be3df4 --- /dev/null +++ b/runtime/onert/core/src/exec/Executors.cc @@ -0,0 +1,649 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Executors.h" + +#include "../backend/builtin/IOTensor.h" + +namespace +{ + +using namespace onert; + +int32_t find_input_index(const std::vector<ir::IODesc> &pkg_inputs, + const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index, + const ir::IOIndex &io_index) +{ + for (size_t i = 0; i < pkg_inputs.size(); i++) + { + auto &input_desc = pkg_inputs[i]; + if ((std::get<ir::ModelIndex>(input_desc) == model_index) && + (std::get<ir::SubgraphIndex>(input_desc) == subg_index) && + (std::get<ir::IOIndex>(input_desc) == io_index)) + return static_cast<int32_t>(i); + } + return -1; +} + +int32_t find_output_index(const std::vector<ir::IODesc> &pkg_outputs, + const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index, + const ir::IOIndex &io_index) +{ + for (size_t i = 0; i < pkg_outputs.size(); i++) + { + auto &input_desc = pkg_outputs[i]; + if ((std::get<ir::ModelIndex>(input_desc) == model_index) && + (std::get<ir::SubgraphIndex>(input_desc) == subg_index) && + (std::get<ir::IOIndex>(input_desc) == io_index)) + return static_cast<int32_t>(i); + } + return -1; +} + +} // namespace + +namespace onert +{ +namespace exec +{ + +class Executors::EdgeTensor : public backend::builtin::IOTensor +{ +public: + EdgeTensor(const ir::OperandInfo &info, ir::Layout layout) + : backend::builtin::IOTensor(info, layout), _buffer{nullptr}, _ref_count{0} + { + } + ~EdgeTensor() = default; + + void allocate_buffer() + { + const auto total_size = orig_info().total_size(); + _buffer = std::make_unique<uint8_t[]>(total_size); + _ref_count = 1; + + // NOTE Executor's inputs/outputs are always IPortableTensor. If backend of inputs/outputs + // is using tensor that does not inherit IPortableTensor, Permute operation is added + // and all inputs/outputs become IPortableTensor at compile stage. + // This allows user's buffers to be set to inputs/outputs of executors. 
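// Buffer lifetime is reference-counted: allocate_buffer() creates the buffer with an initial
// reference, each consuming edge adds one via increase_ref(), and decrease_ref() frees the
// buffer (and clears the user-tensor binding) once the count drops to zero.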
+ setUserTensor(_buffer.get(), total_size); + } + + void increase_ref() { _ref_count++; } + + void decrease_ref() + { + assert(_ref_count > 0); + _ref_count--; + if (_ref_count == 0) + { + _buffer.reset(); + setUserTensor(nullptr, orig_info().total_size()); + } + } + +private: + std::unique_ptr<uint8_t[]> _buffer; + int32_t _ref_count; +}; + +void Executors::emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index, + std::unique_ptr<IExecutor> exec) +{ + _executors.emplace(std::make_pair(model_index, subg_index), std::move(exec)); +} + +IExecutor *Executors::at(const ir::ModelIndex &model_index, + const ir::SubgraphIndex &subg_index) const +{ + return _executors.at(std::make_pair(model_index, subg_index)).get(); +} + +uint32_t Executors::inputSize() const { return _model_edges->pkg_inputs.size(); } + +uint32_t Executors::outputSize() const { return _model_edges->pkg_outputs.size(); } + +const ir::OperandInfo &Executors::inputInfo(const ir::IOIndex &index) const +{ + auto const desc = _model_edges->pkg_inputs[index.value()]; + auto const model_index = std::get<0>(desc); + auto const subg_index = std::get<1>(desc); + auto const io_index = std::get<2>(desc); + auto const executor = at(model_index, subg_index); + return executor->getInputTensors().at(io_index.value())->orig_info(); +} + +const ir::OperandInfo &Executors::outputInfo(const ir::IOIndex &index) const +{ + auto const desc = _model_edges->pkg_outputs[index.value()]; + auto const model_index = std::get<0>(desc); + auto const subg_index = std::get<1>(desc); + auto const io_index = std::get<2>(desc); + auto const executor = at(model_index, subg_index); + return executor->getOutputTensors().at(io_index.value())->orig_info(); +} + +// Allow below edges only +// m1 < m2, s1 == 0 and s2 == 0 if m1:s1:o1 -> m2:s2:o2' +void Executors::checkSupportedMultimodel() const +{ + // If package includes no-connection model, model_count is less than real model count in package. 
+ // Then this method will throw exception based on model index + // 1st model: input assumption + // Otherwise: edges assumption + + // Assumption: edges + // m1 < m2, s1 == 0 and s2 == 0 if edge 'm1:s1:o1 -> m2:s2:o2' + for (auto &&edge : _model_edges->edges) + { + auto const model_from = std::get<ir::ModelIndex>(edge.from); + auto const model_to = std::get<ir::ModelIndex>(edge.to); + auto const subg_from = std::get<ir::SubgraphIndex>(edge.from); + auto const subg_to = std::get<ir::SubgraphIndex>(edge.to); + + if (model_from.value() == model_to.value()) + { + throw std::runtime_error{"Multi model's edge set has invalid edge"}; + } + + if ((model_from.value() > model_to.value()) || (subg_from != ir::SubgraphIndex{0}) || + (subg_to != ir::SubgraphIndex{0})) + throw std::runtime_error{"NYI: Multi model execution for this edge set is not supported yet"}; + } + + // Assumption: package inputs + // All 1st model inputs come from package input if always m1 < m2 + { + auto first_executor = at(ir::ModelIndex{0}, ir::SubgraphIndex{0}); + auto search_first_model = [&](const ir::IOIndex &input_index) { + for (const auto &input : _model_edges->pkg_inputs) + { + if ((std::get<ir::ModelIndex>(input) == ir::ModelIndex{0}) || + (std::get<ir::SubgraphIndex>(input) == ir::SubgraphIndex{0}) || + (std::get<ir::IOIndex>(input) == input_index)) + return true; + } + + return false; + }; + + for (uint32_t i = 0; i < first_executor->getInputTensors().size(); i++) + { + if (!search_first_model(ir::IOIndex{i})) + throw std::runtime_error{"Cannot find 1st model's input buffer"}; + } + } + + // Check whether nnpkg outputs and Edge `from` are duplicated + for (const auto &edge : _model_edges->edges) + { + if (std::find(_model_edges->pkg_outputs.begin(), _model_edges->pkg_outputs.end(), edge.from) != + _model_edges->pkg_outputs.end()) + { + throw std::runtime_error{"Multi model execution does not support duplicating nnpkg outputs " + "with `from` of edges yet"}; + } + } +} + +void Executors::createEdgeQuantLayers() +{ + if (_is_created_edge_quant_layers) + { + return; + } + + // Create EdgeTensor for edges between executors + for (const auto &pair : _edge_map) + { + const auto &from_iodesc = pair.first; + const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc); + const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc); + const auto &from_io_index = std::get<ir::IOIndex>(from_iodesc); + + const auto from_executor = _executors.at({from_model_index, from_subg_index}).get(); + const auto from_tensor = from_executor->getOutputTensors().at(from_io_index.value()); + + const auto &from_info = from_tensor->orig_info(); + const auto from_layout = from_tensor->orig_layout(); + _edge_tensors[from_iodesc] = std::make_unique<EdgeTensor>(from_info, from_layout); + } + + // Append type-aware quantization layer for edges between executors + for (const auto &executor_pair : _executors) + { + const auto &executor_index = executor_pair.first; + const auto &model_index = executor_index.first; + const auto &subg_index = executor_index.second; + + std::vector<backend::ITensor *> inputs; + std::vector<backend::ITensor *> outputs; + for (const auto &pair : _edge_map) + { + const auto &from_iodesc = pair.first; + if (std::get<ir::ModelIndex>(from_iodesc) == model_index && + std::get<ir::SubgraphIndex>(from_iodesc) == subg_index) + { + const auto from_tensor = _edge_tensors[from_iodesc].get(); + const auto &to_list = pair.second; + + for (const auto &to_iodesc : to_list) + { + const auto &to_model_index = 
std::get<ir::ModelIndex>(to_iodesc); + const auto &to_subg_index = std::get<ir::SubgraphIndex>(to_iodesc); + const auto &to_io_index = std::get<ir::IOIndex>(to_iodesc); + + const auto to_executor = _executors.at({to_model_index, to_subg_index}).get(); + const auto to_tensor = to_executor->getInputTensors().at(to_io_index.value()); + + // TODO Unify tensors with the same `from` tensor and same type + if (from_tensor->data_type() != to_tensor->data_type()) + { + assert(inputs.size() == outputs.size()); + const auto &to_info = + to_executor->getInputTensors().at(to_io_index.value())->orig_info(); + const auto to_layout = to_tensor->orig_layout(); + inputs.emplace_back(from_tensor); + + auto type_aware_quant_tensor = std::make_unique<EdgeTensor>(to_info, to_layout); + outputs.emplace_back(type_aware_quant_tensor.get()); + + _edge_quant_tensors[to_iodesc] = std::move(type_aware_quant_tensor); + } + } + } + } + + auto layer = std::make_unique<PermuteLayer>(inputs, outputs); + layer->prepare(); + _edge_quant_layers[{model_index, subg_index}] = std::move(layer); + } + + _is_created_edge_quant_layers = true; +} + +void Executors::CreatePkgIOTensors(const IODescription &desc) +{ + for (const auto &pkg_input : _model_edges->pkg_inputs) + { + // Create IOTensor for nnpkg inputs + const auto &model_index = std::get<ir::ModelIndex>(pkg_input); + const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_input); + const auto &io_index = std::get<ir::IOIndex>(pkg_input); + const auto input_pkg_index = + find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index); + if (input_pkg_index == -1) + throw std::runtime_error{"Cannot find multi model input index"}; + auto input_desc = desc.inputs[input_pkg_index].get(); + _pkg_input_tensors[pkg_input] = + std::make_unique<backend::builtin::IOTensor>(input_desc->info, input_desc->layout); + } + + for (const auto &pkg_output : _model_edges->pkg_outputs) + { + // Create IOTensor for nnpkg outputs + const auto &model_index = std::get<ir::ModelIndex>(pkg_output); + const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_output); + const auto &io_index = std::get<ir::IOIndex>(pkg_output); + const auto output_pkg_index = + find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index); + if (output_pkg_index == -1) + throw std::runtime_error{"Cannot find multi model output index"}; + auto output_desc = desc.outputs[output_pkg_index].get(); + _pkg_output_tensors[pkg_output] = + std::make_unique<backend::builtin::IOTensor>(output_desc->info, output_desc->layout); + } +} + +void Executors::createPkgIOQuantLayers(const IODescription &desc) +{ + // Append type-aware quantization layer for nnpkg inputs/outputs between executors + for (const auto &pair : _executors) + { + const auto &executor_index = pair.first; + const auto &model_index = executor_index.first; + const auto &subg_index = executor_index.second; + const auto executor = pair.second.get(); + + // Find pkg inputs of current executor + std::vector<ir::IODesc> pkg_inputs; + for (const auto &pkg_input : _model_edges->pkg_inputs) + { + if (std::get<ir::ModelIndex>(pkg_input) == model_index && + std::get<ir::SubgraphIndex>(pkg_input) == subg_index) + { + pkg_inputs.emplace_back(pkg_input); + } + } + std::vector<backend::ITensor *> src_tensors; + std::vector<backend::ITensor *> dst_tensors; + for (const auto &pkg_input : pkg_inputs) + { + const auto &io_index = std::get<ir::IOIndex>(pkg_input); + const auto input_pkg_index = + find_input_index(_model_edges->pkg_inputs, model_index, 
subg_index, io_index); + if (input_pkg_index == -1) + throw std::runtime_error{"Cannot find multi model input index"}; + auto input_desc = desc.inputs[input_pkg_index].get(); + + // Create EdgeTensor for nnpkg input if type is different + const auto input_tensor = + executor->getInputTensors().at(std::get<ir::IOIndex>(pkg_input).value()); + const auto &orig_info = input_tensor->orig_info(); + if (input_desc->info.typeInfo().type() != input_tensor->orig_info().typeInfo().type()) + { + const auto orig_layout = input_tensor->orig_layout(); + auto pkg_input_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout); + _pkg_input_quant_tensors[pkg_input] = std::move(pkg_input_edge_tensor); + + // Append type-aware quantization layer's inputs/outputs + src_tensors.emplace_back(_pkg_input_tensors[pkg_input].get()); + dst_tensors.emplace_back(_pkg_input_quant_tensors[pkg_input].get()); + } + } + + // Create type-aware quantization layer for nnpkg inputs + auto pkg_input_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors); + pkg_input_layer->prepare(); + _pkg_input_quant_layers[{model_index, subg_index}] = std::move(pkg_input_layer); + + // Find pkg outputs of current executor + std::vector<ir::IODesc> pkg_outputs; + for (const auto &pkg_output : _model_edges->pkg_outputs) + { + if (std::get<ir::ModelIndex>(pkg_output) == model_index && + std::get<ir::SubgraphIndex>(pkg_output) == subg_index) + { + pkg_outputs.emplace_back(pkg_output); + } + } + src_tensors.clear(); + dst_tensors.clear(); + // Create Tensors of nnpkg outputs for type-aware quantization + for (const auto &pkg_output : pkg_outputs) + { + const auto &io_index = std::get<ir::IOIndex>(pkg_output); + const auto output_pkg_index = + find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index); + if (output_pkg_index == -1) + throw std::runtime_error{"Cannot find multi model output index"}; + auto output_desc = desc.outputs[output_pkg_index].get(); + + // Create EdgeTensor for nnpkg output if type is different + const auto output_tensor = + executor->getOutputTensors().at(std::get<ir::IOIndex>(pkg_output).value()); + const auto &orig_info = output_tensor->orig_info(); + if (output_desc->info.typeInfo().type() != output_tensor->orig_info().typeInfo().type()) + { + const auto orig_layout = output_tensor->orig_layout(); + auto pkg_output_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout); + _pkg_output_quant_tensors[pkg_output] = std::move(pkg_output_edge_tensor); + + // Append type-aware quantization layer's inputs/outputs + src_tensors.emplace_back(_pkg_output_quant_tensors[pkg_output].get()); + dst_tensors.emplace_back(_pkg_output_tensors[pkg_output].get()); + } + } + + // Create type-aware quantization layer for nnpkg outputs + auto pkg_output_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors); + pkg_output_layer->prepare(); + _pkg_output_quant_layers[{model_index, subg_index}] = std::move(pkg_output_layer); + } +} + +void Executors::execute(const IODescription &desc) +{ + // Check supported multi model package + checkSupportedMultimodel(); + + // TODO Move creating type-aware quantization layers for edges in compilation stage + createEdgeQuantLayers(); + + // TODO Create IOTensors only once and recreate them only if nnpkg info changes + CreatePkgIOTensors(desc); + + // TODO Create type-aware quantization layers only once and recreate them only if type changes + createPkgIOQuantLayers(desc); + + // TODO Find better way to schedule order of executors + auto const 
model_count = modelCount(); + + auto find_from = [&](const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index, + const ir::IOIndex &io_index) { + for (const auto &edge : _model_edges->edges) + { + if ((std::get<ir::ModelIndex>(edge.to) == model_index) && + (std::get<ir::SubgraphIndex>(edge.to) == subg_index) && + (std::get<ir::IOIndex>(edge.to) == io_index)) + return edge.from; + } + + throw std::runtime_error{"Cannot find edge for model input"}; + }; + + // Execute each model + // NOTE May be better to use vector instead of unordered_map for _executors + for (auto model_index = ir::ModelIndex{0}; model_index.value() < model_count; model_index++) + { + // Find executor + auto executor = at(model_index, ir::SubgraphIndex{0}); + + // Set IOTensors + // TODO Set internal IOTensors only once + std::vector<backend::IPortableTensor *> inputs_inter; + std::vector<backend::IPortableTensor *> outputs_inter; + const auto &input_tensors = executor->getInputTensors(); + const auto &output_tensors = executor->getOutputTensors(); + auto const input_size = input_tensors.size(); + auto const output_size = output_tensors.size(); + inputs_inter.resize(input_size); + outputs_inter.resize(output_size); + + // Set inputs of executor + // TODO Create layer to allocate/deallocate buffers of EdgeTensor for each executor + for (uint32_t i = 0; i < input_size; i++) + { + const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index, + ir::SubgraphIndex{0}, ir::IOIndex{i}); + const auto input_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}}; + if (input_pkg_index != -1) + { + // Allocate type-aware quantization tensors for nnpkg inputs and set internal tensors + if (_pkg_input_quant_tensors.find(input_io_desc) != _pkg_input_quant_tensors.end()) + { + _pkg_input_quant_tensors[input_io_desc]->allocate_buffer(); + + inputs_inter[i] = _pkg_input_quant_tensors[input_io_desc].get(); + } + else + { + inputs_inter[i] = _pkg_input_tensors[input_io_desc].get(); + } + + // Set buffer of IOTensor + auto input_desc = desc.inputs[input_pkg_index].get(); + // TODO Remove const_cast (we need const_cast as ITensor is writable) + _pkg_input_tensors[input_io_desc]->setUserTensor( + reinterpret_cast<uint8_t *>(const_cast<void *>(input_desc->buffer)), input_desc->size); + } + else + { + auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}); + const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc); + const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc); + const auto &from_ioindex = std::get<ir::IOIndex>(from_iodesc).value(); + + // Supported only sequantial execution of models + assert(from_model_index.value() < model_index.value()); + assert(from_subg_index.value() == 0); + const auto from_executor = _executors.at({from_model_index, from_subg_index}).get(); + const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}}; + if (_edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end()) + { + inputs_inter[i] = from_executor->getOutputTensors().at(from_ioindex); + } + else + { + inputs_inter[i] = _edge_quant_tensors.at(to_iodesc).get(); + } + assert(inputs_inter[i]->buffer() != nullptr); + } + } + + // Set outputs of executor + for (uint32_t i = 0; i < output_size; i++) + { + const auto output_pkg_index = find_output_index(_model_edges->pkg_outputs, model_index, + ir::SubgraphIndex{0}, ir::IOIndex{i}); + const auto output_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}}; + 
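// Two cases below: an output that is also an nnpkg output is written (possibly through a
// type-aware quantization tensor) into the user-provided buffer, while any other output is
// written into an EdgeTensor whose buffer is allocated here and consumed by a later executor.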
if (output_pkg_index != -1) + { + // Allocate type-aware quantization tensors for nnpkg outputs and set internal tensors + if (_pkg_output_quant_tensors.find(output_io_desc) != _pkg_output_quant_tensors.end()) + { + _pkg_output_quant_tensors[output_io_desc]->allocate_buffer(); + + outputs_inter[i] = _pkg_output_quant_tensors[output_io_desc].get(); + } + else + { + outputs_inter[i] = _pkg_output_tensors[output_io_desc].get(); + } + + // Set buffer of IOTensor + auto output_desc = desc.outputs[output_pkg_index].get(); + _pkg_output_tensors[output_io_desc]->setUserTensor( + reinterpret_cast<uint8_t *>(output_desc->buffer), output_desc->size); + } + else + { + // Allocate buffer of `from` tensors + const auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}}; + _edge_tensors[from_iodesc]->allocate_buffer(); + outputs_inter[i] = _edge_tensors[from_iodesc].get(); + + // Allocate buffer of tensors for type-aware quantization + for (const auto &to_iodesc : _edge_map[from_iodesc]) + { + _edge_tensors[from_iodesc]->increase_ref(); + if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end()) + { + auto type_aware_quant_tensor = _edge_quant_tensors.at(to_iodesc).get(); + type_aware_quant_tensor->allocate_buffer(); + + _edge_tensors[from_iodesc]->decrease_ref(); + } + } + } + } + + _pkg_input_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run(); + + executor->execute(inputs_inter, outputs_inter); + + _edge_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run(); + _pkg_output_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run(); + + // Release input buffers that are no longer needed + for (uint32_t i = 0; i < input_size; i++) + { + const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index, + ir::SubgraphIndex{0}, ir::IOIndex{i}); + + const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}}; + if (input_pkg_index == -1) + { + if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end()) + { + // Decrease reference count of tensor for type-aware quantization if input tensor is the + // tensor + const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}}; + if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end()) + { + _edge_quant_tensors[to_iodesc]->decrease_ref(); + } + } + else + { + // Decrease reference count of `from` tensor if input tensor is the `from` tensor + const auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}); + _edge_tensors[from_iodesc]->decrease_ref(); + + // Decrease reference count of nnpkg inputs + if (_pkg_input_quant_tensors.find(to_iodesc) != _pkg_input_quant_tensors.end()) + { + _pkg_input_quant_tensors[to_iodesc]->decrease_ref(); + } + } + } + } + + // Release output buffers if those buffers are no longer used other executors because of + // type-aware quantization + // FIXME if tensors for type-aware quantization unified for the same `from` tensor and same type + for (uint32_t i = 0; i < output_size; i++) + { + auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}}; + + // Check if other executors will use the buffer of edge tensor + const auto &to_list = _edge_map[from_iodesc]; + if (to_list.size() == 0) + { + // This condition means `from_iodesc` tensor is an output of nnpkg + continue; + } + + bool to_be_release = + !std::any_of(to_list.begin(), to_list.end(), [&](const ir::IODesc &to_iodesc) { + // This condition means another executor uses the buffer of edge tensor + return 
_edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end(); + }); + + if (to_be_release) + { + // This edge tensor's buffer won't be used in other executors + // Tensors for type-aware quantization take over the role of this edge tensor instead + _edge_tensors[from_iodesc]->decrease_ref(); + } + + // Decrease reference count of nnpkg outputs + if (_pkg_output_quant_tensors.find(from_iodesc) != _pkg_output_quant_tensors.end()) + { + _pkg_output_quant_tensors[from_iodesc]->decrease_ref(); + } + } + } +} + +// modelCount() iterates _executors. +// It assumes that Compiler will generate Executor for all models and _executors includes all +// generated Executor. +// If nnpackage includes model(s) which has no connection and Compiler does not +// generate Executor for them, modelCount() return less value than real model count. +uint16_t Executors::modelCount() const +{ + uint16_t model_count = 0; + for (; _executors.find(std::make_pair(ir::ModelIndex{model_count}, ir::SubgraphIndex{0})) != + _executors.end(); + model_count++) + ; + + return model_count; +} + +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/Executors.h b/runtime/onert/core/src/exec/Executors.h new file mode 100644 index 000000000..ac7489186 --- /dev/null +++ b/runtime/onert/core/src/exec/Executors.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_EXEC_EXECUTORS_H__ +#define __ONERT_EXEC_EXECUTORS_H__ + +#include "exec/IExecutors.h" +#include "ir/NNPkg.h" +#include "IPermuteFunction.h" + +namespace std +{ + +template <> struct hash<std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex>> +{ + size_t + operator()(const std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex> &pair) const + noexcept + { + return (hash<uint32_t>()(pair.first.value()) << 16) ^ hash<uint32_t>()(pair.second.value()); + } +}; + +} // namespace std + +namespace onert +{ +namespace exec +{ + +/** + * @brief Class to gather executors + */ +class Executors : public IExecutors +{ +public: + Executors(void) = delete; + Executors(std::unique_ptr<ir::ModelEdges> model_edges) + : _executors{}, _model_edges{std::move(model_edges)}, _edge_quant_layers{}, + _edge_quant_tensors{}, _edge_tensors{}, _is_created_edge_quant_layers{false}, + _pkg_input_quant_layers{}, _pkg_output_quant_layers{}, _pkg_input_quant_tensors{}, + _pkg_output_quant_tensors{}, _pkg_input_tensors{}, _pkg_output_tensors{} + { + for (const auto &edge : _model_edges->edges) + { + _edge_map[edge.from].emplace_back(edge.to); + } + } + Executors(const Executors &) = delete; + Executors(Executors &&) = default; + ~Executors() = default; + + // TODO Use Executor index + void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index, + std::unique_ptr<IExecutor> exec) override; + + IExecutor *at(const ir::ModelIndex &model_index, + const ir::SubgraphIndex &subg_index) const override; + + uint32_t inputSize() const override; + + uint32_t outputSize() const override; + + const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override; + + const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override; + + void execute(const IODescription &desc) override; + +private: + void checkSupportedMultimodel() const; + void createEdgeQuantLayers(); + void CreatePkgIOTensors(const IODescription &desc); + void createPkgIOQuantLayers(const IODescription &desc); + uint16_t modelCount() const; + +private: + // TODO Remove this class + class PermuteLayer : public exec::IPermuteFunction + { + public: + PermuteLayer(const std::vector<backend::ITensor *> &inputs, + const std::vector<backend::ITensor *> &outputs) + { + assert(inputs.size() == outputs.size()); + _src_tensors = inputs; + _dst_tensors = outputs; + } + virtual ~PermuteLayer() {} + void optimize() override {} + }; + + class EdgeTensor; + +private: + std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<IExecutor>> + _executors; + + // NOTE _model_edges may use different struct type for executor implementation + std::unique_ptr<ir::ModelEdges> _model_edges; + std::unordered_map<ir::IODesc, std::vector<ir::IODesc>> _edge_map; + + /** + * @brief Type-aware quantization layers for edges between executors + * + */ + // TODO Move variables related to type-aware quantization for edges into compilation stage + // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer + std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>> + _edge_quant_layers; + + /** + * @brief Tensors for type-aware quantization of edges + * Key: `to` IODesc, Value: EdgeTensor + */ + // + // Q: Why is Key `to` IODesc + // A: these tensors are currently created depending on the type of `to` + // TODO Unify tensors with the same `from` tensor and same type + // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr. 
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_quant_tensors; + + /** + * @brief Tensors for edges between executors that are not related to type-aware quantization + * Key: `from` IODesc, Value: EdgeTensor + */ + // Q: Why is Key `from` IODesc + // A: `from` can be connected to multiple `to` + // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr. + std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_tensors; + /** + * @brief Whether type-aware quantization layers for edges between executors are created + * + */ + // TODO Remove this member after the creation of type-aware quantization layers for edges + // is moved into compilation stage + bool _is_created_edge_quant_layers; + + // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer + std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>> + _pkg_input_quant_layers; + // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer + std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>> + _pkg_output_quant_layers; + // Edge tensors of nnpkg inputs/outputs for type-aware quantization + std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_input_quant_tensors; + std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_output_quant_tensors; + // IOTensors for user buffer + std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_input_tensors; + std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_output_tensors; +}; + +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_EXECUTORS_H__ diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc index fb31f7582..578123a54 100644 --- a/runtime/onert/core/src/exec/FunctionSequence.cc +++ b/runtime/onert/core/src/exec/FunctionSequence.cc @@ -16,8 +16,6 @@ #include "exec/FunctionSequence.h" -#include "ir/Operation.h" -#include "backend/IDynamicTensorManager.h" #include "backend/ITensorRegistry.h" #include "util/logging.h" @@ -28,19 +26,19 @@ namespace exec void FunctionSequence::run() { - // TODO Find out when `_enable_dynamic_shape_inferer` is true but `_dynamic_tensor_ctx` is false if (_enable_dynamic_shape_inferer && _dynamic_tensor_ctx) { - if (_dynamic_tensor_ctx->op_seq->size() != _functions.size()) - throw std::runtime_error("operation and functions should be mapped one by one"); + // acl_cl and acl_neon backend don't support dynamic shape. + // _dynamic_tensor_ctx is always nullptr for acl_cl and acl_neon + // Thus, those two bakends cannot reach here. 
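// When a dynamic tensor context is present, the shape inferer visitor below recomputes this
// operation's output shapes (allocating dynamic tensors where needed) before its kernels run;
// purely static graphs take the plain kernel loop in the else-branch instead.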
+ + // Do dynamic shape inference + _dynamic_tensor_ctx->op->accept(*_dynamic_tensor_ctx->dynamic_shape_inferer); - auto op_seq_iter = _dynamic_tensor_ctx->op_seq->begin(); for (const auto &function : _functions) { - // set shape of output and allocate memory when needed - auto &op = _dynamic_tensor_ctx->operations->at(*op_seq_iter); - op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer); - + // NOTE the function could be also FunctionSequence so we do this + // TODO Remove this or do this recursively auto *sub_func_seq = dynamic_cast<FunctionSequence *>(function.get()); if (sub_func_seq != nullptr) { @@ -50,22 +48,12 @@ void FunctionSequence::run() // run kernel function->run(); - - // deallocate input tensors which is no longer used - _dynamic_tensor_ctx->dynamic_tensor_manager->deallocInput(*op_seq_iter); - - op_seq_iter++; } } else { for (const auto &function : _functions) { - auto *sub_func_seq = dynamic_cast<FunctionSequence *>(function.get()); - if (sub_func_seq != nullptr) - { - sub_func_seq->enableDynamicShapeInferer(false); - } function->run(); } } diff --git a/runtime/onert/core/src/exec/IPermuteFunction.cc b/runtime/onert/core/src/exec/IPermuteFunction.cc new file mode 100644 index 000000000..9d548e6dc --- /dev/null +++ b/runtime/onert/core/src/exec/IPermuteFunction.cc @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "IPermuteFunction.h" + +#include <cker/operation/Quantize.h> +#include <cker/operation/Dequantize.h> +#include "backend/IPortableTensor.h" +#include "exec/IFunction.h" +#include "ir/Index.h" +#include "ir/Shape.h" +#include <memory> +#include <misc/polymorphic_downcast.h> +#include <typeinfo> +#include "util/Utils.h" +#include <vector> +#include <unordered_map> + +namespace +{ +using namespace onert; + +inline nnfw::cker::Shape getShape(const backend::ITensor *tensor) +{ + const ir::Shape shape = tensor->getShape(); + + assert(tensor->layout() == ir::Layout::NHWC); + + auto rank = shape.rank(); + nnfw::cker::Shape ret(rank); + auto data = ret.DimsData(); + for (int i = 0; i < rank; ++i) + { + data[i] = shape.dim(i); + } + return ret; +} + +// Quantize per element +template <typename InputT, typename OutputT> +void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor) +{ + const auto scale = dst_tensor->data_scale(); + const auto zero_point = dst_tensor->data_zero_point(); + + int min_val = std::numeric_limits<OutputT>::min(); + int max_val = std::numeric_limits<OutputT>::max(); + + auto loop_shape = src_tensor->getShape(); + const auto src_layout = src_tensor->layout(); + const auto dst_layout = dst_tensor->layout(); + const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4; + ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) { + const InputT *input_data = + reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords)); + int32_t unclamped = static_cast<int32_t>(round(*input_data / scale)) + zero_point; + int32_t clamped = std::min(std::max(unclamped, min_val), max_val); + + ir::Coordinates dst_coords = + is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords; + OutputT *output_data = + reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords)); + *output_data = clamped; + }); +} + +// TODO Optimize the case where tensors has the same layout +template <typename InputT, typename OutputT> +void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor) +{ + if (!src_tensor->has_padding() && !dst_tensor->has_padding() && + src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic()) + { + assert(!dst_tensor->is_dynamic()); + + // Call optimized neon kernel + nnfw::cker::Quantize(getShape(src_tensor), + reinterpret_cast<const InputT *>(src_tensor->buffer()), + getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()), + dst_tensor->data_scale(), dst_tensor->data_zero_point()); + } + else + { + elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor); + } +} + +// Dequantize per element +template <typename InputT, typename OutputT> +void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor) +{ + const auto scale = src_tensor->data_scale(); + const auto zero_point = src_tensor->data_zero_point(); + + auto loop_shape = src_tensor->getShape(); + const auto src_layout = src_tensor->layout(); + const auto dst_layout = dst_tensor->layout(); + const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4; + ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) { + const InputT *input_data = + reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords)); + const OutputT result = static_cast<OutputT>(scale * (*input_data - zero_point)); + + ir::Coordinates dst_coords = + is_permutation ? 
ir::convertCoordinates(coords, src_layout, dst_layout) : coords; + OutputT *output_data = + reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords)); + *output_data = result; + }); +} + +// TODO Optimize the case where tensors has the same layout +template <typename InputT, typename OutputT> +void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor) +{ + if (!src_tensor->has_padding() && !dst_tensor->has_padding() && + src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic()) + { + assert(!dst_tensor->is_dynamic()); + + // Call optimized neon kernel + nnfw::cker::Dequantize(getShape(src_tensor), + reinterpret_cast<const InputT *>(src_tensor->buffer()), + getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()), + src_tensor->data_scale(), src_tensor->data_zero_point()); + } + else + { + elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor); + } +} + +template <typename SRC_T, typename DST_T, + std::enable_if_t<std::is_base_of<backend::ITensor, SRC_T>::value && + std::is_base_of<backend::ITensor, DST_T>::value, + bool> = true> +void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor) +{ + // TODO Support other types + if (src_tensor->data_type() == ir::DataType::FLOAT32) + { + switch (dst_tensor->data_type()) + { + case ir::DataType::QUANT_UINT8_ASYMM: + { + quantize<float, uint8_t>(src_tensor, dst_tensor); + break; + } + case ir::DataType::QUANT_INT8_SYMM: + { + quantize<float, int8_t>(src_tensor, dst_tensor); + break; + } + case ir::DataType::QUANT_INT16_SYMM: + { + quantize<float, int16_t>(src_tensor, dst_tensor); + break; + } + default: + { + throw std::runtime_error("IPermuteFunction: Unsupported quantization type"); + break; + } + } + } + else if (dst_tensor->data_type() == ir::DataType::FLOAT32) + { + switch (src_tensor->data_type()) + { + case ir::DataType::QUANT_UINT8_ASYMM: + { + dequantize<uint8_t, float>(src_tensor, dst_tensor); + break; + } + case ir::DataType::QUANT_INT8_SYMM: + { + dequantize<int8_t, float>(src_tensor, dst_tensor); + break; + } + case ir::DataType::QUANT_INT16_SYMM: + { + dequantize<int16_t, float>(src_tensor, dst_tensor); + break; + } + default: + { + throw std::runtime_error("IPermuteFunction: Unsupported dequantization type"); + break; + } + } + } + else + { + throw std::runtime_error("IPermuteFunction: Unsupported type for type-aware quantization yet"); + } +} + +} // namespace + +namespace onert +{ +namespace exec +{ + +void IPermuteFunction::IPermuteFunction::run() +{ + // TODO Optimization : Make control does not reach here? 
when (_src_tensors.size() == 0) + assert(_src_tensors.size() == _dst_tensors.size()); + if (_src_tensors_offsets.size() == 0) + { + _src_tensors_offsets.resize(_src_tensors.size()); + _dst_tensors_offsets.resize(_dst_tensors.size()); + } + assert(_src_tensors.size() == _src_tensors_offsets.size()); + assert(_src_tensors_offsets.size() == _dst_tensors_offsets.size()); + + for (size_t i = 0; i < _src_tensors.size(); ++i) + { + auto src_tensor = _src_tensors.at(i); + auto dst_tensor = _dst_tensors.at(i); + auto &src_offsets = _src_tensors_offsets.at(i); + auto &dst_offsets = _dst_tensors_offsets.at(i); + if (src_tensor != dst_tensor) + { + const auto rank = src_tensor->getShape().rank(); + permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets); + } + } +} + +void IPermuteFunction::permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor, + size_t rank, std::vector<size_t> &src_offsets, + std::vector<size_t> &dst_offsets) +{ + if (src_tensor->total_size() == 0) + { + assert(dst_tensor->total_size() == 0); + return; + } + + assert(src_tensor != dst_tensor); + if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type())) + { + typeAwareQuantize(src_tensor, dst_tensor); + return; + } + + switch (src_tensor->data_type()) + { + case ir::DataType::FLOAT32: + permute<float>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets); + break; + case ir::DataType::INT32: + permute<int32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets); + break; + case ir::DataType::UINT32: + permute<uint32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets); + break; + case ir::DataType::BOOL8: + case ir::DataType::QUANT_UINT8_ASYMM: + case ir::DataType::UINT8: + permute<uint8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets); + break; + case ir::DataType::QUANT_INT8_ASYMM: + case ir::DataType::QUANT_INT8_SYMM: + permute<int8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets); + break; + case ir::DataType::INT64: + permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets); + break; + case ir::DataType::QUANT_INT16_SYMM: + permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets); + break; + default: + throw std::runtime_error("IPermuteFunction: Not supported data type"); + break; + } +} + +const std::type_info &IPermuteFunction::underlying_type(ir::DataType type) const +{ + switch (type) + { + case ir::DataType::FLOAT32: + return typeid(float); + case ir::DataType::INT32: + return typeid(int32_t); + case ir::DataType::UINT32: + return typeid(uint32_t); + case ir::DataType::INT64: + return typeid(int64_t); + case ir::DataType::BOOL8: + case ir::DataType::QUANT_UINT8_ASYMM: + case ir::DataType::UINT8: + return typeid(uint8_t); + case ir::DataType::QUANT_INT8_ASYMM: + case ir::DataType::QUANT_INT8_SYMM: + return typeid(int8_t); + case ir::DataType::QUANT_INT16_SYMM: + return typeid(int16_t); + default: + throw std::runtime_error("IPermuteFunction: Not supported data type"); + } +} + +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h index 6b4d15380..e790f3290 100644 --- a/runtime/onert/core/src/exec/IPermuteFunction.h +++ b/runtime/onert/core/src/exec/IPermuteFunction.h @@ -25,21 +25,48 @@ #include "backend/ITensor.h" #include "exec/IFunction.h" -#include "ir/Index.h" -#include "ir/Shape.h" #include <memory> -#include <typeinfo> -#include "util/Utils.h" #include <vector> +#include <unordered_map> namespace onert { 
namespace exec
{
+inline void UpdateOffsets(::onert::backend::ITensor *src, ::onert::backend::ITensor *dst,
+ const ::onert::ir::Shape &loop_shape, std::vector<size_t> &src_offsets,
+ std::vector<size_t> &dst_offsets)
+{
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ src_offsets.emplace_back(src->calcOffset(coords));
+ dst_offsets.emplace_back(dst->calcOffset(coords));
+ });
+}
+
+inline void CopyStatic(const uint8_t *src_buffer, uint8_t *dst_buffer,
+ const std::vector<size_t> &src_offsets,
+ const std::vector<size_t> &dst_offsets, size_t copy_len)
+{
+ assert(src_offsets.size() == dst_offsets.size());
+ for (size_t i = 0; i < src_offsets.size(); ++i)
+ {
+ memcpy(dst_buffer + dst_offsets.at(i), src_buffer + src_offsets.at(i), copy_len);
+ }
+}
+
+inline void CopyDynamic(const ::onert::backend::ITensor *src, const ::onert::backend::ITensor *dst,
+ uint8_t *dst_buffer, const ::onert::ir::Shape &loop_shape, size_t copy_len)
+{
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ // Copy src tensor's data to dst_buffer with calculated offset of dst tensor
+ memcpy(dst_buffer + dst->calcOffset(coords), src->buffer() + src->calcOffset(coords), copy_len);
+ });
+}
+
 class IPermuteFunction : public IFunction
 {
-private:
+protected:
 enum class PermuteType
 {
 NHWC_TO_NCHW,
@@ -48,63 +75,69 @@ private:
 };
 public:
- virtual void run() override
+ virtual void run() override;
+
+ virtual void prepare() override { optimize(); }
+
+ virtual void optimize() = 0;
+
+protected:
+ void permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor, size_t rank,
+ std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets);
+
+private:
+ // TODO make src const by providing const access()
+ template <class T>
+ void permute(backend::ITensor *src, backend::ITensor *dst, size_t rank,
+ std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
 {
- assert(_src_tensors.size() > 0);
- assert(_src_tensors.size() == _dst_tensors.size());
- auto src_it = _src_tensors.begin();
- auto dst_it = _dst_tensors.begin();
- while (src_it != _src_tensors.end())
+ assert(src->total_size() != 0 && dst->total_size() != 0);
+ // If dst is subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
+ if (dst->needMemoryMap() && !dst->is_subtensor())
 {
- const auto src_tensor = *src_it;
- auto dst_tensor = *dst_it;
- if (src_tensor != dst_tensor)
+ // An assertion to check mapping without calling map()
+ // Now there is no case where both src and dst have cl buffer.
+ assert(!src->needMemoryMap()); + + if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout()) { - // TODO Change to permute in parallel - assert(underlying_type(src_tensor->data_type()) == - underlying_type(dst_tensor->data_type())); - const auto rank = src_tensor->num_dimensions(); - switch (src_tensor->data_type()) - { - case ir::DataType::FLOAT32: - permute<float>(src_tensor, dst_tensor, rank); - break; - case ir::DataType::INT32: - permute<int32_t>(src_tensor, dst_tensor, rank); - break; - case ir::DataType::UINT32: - permute<uint32_t>(src_tensor, dst_tensor, rank); - break; - case ir::DataType::BOOL8: - case ir::DataType::QUANT_UINT8_ASYMM: - case ir::DataType::UINT8: - permute<uint8_t>(src_tensor, dst_tensor, rank); - break; - case ir::DataType::QUANT_INT8_SYMM: - permute<int8_t>(src_tensor, dst_tensor, rank); - break; - case ir::DataType::INT64: - permute<int64_t>(src_tensor, dst_tensor, rank); - break; - default: - throw std::runtime_error("IPermuteFunction: Not supported data type"); - break; - } + src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); }); } - src_it++; - dst_it++; + else + { + // TODO Optimize this block in case of that padding size of dst is big. + _buffers_map[dst].reserve(dst->total_size()); + auto dst_buffer = _buffers_map[dst].data(); + src->access([&](backend::ITensor &) { + permute<T>(src, dst, rank, dst_buffer, dst->total_size(), src_offsets, dst_offsets); + }); + dst->enqueueWriteBuffer(dst_buffer, false); + } + } + else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() && + !dst->has_padding() && src->layout() == dst->layout()) + { + assert(!dst->needMemoryMap()); + dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); }); + } + else + { + auto fn = [&](backend::ITensor &) { + dst->access([&](backend::ITensor &) { + permute<T>(src, dst, rank, dst->buffer(), dst->total_size(), src_offsets, dst_offsets); + }); + }; + src->access(fn); } } - virtual void prepare() override { optimize(); } - - virtual void optimize() = 0; - -private: template <class T> - void permute(const std::shared_ptr<backend::ITensor> &src, std::shared_ptr<backend::ITensor> &dst, - size_t rank) + void permute(backend::ITensor *src, backend::ITensor *dst, size_t rank, uint8_t *dst_buffer, + size_t dst_size, std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets) { + assert(dst_buffer != nullptr); + assert(dst_size == dst->total_size()); + const auto permute_type = [&]() -> PermuteType { if (src->layout() == ir::Layout::NHWC && dst->layout() == ir::Layout::NCHW) { @@ -119,166 +152,115 @@ private: return PermuteType::COPY; } }(); - auto fn = [&](backend::ITensor &src_tensor) { - dst->access([&](backend::ITensor &dst_tensor) { - auto src_buffer = src_tensor.buffer(); - auto src_size = src_tensor.total_size(); - auto dst_buffer = dst_tensor.buffer(); - if (permute_type == PermuteType::COPY) + if (rank == 4 && permute_type != PermuteType::COPY) + { + switch (permute_type) + { + case PermuteType::NHWC_TO_NCHW: { - assert(src_tensor.layout() == dst_tensor.layout()); - if (!src_tensor.has_padding() && !dst_tensor.has_padding()) - { - assert(src_size <= dst_tensor.total_size()); - memcpy(dst_buffer, src_buffer, src_size); - return; - } + ir::FeatureShape shape; + auto dst_shape = dst->getShape(); + shape.N = dst_shape.dim(0); + shape.C = dst_shape.dim(1); + shape.H = dst_shape.dim(2); + shape.W = dst_shape.dim(3); + + typename feature::nchw::View<T>::Strides strides; + const auto 
start_offset = dst->calcOffset({0, 0, 0, 0}); + strides.W = dst_shape.dim(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset; + strides.H = dst_shape.dim(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset; + strides.C = dst_shape.dim(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset; + strides.N = dst_shape.dim(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset; + + const feature::nhwc::Reader<T> from(src); + feature::nchw::View<T> into(shape, strides, + reinterpret_cast<T *>(dst_buffer + start_offset), dst_size); + feature::iterate(shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { + const auto value = from.at(batch, row, col, ch); + into.at(batch, ch, row, col) = value; + }; + break; } - switch (rank) + case PermuteType::NCHW_TO_NHWC: { - case 0: - case 1: - { - const int32_t copy_len = dst_tensor.dimension(0); + ir::FeatureShape shape; + auto dst_shape = dst->getShape(); + shape.N = dst_shape.dim(0); + shape.H = dst_shape.dim(1); + shape.W = dst_shape.dim(2); + shape.C = dst_shape.dim(3); - memcpy(dst_buffer, src_buffer, copy_len * sizeof(T)); - break; - } - case 2: - { - const int32_t dim_0 = dst_tensor.dimension(0); - const int32_t copy_len = dst_tensor.dimension(1); + typename feature::nhwc::View<T>::Strides strides; + const auto start_offset = dst->calcOffset({0, 0, 0, 0}); + strides.C = dst_shape.dim(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset; + strides.W = dst_shape.dim(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset; + strides.H = dst_shape.dim(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset; + strides.N = dst_shape.dim(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset; - for (int32_t i = 0; i < dim_0; ++i) - { - ir::Coordinates coords{i, 0}; - memcpy(dst_buffer + dst_tensor.calcOffset(coords), - src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T)); - } - break; - } - case 3: - { - const int32_t dim_0 = dst_tensor.dimension(0); - const int32_t dim_1 = dst_tensor.dimension(1); - const int32_t copy_len = dst_tensor.dimension(2); + const feature::nchw::Reader<T> from(src); + feature::nhwc::View<T> into(shape, strides, + reinterpret_cast<T *>(dst_buffer + start_offset), dst_size); + feature::iterate(shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { + const auto value = from.at(batch, ch, row, col); + into.at(batch, row, col, ch) = value; + }; + break; + } + default: + { + throw std::runtime_error("Unsupported Permutation"); + break; + } + } + } + else if (!src->has_padding() && !dst->has_padding()) + { + auto src_size = src->total_size(); + assert(src_size <= dst->total_size()); + memcpy(dst_buffer, src->buffer(), src_size); + } + else + { + auto loop_shape = src->getShape(); + const auto copy_axis = loop_shape.rank() - 1; + const auto copy_len = loop_shape.dim(copy_axis) * sizeof(T); + loop_shape.dim(copy_axis) = 1; - for (auto i = 0; i < dim_0; ++i) - { - for (auto j = 0; j < dim_1; ++j) - { - ir::Coordinates coords{i, j, 0}; - memcpy(dst_buffer + dst_tensor.calcOffset(coords), - src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T)); - } - } - break; - } - case 4: - { - switch (permute_type) - { - case PermuteType::NHWC_TO_NCHW: - { - ir::FeatureShape shape; - shape.N = dst_tensor.dimension(0); - shape.C = dst_tensor.dimension(1); - shape.H = dst_tensor.dimension(2); - shape.W = dst_tensor.dimension(3); - const feature::nhwc::Reader<T> from(&src_tensor); - feature::nchw::View<T> into(&dst_tensor); - feature::iterate(shape) - << 
[&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, row, col, ch); - into.at(batch, ch, row, col) = value; - }; - break; - } - case PermuteType::NCHW_TO_NHWC: - { - ir::FeatureShape shape; - shape.N = src_tensor.dimension(0); - shape.C = src_tensor.dimension(1); - shape.H = src_tensor.dimension(2); - shape.W = src_tensor.dimension(3); - const feature::nchw::Reader<T> from(&src_tensor); - feature::nhwc::View<T> into(&dst_tensor); - feature::iterate(shape) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, ch, row, col); - into.at(batch, row, col, ch) = value; - }; - break; - } - case PermuteType::COPY: - { - const int32_t dim_0 = dst_tensor.dimension(0); - const int32_t dim_1 = dst_tensor.dimension(1); - const int32_t dim_2 = dst_tensor.dimension(2); - const int32_t copy_len = dst_tensor.dimension(3); + if (src->is_dynamic()) + { + assert(dst->is_dynamic()); + CopyDynamic(src, dst, dst_buffer, loop_shape, copy_len); + } + else + { + // TODO Uncomment the assertion below + // assert(!dst->is_dynamic() || dst is output of graph); + if (src_offsets.size() == 0) + { + assert(dst_offsets.size() == 0); - for (auto i = 0; i < dim_0; ++i) - { - for (auto j = 0; j < dim_1; ++j) - { - for (auto k = 0; k < dim_2; ++k) - { - ir::Coordinates coords{i, j, k, 0}; - memcpy(dst_buffer + dst_tensor.calcOffset(coords), - src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T)); - } - } - } - break; - } - default: - { - throw std::runtime_error("Unsupported Permutation"); - break; - } - } - break; - } - default: - throw std::runtime_error("Unsupported rank in permutation"); - break; + auto loop_shape = src->getShape(); + const auto copy_axis = loop_shape.rank() - 1; + loop_shape.dim(copy_axis) = 1; + UpdateOffsets(src, dst, loop_shape, src_offsets, dst_offsets); } - }); - }; - src->access(fn); + CopyStatic(src->buffer(), dst_buffer, src_offsets, dst_offsets, copy_len); + } + } } +protected: // NOTE The typeid expression is lvalue expression which refers to an object with static storage // duration, of the polymorphic type const std::type_info or of some type derived from it. 
// So std::type_info is non-copyable - const std::type_info &underlying_type(ir::DataType type) const - { - switch (type) - { - case ir::DataType::FLOAT32: - return typeid(float); - case ir::DataType::INT32: - return typeid(int32_t); - case ir::DataType::UINT32: - return typeid(uint32_t); - case ir::DataType::INT64: - return typeid(int64_t); - case ir::DataType::BOOL8: - case ir::DataType::QUANT_UINT8_ASYMM: - case ir::DataType::UINT8: - return typeid(uint8_t); - case ir::DataType::QUANT_INT8_SYMM: - return typeid(int8_t); - default: - throw std::runtime_error("IPermuteFunction: Not supported data type"); - } - } + const std::type_info &underlying_type(ir::DataType type) const; protected: - std::vector<std::shared_ptr<backend::ITensor>> _src_tensors; - std::vector<std::shared_ptr<backend::ITensor>> _dst_tensors; - // TODO Remove this member if it is possible - std::vector<size_t> _ranks; + std::vector<backend::ITensor *> _src_tensors; + std::vector<backend::ITensor *> _dst_tensors; + std::vector<std::vector<size_t>> _src_tensors_offsets; + std::vector<std::vector<size_t>> _dst_tensors_offsets; + std::unordered_map<const backend::ITensor *, std::vector<uint8_t>> _buffers_map; }; } // namespace exec diff --git a/runtime/onert/core/src/exec/IPermuteFunction.test.cc b/runtime/onert/core/src/exec/IPermuteFunction.test.cc new file mode 100644 index 000000000..1009f194d --- /dev/null +++ b/runtime/onert/core/src/exec/IPermuteFunction.test.cc @@ -0,0 +1,902 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "IPermuteFunction.h" + +#include <ir/Layout.h> +#include <ir/Shape.h> +#include <ir/TypeInfo.h> + +#include <cmath> +#include <gtest/gtest.h> + +namespace +{ +using namespace onert; +using namespace ir; +using namespace backend; +using namespace exec; + +class MockUpTensor : public ITensor +{ +public: + MockUpTensor(const Shape &shape, const TypeInfo &type_info, Layout layout, size_t pad) + : _shape(shape), _type_info(type_info), _data(nullptr), _layout(layout) + { + _strides.resize(shape.rank()); + + std::vector<size_t> pads(shape.rank(), 0); + pads[shape.rank() - 1] = pad; + size_t stride = 1; + for (int32_t i = _shape.rank() - 1; i >= 0; --i) + { + _strides.at(i) = stride; + stride = stride * (_shape.dim(i) + pads.at(i)); + } + } + virtual ~MockUpTensor() {} + + void setBuffer(uint8_t *data) { _data = data; } + + size_t total_size() const override + { + size_t total_size = _strides[0] * _shape.dim(0); + total_size *= sizeOfDataType(data_type()); + return total_size; + } + + size_t calcOffset(const ir::Coordinates &coords) const override + { + size_t offset = 0; + for (size_t i = 0; i < _shape.rank(); ++i) + { + offset += (_strides[i] * coords[i]); + } + offset *= sizeOfDataType(data_type()); + return offset; + } + + uint8_t *buffer() const override { return _data; } + + ir::Layout layout() const override { return _layout; } + ir::DataType data_type() const override { return _type_info.type(); } + float data_scale() const override { return _type_info.scale(); } + int32_t data_zero_point() const override { return _type_info.zero_point(); } + const std::vector<float> &data_scales() const override { return _type_info.scales(); } + const std::vector<int32_t> &data_zero_points() const override { return _type_info.zero_points(); } + bool has_padding() const override + { + return total_size() / sizeOfDataType(data_type()) != _shape.num_elements(); + } + void access(const std::function<void(ITensor &tensor)> &fn) final { fn(*this); } + + bool is_dynamic() const override { return false; } + Shape getShape() const override { return _shape; } + +private: + Shape _shape; + TypeInfo _type_info; + Layout _layout; + uint8_t *_data; + std::vector<size_t> _strides; +}; + +class MockUpLayer : public IPermuteFunction +{ +public: + MockUpLayer(const std::vector<ITensor *> &inputs, const std::vector<ITensor *> &outputs) + { + assert(inputs.size() == outputs.size()); + _src_tensors = inputs; + _dst_tensors = outputs; + } + virtual ~MockUpLayer() {} + void optimize() override {} +}; + +TEST(IPermuteFunction, float_rank1) +{ + const size_t input_pads[4] = {0, 1, 0, 2}; + const size_t output_pads[4] = {0, 0, 2, 1}; + const std::vector<Shape> shapes{{1}, {4}, {5}, {2}}; + float expected_buffer[] = {1, 0, -1, -2, 3}; + const auto type_info = TypeInfo(DataType::FLOAT32); + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer)); + + outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), 
inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + Coordinates coords{j}; + float result = + *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords)); + float expected = + *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords)); + EXPECT_EQ(result, expected); + } + } +} + +TEST(IPermuteFunction, float_rank2) +{ + const size_t input_pads[4] = {0, 1, 0, 2}; + const size_t output_pads[4] = {0, 0, 2, 1}; + const std::vector<Shape> shapes{{1, 4}, {2, 2}, {1, 5}, {2, 3}}; + float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8}; + const auto type_info = TypeInfo(DataType::FLOAT32); + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer)); + + outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + Coordinates coords{j, k}; + float result = + *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords)); + float expected = + *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords)); + EXPECT_EQ(result, expected); + } + } + } +} + +TEST(IPermuteFunction, float_rank3) +{ + const size_t input_pads[4] = {0, 5, 0, 2}; + const size_t output_pads[4] = {0, 3, 2, 1}; + const std::vector<Shape> shapes{{1, 4, 1}, {1, 2, 1}, {2, 1, 5}, {1, 2, 3}}; + float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10}; + const auto type_info = TypeInfo(DataType::FLOAT32); + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer)); + + outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + for (int32_t l = 0; l < 
shapes[i].dim(2); ++l) + { + Coordinates coords{j, k, l}; + float result = + *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords)); + float expected = + *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords)); + EXPECT_EQ(result, expected); + } + } + } + } +} + +TEST(IPermuteFunction, float_rank4) +{ + const size_t input_pads[4] = {0, 0, 1, 2}; + const size_t output_pads[4] = {0, 3, 2, 1}; + const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}}; + float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10}; + const auto type_info = TypeInfo(DataType::FLOAT32); + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer)); + + outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + for (int32_t l = 0; l < shapes[i].dim(2); ++l) + { + for (int32_t m = 0; m < shapes[i].dim(3); ++m) + { + Coordinates coords{j, k, l, m}; + float result = + *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords)); + float expected = + *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords)); + EXPECT_EQ(result, expected); + } + } + } + } + } +} + +TEST(IPermuteFunction, float_rank4_layout) +{ + const size_t input_pads[4] = {0, 0, 1, 2}; + const size_t output_pads[4] = {0, 3, 2, 1}; + const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}}; + float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16}; + const auto type_info = TypeInfo(DataType::FLOAT32); + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + Layout layout = Layout::NHWC; + Shape shape = shapes[i]; + if (i % 2 == 1) + { + layout = Layout::NCHW; + shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)}; + } + inputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer)); + + if (layout == Layout::NHWC) + { + layout = Layout::NCHW; + shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)}; + } + else + { + layout = Layout::NHWC; + shape = shapes[i]; + } + outputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor 
*>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + for (int32_t l = 0; l < shapes[i].dim(2); ++l) + { + for (int32_t m = 0; m < shapes[i].dim(3); ++m) + { + Coordinates input_coords; + Coordinates output_coords; + if (inputs[i]->layout() == Layout::NHWC) + { + input_coords = Coordinates{j, k, l, m}; + } + else + { + input_coords = Coordinates{j, m, k, l}; + } + if (outputs[i]->layout() == Layout::NHWC) + { + output_coords = Coordinates{j, k, l, m}; + } + else + { + output_coords = Coordinates{j, m, k, l}; + } + float result = *reinterpret_cast<float *>(outputs[i]->buffer() + + outputs[i]->calcOffset(output_coords)); + float expected = + *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(input_coords)); + EXPECT_EQ(result, expected); + } + } + } + } + } +} + +TEST(IPermuteFunction, float_to_qasymm8) +{ + const size_t input_pads[4] = {0, 0, 1, 2}; + const size_t output_pads[4] = {0, 3, 2, 1}; + const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}}; + float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100}; + float scale = 10; + int32_t zero_point = 128; + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC, + input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer)); + + TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point}; + outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + for (int32_t l = 0; l < shapes[i].dim(2); ++l) + { + for (int32_t m = 0; m < shapes[i].dim(3); ++m) + { + Coordinates coords{j, k, l, m}; + uint8_t qasymm8 = + *reinterpret_cast<uint8_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords)); + float result = (qasymm8 - zero_point) * scale; + float expected = + *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords)); + EXPECT_EQ(result, expected); + } + } + } + } + } +} + +TEST(IPermuteFunction, float_to_qsymm8) +{ + const size_t input_pads[4] = {0, 0, 1, 2}; + const size_t output_pads[4] = {0, 3, 2, 1}; + const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}}; + float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100}; + float scale = 10; + int32_t zero_point = 0; + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t 
i = 0; i < 4; ++i) + { + inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC, + input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer)); + + TypeInfo type_info{DataType::QUANT_INT8_SYMM, scale, zero_point}; + outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + for (int32_t l = 0; l < shapes[i].dim(2); ++l) + { + for (int32_t m = 0; m < shapes[i].dim(3); ++m) + { + Coordinates coords{j, k, l, m}; + int8_t qsymm8 = + *reinterpret_cast<int8_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords)); + float result = (qsymm8 - zero_point) * scale; + float expected = + *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords)); + EXPECT_EQ(result, expected); + } + } + } + } + } +} + +TEST(IPermuteFunction, float_to_qsymm16) +{ + const size_t input_pads[4] = {0, 0, 1, 2}; + const size_t output_pads[4] = {0, 3, 2, 1}; + const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}}; + float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100}; + float scale = 10; + int32_t zero_point = 0; + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC, + input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer)); + + TypeInfo type_info{DataType::QUANT_INT16_SYMM, scale, zero_point}; + outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + for (int32_t l = 0; l < shapes[i].dim(2); ++l) + { + for (int32_t m = 0; m < shapes[i].dim(3); ++m) + { + Coordinates coords{j, k, l, m}; + int16_t qsymm16 = + *reinterpret_cast<int16_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords)); + float result = (qsymm16 - zero_point) * scale; + float expected = + *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords)); + EXPECT_EQ(result, expected); + } + } + } + } + } +} + +TEST(IPermuteFunction, qasymm8_to_float) +{ + const size_t input_pads[4] = {0, 0, 1, 2}; + const size_t output_pads[4] = {0, 3, 2, 1}; + const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}}; + float 
expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100}; + float scale = 10; + int32_t zero_point = 128; + uint8_t input_buffer[12]; + + int32_t min_val = std::numeric_limits<uint8_t>::min(); + int32_t max_val = std::numeric_limits<uint8_t>::max(); + for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i) + { + int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point; + input_buffer[i] = std::min(std::max(unclamped, min_val), max_val); + } + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point}; + inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer)); + + outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), + Layout::NHWC, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + for (int32_t l = 0; l < shapes[i].dim(2); ++l) + { + for (int32_t m = 0; m < shapes[i].dim(3); ++m) + { + Coordinates coords{j, k, l, m}; + float result = + *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords)); + uint8_t qasymm8 = + *reinterpret_cast<uint8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords)); + float expected = (qasymm8 - zero_point) * scale; + EXPECT_EQ(result, expected); + } + } + } + } + } +} + +TEST(IPermuteFunction, qsymm8_to_float) +{ + const size_t input_pads[4] = {0, 0, 1, 2}; + const size_t output_pads[4] = {0, 3, 2, 1}; + const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}}; + float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100}; + float scale = 10; + int32_t zero_point = 0; + uint8_t input_buffer[12]; + + int32_t min_val = std::numeric_limits<int8_t>::min(); + int32_t max_val = std::numeric_limits<int8_t>::max(); + for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i) + { + int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point; + input_buffer[i] = std::min(std::max(unclamped, min_val), max_val); + } + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + TypeInfo type_info{DataType::QUANT_INT8_SYMM, scale, zero_point}; + inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer)); + + outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), + Layout::NHWC, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = 
std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + for (int32_t l = 0; l < shapes[i].dim(2); ++l) + { + for (int32_t m = 0; m < shapes[i].dim(3); ++m) + { + Coordinates coords{j, k, l, m}; + float result = + *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords)); + int8_t qasymm8 = + *reinterpret_cast<int8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords)); + float expected = (qasymm8 - zero_point) * scale; + EXPECT_EQ(result, expected); + } + } + } + } + } +} + +TEST(IPermuteFunction, qsymm16_to_float) +{ + const size_t input_pads[4] = {0, 0, 1, 2}; + const size_t output_pads[4] = {0, 3, 2, 1}; + const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}}; + float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100}; + float scale = 10; + int32_t zero_point = 0; + uint8_t input_buffer[12]; + + int32_t min_val = std::numeric_limits<int16_t>::min(); + int32_t max_val = std::numeric_limits<int16_t>::max(); + for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i) + { + int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point; + input_buffer[i] = std::min(std::max(unclamped, min_val), max_val); + } + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + TypeInfo type_info{DataType::QUANT_INT16_SYMM, scale, zero_point}; + inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer)); + + outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), + Layout::NHWC, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + for (int32_t l = 0; l < shapes[i].dim(2); ++l) + { + for (int32_t m = 0; m < shapes[i].dim(3); ++m) + { + Coordinates coords{j, k, l, m}; + float result = + *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords)); + int16_t qasymm8 = + *reinterpret_cast<int16_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords)); + float expected = (qasymm8 - zero_point) * scale; + EXPECT_EQ(result, expected); + } + } + } + } + } +} + +TEST(IPermuteFunction, float_to_qasymm8_layout) +{ + const size_t input_pads[4] = {0, 0, 1, 2}; + const size_t output_pads[4] = {0, 3, 2, 1}; + const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}}; + float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, + -80, 90, -100, 110, -120, 130, -140, 150, -160}; + float scale = 10; + int32_t zero_point = 
128; + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + Layout layout = Layout::NHWC; + Shape shape = shapes[i]; + if (i % 2 == 1) + { + layout = Layout::NCHW; + shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)}; + } + inputs[i] = + std::make_unique<MockUpTensor>(shape, TypeInfo(DataType::FLOAT32), layout, input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer)); + + if (layout == Layout::NHWC) + { + layout = Layout::NCHW; + shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)}; + } + else + { + layout = Layout::NHWC; + shape = shapes[i]; + } + TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point}; + outputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + for (int32_t l = 0; l < shapes[i].dim(2); ++l) + { + for (int32_t m = 0; m < shapes[i].dim(3); ++m) + { + Coordinates input_coords; + Coordinates output_coords; + if (inputs[i]->layout() == Layout::NHWC) + { + input_coords = Coordinates{j, k, l, m}; + } + else + { + input_coords = Coordinates{j, m, k, l}; + } + if (outputs[i]->layout() == Layout::NHWC) + { + output_coords = Coordinates{j, k, l, m}; + } + else + { + output_coords = Coordinates{j, m, k, l}; + } + uint8_t qasymm8 = *reinterpret_cast<uint8_t *>(outputs[i]->buffer() + + outputs[i]->calcOffset(output_coords)); + float result = (qasymm8 - zero_point) * scale; + float expected = + *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(input_coords)); + EXPECT_EQ(result, expected); + } + } + } + } + } +} + +TEST(IPermuteFunction, asymm8_to_float_layout) +{ + const size_t input_pads[4] = {0, 0, 1, 2}; + const size_t output_pads[4] = {0, 3, 2, 1}; + const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}}; + float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, + -80, 90, -100, 110, -120, 130, -140, 150, -160}; + float scale = 10; + int32_t zero_point = 128; + uint8_t input_buffer[18]; + + int32_t min_val = std::numeric_limits<int16_t>::min(); + int32_t max_val = std::numeric_limits<int16_t>::max(); + for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i) + { + int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point; + input_buffer[i] = std::min(std::max(unclamped, min_val), max_val); + } + + std::vector<std::unique_ptr<MockUpTensor>> inputs(4); + std::vector<std::unique_ptr<MockUpTensor>> outputs(4); + std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4); + for (size_t i = 0; i < 4; ++i) + { + Layout layout = Layout::NHWC; + Shape shape = shapes[i]; + if (i % 2 == 1) + { + layout = Layout::NCHW; + shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)}; + } + TypeInfo 
type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point}; + inputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, input_pads[i]); + inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer)); + + if (layout == Layout::NHWC) + { + layout = Layout::NCHW; + shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)}; + } + else + { + layout = Layout::NHWC; + shape = shapes[i]; + } + outputs[i] = + std::make_unique<MockUpTensor>(shape, TypeInfo(DataType::FLOAT32), layout, output_pads[i]); + output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size()); + outputs[i]->setBuffer(output_buffers[i].get()); + } + + auto mockup_layer = std::make_unique<MockUpLayer>( + std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()}, + std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()}); + mockup_layer->run(); + + for (size_t i = 0; i < 4; ++i) + { + for (int32_t j = 0; j < shapes[i].dim(0); ++j) + { + for (int32_t k = 0; k < shapes[i].dim(1); ++k) + { + for (int32_t l = 0; l < shapes[i].dim(2); ++l) + { + for (int32_t m = 0; m < shapes[i].dim(3); ++m) + { + Coordinates input_coords; + Coordinates output_coords; + if (inputs[i]->layout() == Layout::NHWC) + { + input_coords = Coordinates{j, k, l, m}; + } + else + { + input_coords = Coordinates{j, m, k, l}; + } + if (outputs[i]->layout() == Layout::NHWC) + { + output_coords = Coordinates{j, k, l, m}; + } + else + { + output_coords = Coordinates{j, m, k, l}; + } + float result = *reinterpret_cast<float *>(outputs[i]->buffer() + + outputs[i]->calcOffset(output_coords)); + uint8_t qasymm8 = *reinterpret_cast<uint8_t *>(inputs[i]->buffer() + + inputs[i]->calcOffset(input_coords)); + float expected = (qasymm8 - zero_point) * scale; + EXPECT_EQ(result, expected); + } + } + } + } + } +} + +} // namespace diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc index 72a18def1..d149345fd 100644 --- a/runtime/onert/core/src/exec/JSONExecTime.cc +++ b/runtime/onert/core/src/exec/JSONExecTime.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "exec/JSONExecTime.h" -#include "backend/IConfig.h" +#include "JSONExecTime.h" + #include <fstream> namespace onert @@ -135,7 +135,7 @@ void JSON::printOperation(const std::map<uint32_t, int64_t> &operation_info, stream.seekp(-2, std::ofstream::end); } -void JSON::uploadOperationsExecTime() const +void JSON::storeOperationsExecTime() const { std::ofstream stream(_measurement_file); if (!stream.is_open()) diff --git a/runtime/onert/core/src/exec/JSONExecTime.h b/runtime/onert/core/src/exec/JSONExecTime.h index a64cb3133..e01723611 100644 --- a/runtime/onert/core/src/exec/JSONExecTime.h +++ b/runtime/onert/core/src/exec/JSONExecTime.h @@ -37,15 +37,15 @@ namespace exec * _measurements[Backend*]["string"][bool][uint32_t] = int64_t */ using MeasurementData = std::unordered_map< - const backend::Backend *, - std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>; + const backend::Backend *, + std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>; class JSON { public: explicit JSON(const std::vector<const backend::Backend *> &backends, MeasurementData &measurements) - : _measurement_file("exec_time.json"), _backends(), _measurements(measurements) + : _measurement_file("exec_time.json"), _backends(), _measurements(measurements) { for (const auto b : backends) { @@ -54,18 +54,16 @@ public: loadOperationsExecTime(); }; /** - * @brief Update _operations_exec_time_file with new data. + * @brief Update _measurement_file with new data. */ - void uploadOperationsExecTime() const; + void storeOperationsExecTime() const; private: ///@brief file containing measurements std::string _measurement_file; std::unordered_map<std::string, const backend::Backend *> _backends; - std::unordered_map< - const backend::Backend *, - std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>> - &_measurements; + MeasurementData &_measurements; + /** * @brief Helper function for inserting data to OperationExecTimes * @@ -86,7 +84,7 @@ private: void printOperation(const std::map<uint32_t, int64_t> &operation_info, std::ofstream &stream) const; /** - * @brief Parse and load operations_exec_time from _operations_exec_time_file. + * @brief Parse and load _measurements from _measurement_file. 
*/ void loadOperationsExecTime(); }; diff --git a/runtime/onert/core/src/exec/LinearExecutor.cc b/runtime/onert/core/src/exec/LinearExecutor.cc index 69dfe9b9b..a64dadcb1 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.cc +++ b/runtime/onert/core/src/exec/LinearExecutor.cc @@ -24,41 +24,54 @@ namespace onert namespace exec { -#ifdef RUY_PROFILER -namespace -{ -char *seq_to_label(const onert::ir::OpSequence *op_seq, const onert::ir::Operations &operations) +void LinearExecutor::executeImpl() { - auto node_name = operations.at(*op_seq->begin()).name(); - char *cstr = new char[node_name.length() + 1]; - std::strcpy(cstr, node_name.c_str()); - return cstr; -} -} // namespace + if (_tracing_ctx) + { + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); + for (auto &&code : _code) + { + const auto backend = code.lower_info->backend(); +// TODO : Move ruy profiler into ExecutionObserver +#ifdef RUY_PROFILER + ruy::profiler::ScopeLabel label(code.op->name()); #endif + _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend); -void LinearExecutor::executeImpl() -{ - _subject.notifyModelBegin(this); - for (auto &&code : _code) + auto &fn_seq = code.fn_seq; + + fn_seq->initRunning(); + + bool handle_dynamic_tensor = + _lowered_graph->getHasDynamicTensor(code.op_ind) || hasDynamicInput(); + fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor); + fn_seq->run(); + + _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend); + } + _subject.notifySubgraphEnd(profiling_subg_index); + } + else { - const auto op_seq = code.op_seq; - const auto backend = code.lower_info->backend(); + for (auto &&code : _code) + { // TODO : Move ruy profiler into ExecutionObserver #ifdef RUY_PROFILER - ruy::profiler::ScopeLabel label(seq_to_label(op_seq, _graph.operations())); + ruy::profiler::ScopeLabel label(code.op->name()); #endif - _subject.notifyJobBegin(this, op_seq, backend); - auto &fn_seq = code.fn_seq; - bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || hasDynamicInput(); + auto &fn_seq = code.fn_seq; - fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor); - fn_seq->run(); + fn_seq->initRunning(); - _subject.notifyJobEnd(this, op_seq, backend); + bool handle_dynamic_tensor = + _lowered_graph->getHasDynamicTensor(code.op_ind) || hasDynamicInput(); + fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor); + fn_seq->run(); + } } - _subject.notifyModelEnd(this); } } // namespace exec diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h index c224d3f4f..cc073411a 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.h +++ b/runtime/onert/core/src/exec/LinearExecutor.h @@ -22,11 +22,11 @@ #ifndef __ONERT_EXEC_EXECUTOR_H_ #define __ONERT_EXEC_EXECUTOR_H_ -#include "ir/Index.h" #include "ExecutorBase.h" -#include "compiler/Linear.h" -#include "exec/FunctionSequence.h" + #include "compiler/CodeMap.h" +#include "ir/Index.h" +#include "util/TracingCtx.h" namespace onert { @@ -44,18 +44,15 @@ public: * @brief Construct a new LinearExecutor object * @param lowered_graph LoweredGraph object * @param tensor_builders Tensor builders that are currently used - * @param code_map OpSequence and its code map + * @param code_map @c ir::Operation and its code map */ LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, - const 
std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorRegistries &tensor_regs, - backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map, - const std::vector<ir::OpSequenceIndex> &order) - : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, - std::move(tensor_mgrs)} + backend::BackendContexts &&backend_contexts, + const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, + const std::vector<ir::OperationIndex> &order, const util::TracingCtx *tracing_ctx) + : ExecutorBase{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, tracing_ctx} { - for (auto index : order) + for (auto &&index : order) { _code.emplace_back(std::move(code_map.at(index))); } diff --git a/runtime/onert/core/src/exec/MinMaxRecorder.cc b/runtime/onert/core/src/exec/MinMaxRecorder.cc new file mode 100644 index 000000000..88fc104d1 --- /dev/null +++ b/runtime/onert/core/src/exec/MinMaxRecorder.cc @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MinMaxRecorder.h" + +#include "backend/ITensor.h" + +#include <cassert> +#include <cmath> + +namespace onert +{ +namespace exec +{ + +MinMaxRecorder::MinMaxRecorder(const std::string &minmax_filepath, const ir::Graph &graph, + const backend::BackendContexts &backend_contexts) + : _graph{graph}, _backend_contexts{backend_contexts}, _h5dumper(minmax_filepath) +{ +} + +void MinMaxRecorder::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_idx, + ir::OperationIndex op_idx, const backend::Backend *backend) +{ + const auto &tensor_reg = _backend_contexts.at(backend)->tensor_registry; + const auto &op = _graph.operations().at(op_idx); + const auto &outputs = op.getOutputs(); + // TODO: Support multiple output + if (outputs.size() != 1) + throw std::runtime_error("Only 1 output operator is supported for recording minmax."); + + auto tensor = tensor_reg->getITensor(outputs.at(0)); + + // Logic copied from MinMaxObserver.cpp. + + // Filter Ops + if (tensor->is_constant()) + return; + + if (tensor->data_type() != ir::DataType::FLOAT32) + return; + + switch (op.opcode()) + { + // Operators with multiple outputs + case ir::OpCode::If: + case ir::OpCode::Split: + case ir::OpCode::SplitV: + case ir::OpCode::TopKV2: + case ir::OpCode::Unpack: + case ir::OpCode::While: + return; + // NOTE: Sin, Cos, Tanh's output is in [-1, 1] + // We may not need to dump those operators. + default:; // Do Nothing + } + + // Otherwise, dump! 
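(Illustrative aside, not part of this patch: the loop that follows computes a per-tensor min/max while skipping NaN values. Ranges recorded this way are the usual input to a later post-training quantization step; a minimal sketch of turning one recorded range into standard uint8 asymmetric quantization parameters is given here. The helper name and clamping policy are assumptions for illustration only, not onert API.)

#include <cmath>
#include <cstdint>

// Sketch: derive uint8 asymmetric quantization parameters from a recorded (min, max).
// Assumes max > min and that the calibration step has nudged the range so that min <= 0 <= max.
inline void deriveUint8QuantParams(float min, float max, float &scale, int32_t &zero_point)
{
  scale = (max - min) / 255.0f;                                 // spread the recorded range over [0, 255]
  zero_point = static_cast<int32_t>(std::lround(-min / scale)); // real value 0.0 maps to zero_point
  if (zero_point < 0)                                           // keep zero_point representable in uint8
    zero_point = 0;
  if (zero_point > 255)
    zero_point = 255;
}
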
+ assert(tensor->data_type() == ir::DataType::FLOAT32); + const auto data = reinterpret_cast<float *>(tensor->buffer()); + const auto num_elements = tensor->total_size() / sizeof(float); + + float max = std::numeric_limits<float>::lowest(); + float min = std::numeric_limits<float>::max(); + + bool all_nan = true; + for (size_t i = 0; i < num_elements; ++i) + { + const float number = data[i]; + if (std::isnan(number)) + continue; + + if (number == std::numeric_limits<float>::lowest()) + continue; + + all_nan = false; + + if (number > max) + max = number; + + if (number < min) + min = number; + } + + if (all_nan) + throw std::runtime_error("All values are NaN(Not a Number)"); + + _minmax_map.append({subg_idx, op_idx}, min, max); +} + +void MinMaxRecorder::handleSubgraphEnd(ir::SubgraphIndex) +{ + // It would be better to dump at the end of model execution, not subgraph + // But it requires more changes than subgraph. + _h5dumper.dump(_minmax_map); +} + +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/MinMaxRecorder.h b/runtime/onert/core/src/exec/MinMaxRecorder.h new file mode 100644 index 000000000..7a0817f5f --- /dev/null +++ b/runtime/onert/core/src/exec/MinMaxRecorder.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_EXEC_MINMAX_RECORDER__ +#define __ONERT_EXEC_MINMAX_RECORDER__ + +#include "ExecutionObservers.h" +#include "ir/Index.h" +#include "exec/MinMaxMap.h" +#include "../dumper/h5/MinMaxDumper.h" + +#include <memory> + +namespace onert +{ +namespace exec +{ + +class MinMaxRecorder : public IExecutionObserver +{ +public: + MinMaxRecorder(const std::string &minmax_filepath, const ir::Graph &graph, + const backend::BackendContexts &backend_contexts); + void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex, + const backend::Backend *) override + { + return; + } + void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex, + const backend::Backend *) override; + void handleSubgraphEnd(ir::SubgraphIndex) override; + +private: + const ir::Graph &_graph; + const backend::BackendContexts &_backend_contexts; + dumper::h5::MinMaxDumper _h5dumper; + SMMinMaxMap _minmax_map; +}; + +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_MINMAX_RECORDER__ diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc index ab234aacd..9da7c82b4 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.cc +++ b/runtime/onert/core/src/exec/ParallelExecutor.cc @@ -31,7 +31,7 @@ class HookFunction : public IFunction public: HookFunction(IFunction *fn, const std::function<void()> &setup, const std::function<void()> &teardown) - : _fn{fn}, _setup{setup}, _teardown{teardown} + : _fn{fn}, _setup{setup}, _teardown{teardown} { } @@ -59,14 +59,13 @@ void ParallelExecutor::notify(uint32_t finished_job_id) _cv_jobs.notify_all(); } -ParallelExecutor::ParallelExecutor( - std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, - const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs, - compiler::CodeMap &&code_map) - : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, - std::move(tensor_mgrs), std::move(code_map)} +ParallelExecutor::ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, + backend::BackendContexts &&backend_contexts, + const compiler::TensorRegistries &tensor_regs, + compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx) + : DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, + std::move(code_map), tracing_ctx} { VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl; } @@ -76,12 +75,12 @@ void ParallelExecutor::executeImpl() bool dynamic_input_exists = hasDynamicInput(); // Init scheduler - // TODO Consider to have distinct backend set in LowerInfoMap + // TODO Consider to have distinct backend set in GraphLowerInfo BackendSet backends; - for (auto &itr : _lowered_graph->getLowerInfo()->op_seq) - { - backends.add(itr.second->backend()); - } + _lowered_graph->lower_info().operation.iterate( + [&](const ir::OperationIndex &, const compiler::OperationLowerInfo &lower_info) { + backends.add(lower_info.backend()); + }); _scheduler = std::make_unique<ParallelScheduler>(backends); assert(noWaitingJobs()); @@ -101,7 +100,10 @@ void ParallelExecutor::executeImpl() VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl; - _subject.notifyModelBegin(this); + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); + while (true) 
{ std::unique_lock<std::mutex> lock{_mu_jobs}; @@ -121,20 +123,24 @@ void ParallelExecutor::executeImpl() lock.unlock(); - VERBOSE(ParallelExecutor) << "Assigning fn #" << job->index() << std::endl; + VERBOSE(ParallelExecutor) << "Assigning fn " << job->index() << std::endl; auto job_index = job->index(); - auto op_sequence_index = _job_to_op_seq[job_index]; - auto op_seq = &_lowered_graph->op_seqs().at(op_sequence_index); - auto backend = _lowered_graph->getLowerInfo()->op_seq.at(op_sequence_index)->backend(); - auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); }; - auto teardown = [&, job_index, op_seq, backend]() { - _subject.notifyJobEnd(this, op_seq, backend); + auto op_ind = _job_to_op[job_index]; + auto backend = _lowered_graph->lower_info().operation.at(op_ind).backend(); + auto setup = [&, op_ind, backend]() { + _subject.notifyJobBegin(this, profiling_subg_index, op_ind, backend); + }; + auto teardown = [&, job_index, op_ind, backend]() { + _subject.notifyJobEnd(this, profiling_subg_index, op_ind, backend); notify(job_index); }; + job->fn_seq()->initRunning(); + // dynamic tensor setting - bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists; + bool handle_dynamic_tensor = + _lowered_graph->getHasDynamicTensor(op_ind) || dynamic_input_exists; job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor); _scheduler->assign(std::make_unique<HookFunction>(job->fn_seq(), setup, teardown), backend); @@ -145,7 +151,7 @@ void ParallelExecutor::executeImpl() // Wait for all the jobs done _scheduler->finish(); - _subject.notifyModelEnd(this); + _subject.notifySubgraphEnd(profiling_subg_index); // Reset input info for the next execution _input_info = _initial_input_info; diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h index 929edfce9..7d459b0b4 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.h +++ b/runtime/onert/core/src/exec/ParallelExecutor.h @@ -17,17 +17,12 @@ #ifndef __ONERT_EXEC_PARALLEL_EXECUTOR_H__ #define __ONERT_EXEC_PARALLEL_EXECUTOR_H__ -#include <list> -#include <queue> -#include <unordered_map> +#include "DataflowExecutor.h" +#include "ParallelScheduler.h" + +#include "util/TracingCtx.h" -#include "exec/FunctionSequence.h" -#include "Job.h" -#include "ir/OperandIndexSequence.h" -#include "ir/Index.h" #include <memory> -#include "exec/DataflowExecutor.h" -#include "ParallelScheduler.h" namespace onert { @@ -48,13 +43,12 @@ public: * * @param lowered_graph LoweredGraph object * @param tensor_builders Tensor builders that are currently used - * @param code_map OpSequence and its code map + * @param code_map @c ir::Operation and its code map */ ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, - const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorRegistries &tensor_regs, - backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map); + backend::BackendContexts &&backend_contexts, + const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx); void executeImpl() override; diff --git a/runtime/onert/core/src/exec/ParallelScheduler.cc b/runtime/onert/core/src/exec/ParallelScheduler.cc index 70c9c3dd6..538945631 100644 --- a/runtime/onert/core/src/exec/ParallelScheduler.cc +++ b/runtime/onert/core/src/exec/ParallelScheduler.cc @@ -30,7 +30,7 @@ 
ParallelScheduler::ParallelScheduler(const BackendSet &backends) { assert(!backends.empty()); - for (auto backend : backends) + for (auto &&backend : backends) { _thread_pools[backend] = std::make_unique<ThreadPool>(); } @@ -45,7 +45,7 @@ void ParallelScheduler::assign(std::unique_ptr<IFunction> &&fn, const backend::B void ParallelScheduler::finish() { - for (auto &itr : _thread_pools) + for (auto &&itr : _thread_pools) { itr.second->finish(); } diff --git a/runtime/onert/core/src/exec/SingleModelExecutors.cc b/runtime/onert/core/src/exec/SingleModelExecutors.cc new file mode 100644 index 000000000..4b954bab2 --- /dev/null +++ b/runtime/onert/core/src/exec/SingleModelExecutors.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SingleModelExecutors.h" + +#include "../backend/builtin/IOTensor.h" + +namespace onert +{ +namespace exec +{ + +void SingleModelExecutors::emplace(const ir::ModelIndex &, const ir::SubgraphIndex &subg_index, + std::unique_ptr<IExecutor> exec) +{ + _executors.emplace(subg_index, std::move(exec)); +} + +IExecutor *SingleModelExecutors::at(const ir::ModelIndex &, + const ir::SubgraphIndex &subg_index) const +{ + return _executors.at(subg_index).get(); +} + +uint32_t SingleModelExecutors::inputSize() const +{ + return entryExecutor()->getInputTensors().size(); +} + +uint32_t SingleModelExecutors::outputSize() const +{ + return entryExecutor()->getOutputTensors().size(); +} + +const ir::OperandInfo &SingleModelExecutors::inputInfo(const ir::IOIndex &index) const +{ + return entryExecutor()->getInputTensors().at(index.value())->orig_info(); +} + +const ir::OperandInfo &SingleModelExecutors::outputInfo(const ir::IOIndex &index) const +{ + return entryExecutor()->getOutputTensors().at(index.value())->orig_info(); +} + +void SingleModelExecutors::execute(const IODescription &desc) { entryExecutor()->execute(desc); } + +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/SingleModelExecutors.h b/runtime/onert/core/src/exec/SingleModelExecutors.h new file mode 100644 index 000000000..98d629eae --- /dev/null +++ b/runtime/onert/core/src/exec/SingleModelExecutors.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__ +#define __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__ + +#include "exec/IExecutors.h" +#include "ir/NNPkg.h" + +namespace onert +{ +namespace exec +{ + +/** + * @brief Class to gather executor set for single model NN package + */ +class SingleModelExecutors : public IExecutors +{ +public: + /** + * @brief Construct a new SingleModelExecutors object + */ + SingleModelExecutors(void) = default; + SingleModelExecutors(const SingleModelExecutors &) = delete; + SingleModelExecutors(SingleModelExecutors &&) = default; + + /** + * @brief Destroy the SingleModelExecutors object + */ + ~SingleModelExecutors() = default; + +public: + void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index, + std::unique_ptr<IExecutor> exec) override; + + IExecutor *at(const ir::ModelIndex &model_index, + const ir::SubgraphIndex &subg_index) const override; + + uint32_t inputSize() const override; + + uint32_t outputSize() const override; + + const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override; + + const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override; + + void execute(const IODescription &desc) override; + +private: + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors; +}; + +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__ diff --git a/runtime/onert/core/src/exec/Sink.h b/runtime/onert/core/src/exec/Sink.h deleted file mode 100644 index 6a99efe60..000000000 --- a/runtime/onert/core/src/exec/Sink.h +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_EXEC_SINK_H__ -#define __ONERT_EXEC_SINK_H__ - -#include "feature/nchw/Reader.h" -#include "feature/nchw/View.h" -#include "feature/nhwc/Reader.h" -#include "feature/nhwc/View.h" - -#include <cassert> -#include <memory> -#include "util/Utils.h" -#include <misc/feature/IndexIterator.h> - -namespace onert -{ -namespace exec -{ -struct ISink -{ - virtual ~ISink() = default; - - virtual void pull(::onert::backend::ITensor &tensor) const = 0; -}; - -// Create second lever inheritance: the first lever is used as a reference type in use-case places -template <typename T> class ITemplSink : public ISink -{ -public: - ITemplSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape, - const bool copy, ir::Layout io_layout) - : _output_buffer{reinterpret_cast<T *>(output_buffer)}, _output_size{output_size}, - _shape{shape}, _copy{copy}, _io_layout{io_layout} - { - } - -protected: - void pullUnif(onert::backend::ITensor &tensor) const - { - assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) || - (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) || - _copy); - auto input_buffer = tensor.buffer(); - auto rank = _shape.rank(); - - if (!tensor.has_padding() && rank < 4 + _copy) - { - memcpy(_output_buffer, input_buffer, _output_size); - return; - } - - switch (rank) - { - case 0: - case 1: - { - memcpy(_output_buffer, input_buffer, _output_size); - break; - } - case 2: - { - const int32_t copy_len = _shape.dim(1); - - for (auto i = 0; i < _shape.dim(0); ++i) - { - ir::Coordinates coords{i, 0}; - memcpy(_output_buffer + i * copy_len, input_buffer + tensor.calcOffset(coords), - copy_len * sizeof(T)); - } - break; - } - case 3: - { - const int32_t dim1 = _shape.dim(1); - const int32_t dim2 = _shape.dim(2); - - for (auto i = 0; i < _shape.dim(0); ++i) - { - for (auto j = 0; j < _shape.dim(1); ++j) - { - ir::Coordinates coords{i, j, 0}; - memcpy(_output_buffer + i * dim1 * dim2 + j * dim2, - input_buffer + tensor.calcOffset(coords), dim2 * sizeof(T)); - } - } - break; - } - case 4: - { - if (_copy) - { - const int32_t dim1 = _shape.dim(1); - const int32_t dim2 = _shape.dim(2); - const int32_t dim3 = _shape.dim(3); - - for (auto i = 0; i < _shape.dim(0); ++i) - { - for (auto j = 0; j < _shape.dim(1); ++j) - { - for (auto k = 0; k < _shape.dim(2); ++k) - { - ir::Coordinates coords{i, j, k, 0}; - memcpy(_output_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3, - input_buffer + tensor.calcOffset(coords), dim3 * sizeof(T)); - } - } - } - } - else - { - const auto shape = _shape.asFeature(_io_layout); - - if (_io_layout == ir::Layout::NHWC) - { - const exec::feature::nchw::Reader<T> from(&tensor); - exec::feature::nhwc::View<T> into(shape, _output_buffer, _output_size); - feature::iterate(shape) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, ch, row, col); - into.at(batch, row, col, ch) = value; - }; - } - else if (_io_layout == ir::Layout::NCHW) - { - const exec::feature::nhwc::Reader<T> from(&tensor); - exec::feature::nchw::View<T> into(shape, _output_buffer, _output_size); - feature::iterate(shape) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, row, col, ch); - into.at(batch, ch, row, col) = value; - }; - } - else - { - throw std::runtime_error("Wrong Layout"); - } - } - break; - } - default: - throw std::runtime_error("NYI: rank > 4"); - break; - } - } - -private: - T *_output_buffer; - const 
size_t _output_size; - const ir::Shape _shape; - const bool _copy; - const ir::Layout _io_layout; -}; - -template <typename T> class PermutateSink final : public ITemplSink<T> -{ -public: - PermutateSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape, - ir::Layout io_layout) - : ITemplSink<T>(output_buffer, output_size, shape, false, io_layout) - { - } - -public: - void pull(onert::backend::ITensor &tensor) const override { ITemplSink<T>::pullUnif(tensor); } -}; - -// Only supports NHWC format front-end(NNAPI) now -template <typename T> class CopySink final : public ITemplSink<T> -{ -public: - CopySink(void *output_buffer, const size_t &output_size, const ir::Shape &shape, - ir::Layout io_layout = ir::Layout::UNKNOWN) - : ITemplSink<T>(output_buffer, output_size, shape, true, io_layout) - { - } - -public: - void pull(onert::backend::ITensor &tensor) const override { ITemplSink<T>::pullUnif(tensor); } -}; - -} // namespace exec -} // namespace onert - -#endif // __ONERT_EXEC_SINK_H__ diff --git a/runtime/onert/core/src/exec/Source.h b/runtime/onert/core/src/exec/Source.h deleted file mode 100644 index fb2be4dd8..000000000 --- a/runtime/onert/core/src/exec/Source.h +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_EXEC_SOURCE_H__ -#define __ONERT_EXEC_SOURCE_H__ - -#include "feature/IndexIterator.h" -#include "feature/nchw/Reader.h" -#include "feature/nchw/View.h" -#include "feature/nhwc/Reader.h" -#include "feature/nhwc/View.h" - -#include <cassert> -#include <memory> -#include "util/Utils.h" -#include <ir/Layout.h> -#include "ir/Shape.h" - -namespace onert -{ -namespace exec -{ - -struct ISource -{ - virtual ~ISource() = default; - - virtual void push(::onert::backend::ITensor &tensor) const = 0; -}; - -// Create second lever inheritance: the first lever is used as a reference type in use-case places -template <typename T> class ITemplSource : public ISource -{ -public: - ITemplSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape, - const bool copy, ir::Layout io_layout) - : _input_buffer{reinterpret_cast<const T *>(input_buffer)}, _input_size{input_size}, - _shape{shape}, _copy(copy), _io_layout{io_layout} - { - } - - virtual void push(::onert::backend::ITensor &tensor) const = 0; - -protected: - void pushUnif(onert::backend::ITensor &tensor) const - { - assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) || - (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) || - _copy); - auto output_buffer = tensor.buffer(); - auto rank = _shape.rank(); - - if (!tensor.has_padding() && rank < 4 + _copy) - { - memcpy(output_buffer, _input_buffer, _input_size); - return; - } - - switch (rank) - { - case 0: - case 1: - { - memcpy(output_buffer, _input_buffer, _input_size); - break; - } - case 2: - { - const int32_t copy_len = _shape.dim(1); - - for (auto i = 0; i < _shape.dim(0); ++i) - { - ir::Coordinates coords{i, 0}; - memcpy(output_buffer + tensor.calcOffset(coords), _input_buffer + i * copy_len, - copy_len * sizeof(T)); - } - break; - } - case 3: - { - const int32_t dim1 = _shape.dim(1); - const int32_t dim2 = _shape.dim(2); - - for (auto i = 0; i < _shape.dim(0); ++i) - { - for (auto j = 0; j < _shape.dim(1); ++j) - { - ir::Coordinates coords{i, j, 0}; - memcpy(output_buffer + tensor.calcOffset(coords), - _input_buffer + i * dim1 * dim2 + j * dim2, dim2 * sizeof(T)); - } - } - break; - } - case 4: - { - if (_copy) - { - const int32_t dim1 = _shape.dim(1); - const int32_t dim2 = _shape.dim(2); - const int32_t dim3 = _shape.dim(3); - for (auto i = 0; i < _shape.dim(0); ++i) - { - for (auto j = 0; j < _shape.dim(1); ++j) - { - for (auto k = 0; k < _shape.dim(2); ++k) - { - ir::Coordinates coords{i, j, k, 0}; - memcpy(output_buffer + tensor.calcOffset(coords), - _input_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3, - dim3 * sizeof(T)); - } - } - } - } - else - { - const auto shape = _shape.asFeature(_io_layout); - - if (_io_layout == ir::Layout::NCHW) - { - const exec::feature::nchw::Reader<T> from(shape, _input_buffer, _input_size); - exec::feature::nhwc::View<T> into(&tensor); - feature::iterate(shape) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, ch, row, col); - into.at(batch, row, col, ch) = value; - }; - } - else if (_io_layout == ir::Layout::NHWC) - { - const exec::feature::nhwc::Reader<T> from(shape, _input_buffer, _input_size); - exec::feature::nchw::View<T> into(&tensor); - feature::iterate(shape) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, row, col, ch); - into.at(batch, ch, row, col) = value; - }; - } - else - { - throw std::runtime_error("Wrong Layout"); - } - } - - 
break; - } - default: - throw std::runtime_error("NYI: rank > 4"); - break; - } - } - -private: - const T *_input_buffer; - const size_t _input_size; - const ir::Shape _shape; - const bool _copy; - const ir::Layout _io_layout; -}; - -template <typename T> class PermutateSource final : public ITemplSource<T> -{ -public: - PermutateSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape, - ir::Layout io_layout) - : ITemplSource<T>(input_buffer, input_size, shape, false, io_layout) - { - } - -public: - void push(onert::backend::ITensor &tensor) const override - { - // do NHWC_TO_NCHW or NCHW_TO_NHWC permutation - ITemplSource<T>::pushUnif(tensor); - } -}; - -template <typename T> class CopySource final : public ITemplSource<T> -{ -public: - CopySource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape, - ir::Layout io_layout = ir::Layout::UNKNOWN) - : ITemplSource<T>(input_buffer, input_size, shape, true, io_layout) - { - } - -public: - void push(onert::backend::ITensor &tensor) const override { ITemplSource<T>::pushUnif(tensor); } -}; - -} // namespace exec -} // namespace onert - -#endif // __ONERT_EXEC_SOURCE_H__ diff --git a/runtime/onert/core/src/exec/ThreadPool.cc b/runtime/onert/core/src/exec/ThreadPool.cc index c8e0e3265..bf85e59f6 100644 --- a/runtime/onert/core/src/exec/ThreadPool.cc +++ b/runtime/onert/core/src/exec/ThreadPool.cc @@ -48,7 +48,7 @@ uint32_t ThreadPool::numJobsInQueue() { return _worker.numJobsInQueue(); } void ThreadPool::join() { - for (auto &thread : _threads) + for (auto &&thread : _threads) { thread.join(); } diff --git a/runtime/onert/core/src/exec/feature/MockTensor.h b/runtime/onert/core/src/exec/feature/MockTensor.h new file mode 100644 index 000000000..1d2d375e2 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/MockTensor.h @@ -0,0 +1,66 @@ + +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/ITensor.h" + +template <typename T> class MockTensor : public onert::backend::ITensor +{ +public: + MockTensor<T>(onert::ir::Shape &shape, T *buf, onert::ir::Layout layout) + : _buf(reinterpret_cast<uint8_t *>(buf)), _shape(shape), _layout(layout) + { + } + +public: + uint8_t *buffer() const override { return _buf; } + + size_t calcOffset(const onert::ir::Coordinates &coords) const override + { + size_t rank = _shape.rank(); + rank = rank == 0 ? 1 : rank; + size_t offset = 0; + for (size_t i = 0; i < rank; ++i) + { + auto dim = _shape.rank() == 0 ? 
1 : _shape.dim(i); + offset = offset * dim + coords[i]; + } + offset *= sizeof(T); + + return offset; + } + + onert::ir::Shape getShape() const override { return _shape; } + +public: // DUMMY methods + size_t total_size() const override { return 0; } + onert::ir::Layout layout() const override { return _layout; } + onert::ir::DataType data_type() const override { return onert::ir::DataType::UINT8; } + float data_scale() const override { return 0; } + int32_t data_zero_point() const override { return 0; } + const std::vector<float> &data_scales() const override { return _dummy_scales; } + const std::vector<int32_t> &data_zero_points() const override { return _dummy_zerops; } + bool has_padding() const override { return false; } + void access(const std::function<void(ITensor &tensor)> &fn) override {} + bool is_dynamic() const override { return false; } + +private: + uint8_t *_buf = nullptr; + onert::ir::Shape _shape; + onert::ir::Layout _layout = onert::ir::Layout::UNKNOWN; + std::vector<float> _dummy_scales; + std::vector<int32_t> _dummy_zerops; +}; diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.h b/runtime/onert/core/src/exec/feature/nchw/Reader.h index 7be9df4d5..d5e3cb97c 100644 --- a/runtime/onert/core/src/exec/feature/nchw/Reader.h +++ b/runtime/onert/core/src/exec/feature/nchw/Reader.h @@ -36,35 +36,36 @@ namespace nchw template <typename T> class Reader : public feature::Reader<T> { public: - // Construct for buffer of model inputs - Reader(const ir::FeatureShape &shape, const T *ptr, size_t len) - : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len} + using Strides = ir::FeatureShape; + // Construct for buffer and strides + Reader(const ir::FeatureShape &shape, const Strides &strides, const T *ptr, size_t len) + : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len} { - assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len); - - // No padding - _strides.W = sizeof(T); - _strides.H = shape.W * sizeof(T); - _strides.C = shape.W * shape.H * sizeof(T); - _strides.N = shape.W * shape.H * shape.C * sizeof(T); + UNUSED_RELEASE(len); // Workaround for unused variable in release mode + assert(len == static_cast<size_t>(strides.N != 0 + ? shape.N * strides.N + : strides.C != 0 ? shape.C * strides.C + : strides.H != 0 ? shape.H * strides.H + : shape.W * strides.W)); } // Construct for backend tensor Reader(backend::ITensor *tensor) - : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()} + : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()} { assert(tensor->layout() == ir::Layout::NCHW); const auto start_offset = tensor->calcOffset({0, 0, 0, 0}); - _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset; - _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset; - _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset; - _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset; - - _shape.W = tensor->dimension(3); - _shape.H = tensor->dimension(2); - _shape.C = tensor->dimension(1); - _shape.N = tensor->dimension(0); + auto shape = tensor->getShape(); + _strides.W = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset; + _strides.H = shape.dim(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset; + _strides.C = shape.dim(1) == 1 ? 
0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset; + _strides.N = shape.dim(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset; + + _shape.W = shape.dim(3); + _shape.H = shape.dim(2); + _shape.C = shape.dim(1); + _shape.N = shape.dim(0); } public: @@ -104,7 +105,6 @@ private: private: // TODO Remove _shape ir::FeatureShape _shape; - using Strides = ir::FeatureShape; Strides _strides; const uint8_t *_ptr; size_t _len; diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc new file mode 100644 index 000000000..f439cafb5 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Reader.h" + +#include "../MockTensor.h" + +#include <gtest/gtest.h> + +using namespace onert::exec::feature; + +template <typename T> class Reader_nchw : public testing::Test +{ +public: + void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createReader() + { + _reader = + std::make_shared<nchw::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW); + _reader = std::make_shared<nchw::Reader<T>>(_tensor.get()); + } + + std::shared_ptr<Reader<T>> _reader = nullptr; + +private: + std::shared_ptr<std::vector<T>> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr<MockTensor<T>> _tensor = nullptr; +}; + +using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>; +TYPED_TEST_SUITE(Reader_nchw, ReaderTypes); + +TYPED_TEST(Reader_nchw, basic_reader) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 6, 2, 1); + this->createReader(); + + // Data: NCHW + // Shape: NCHW + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_reader->at(1, 1, 0), 8); + + // Data: NCHW + // Shape: NCHW + this->createUsingMockTensor(); + + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_reader->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/exec/feature/nchw/View.h b/runtime/onert/core/src/exec/feature/nchw/View.h index dbaf1a91e..cdbb0cd7c 100644 --- a/runtime/onert/core/src/exec/feature/nchw/View.h +++ b/runtime/onert/core/src/exec/feature/nchw/View.h @@ -37,8 +37,10 @@ 
namespace nchw template <typename T> class View final : public Reader<T> { public: + using Strides = typename Reader<T>::Strides; // Construct for buffer of model inputs - View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len} + View(const ir::FeatureShape &shape, const Strides &strides, T *ptr, size_t len) + : Reader<T>{shape, strides, ptr, len} { // DO NOTHING } diff --git a/runtime/onert/core/src/exec/feature/nchw/View.test.cc b/runtime/onert/core/src/exec/feature/nchw/View.test.cc new file mode 100644 index 000000000..c6dcda710 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/nchw/View.test.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "View.h" + +#include "../MockTensor.h" + +#include <gtest/gtest.h> + +using namespace onert::exec::feature; + +template <typename T> class View_nchw : public testing::Test +{ +public: + void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createView() + { + _view = + std::make_shared<nchw::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW); + _view = std::make_shared<nchw::View<T>>(_tensor.get()); + } + + std::shared_ptr<nchw::View<T>> _view = nullptr; + +private: + std::shared_ptr<std::vector<T>> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr<MockTensor<T>> _tensor = nullptr; +}; + +using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>; +TYPED_TEST_SUITE(View_nchw, ViewTypes); + +TYPED_TEST(View_nchw, basic_view) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 6, 2, 1); + this->createView(); + + // Data: NCHW + // Shape: NCHW + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_view->at(1, 1, 0), 8); + + // Data: NCHW + // Shape: NCHW + this->createUsingMockTensor(); + + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_view->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.h b/runtime/onert/core/src/exec/feature/nhwc/Reader.h index 7730cee72..0bc1ee95b 100644 --- a/runtime/onert/core/src/exec/feature/nhwc/Reader.h +++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.h @@ -37,36 +37,36 @@ namespace nhwc template <typename T> class Reader : public 
feature::Reader<T> { public: - // Construct for buffer of model inputs - Reader(const ir::FeatureShape &shape, const T *ptr, size_t len) - : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len} + using Strides = ir::FeatureShape; + // Construct for buffer and strides + Reader(const ir::FeatureShape &shape, const Strides &strides, const T *ptr, size_t len) + : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len} { UNUSED_RELEASE(len); // Workaround for unused variable in release mode - assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len); - - // No padding - _strides.C = sizeof(T); - _strides.W = shape.C * sizeof(T); - _strides.H = shape.C * shape.W * sizeof(T); - _strides.N = shape.C * shape.W * shape.H * sizeof(T); + assert(len == static_cast<size_t>(strides.N != 0 + ? shape.N * strides.N + : strides.H != 0 ? shape.H * strides.H + : strides.W != 0 ? shape.W * strides.W + : shape.C * strides.C)); } // Construct for backend tensor Reader(const backend::ITensor *tensor) - : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()} + : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()} { assert(tensor->layout() == ir::Layout::NHWC); const auto start_offset = tensor->calcOffset({0, 0, 0, 0}); - _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset; - _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset; - _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset; - _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset; - - _shape.C = tensor->dimension(3); - _shape.W = tensor->dimension(2); - _shape.H = tensor->dimension(1); - _shape.N = tensor->dimension(0); + auto shape = tensor->getShape(); + _strides.C = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset; + _strides.W = shape.dim(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset; + _strides.H = shape.dim(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset; + _strides.N = shape.dim(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset; + + _shape.C = shape.dim(3); + _shape.W = shape.dim(2); + _shape.H = shape.dim(1); + _shape.N = shape.dim(0); } public: @@ -106,7 +106,6 @@ private: private: // TODO Remove _shape ir::FeatureShape _shape; - using Strides = ir::FeatureShape; Strides _strides; const uint8_t *_ptr; size_t _len; diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc new file mode 100644 index 000000000..773199042 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Reader.h" + +#include "../MockTensor.h" + +#include <gtest/gtest.h> + +using namespace onert::exec::feature; + +template <typename T> class Reader_nhwc : public testing::Test +{ +public: + void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createReader() + { + _reader = + std::make_shared<nhwc::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC); + _reader = std::make_shared<nhwc::Reader<T>>(_tensor.get()); + } + + std::shared_ptr<nhwc::Reader<T>> _reader = nullptr; + +private: + std::shared_ptr<std::vector<T>> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr<MockTensor<T>> _tensor = nullptr; +}; + +using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>; +TYPED_TEST_SUITE(Reader_nhwc, ReaderTypes); +TYPED_TEST_SUITE(MockTensorReader_nhwc, ReaderTypes); + +TYPED_TEST(Reader_nhwc, basic_reader) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 1, 6, 2); + this->createReader(); + + // Data: NCHW + // Shape: NHWC + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_reader->at(1, 1, 0), 8); + + // Data: NHWC + // Shape: NHWC + this->createUsingMockTensor(); + + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_reader->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h index 72c8c3415..c98d050c3 100644 --- a/runtime/onert/core/src/exec/feature/nhwc/View.h +++ b/runtime/onert/core/src/exec/feature/nhwc/View.h @@ -17,7 +17,7 @@ #ifndef __ONERT_EXEC_FEATURE_NHWC_VIEW_H__ #define __ONERT_EXEC_FEATURE_NHWC_VIEW_H__ -#include "../Reader.h" +#include "Reader.h" #include <cassert> #include <cstddef> @@ -38,8 +38,10 @@ namespace nhwc template <typename T> class View final : public Reader<T> { public: - // Construct for buffer of model inputs - View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len} + using Strides = typename Reader<T>::Strides; + // Construct for buffer and strides + View(const ir::FeatureShape &shape, const Strides &strides, T *ptr, size_t len) + : Reader<T>{shape, strides, ptr, len} { // DO NOTHING } diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.test.cc b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc new file mode 100644 index 000000000..bdd73d5a7 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "View.h" + +#include "../MockTensor.h" + +#include <gtest/gtest.h> + +using namespace onert::exec::feature; + +template <typename T> class View_nhwc : public testing::Test +{ +public: + void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createView() + { + _view = + std::make_shared<nhwc::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC); + _view = std::make_shared<nhwc::View<T>>(_tensor.get()); + } + + std::shared_ptr<nhwc::View<T>> _view = nullptr; + +private: + std::shared_ptr<std::vector<T>> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr<MockTensor<T>> _tensor = nullptr; +}; + +using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>; +TYPED_TEST_SUITE(View_nhwc, ViewTypes); +TYPED_TEST_SUITE(MockTensorView_nhwc, ViewTypes); + +TYPED_TEST(View_nhwc, basic_view) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 1, 6, 2); + this->createView(); + + // Data: NCHW + // Shape: NHWC + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_view->at(1, 1, 0), 8); + + // Data: NHWC + // Shape: NHWC + this->createUsingMockTensor(); + + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_view->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/exec/train/TrainableExecutor.cc b/runtime/onert/core/src/exec/train/TrainableExecutor.cc new file mode 100644 index 000000000..9c7e70c29 --- /dev/null +++ b/runtime/onert/core/src/exec/train/TrainableExecutor.cc @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TrainableExecutor.h" +#ifdef RUY_PROFILER +#include "ruy/profiler/instrumentation.h" +#endif + +#include <misc/polymorphic_downcast.h> + +namespace onert +{ +namespace exec +{ +namespace train +{ + +TrainableExecutor::TrainableExecutor( + std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph, + backend::train::TrainableBackendContexts &&backend_contexts, + const compiler::train::TensorRegistries &tensor_regs, + compiler::train::TrainableCodeMap &&code_map, const std::vector<ir::OperationIndex> &order, + const util::TracingCtx *tracing_ctx) + : _lowered_graph{std::move(lowered_graph)}, _backend_contexts{std::move(backend_contexts)}, + _trainable_graph{_lowered_graph->trainable_graph()}, _tensor_regs{std::move(tensor_regs)}, + _mutex(), _tracing_ctx(tracing_ctx) +{ + auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) { + assert(tensors.empty()); + for (auto &&ind : ind_seq) + { + backend::ITensor *tensor = tensor_regs.getITensor(ind); + assert(tensor != nullptr); + auto io_tensor = nnfw::misc::polymorphic_downcast<backend::builtin::IOTensor *>(tensor); + tensors.push_back(io_tensor); + } + }; + build_tensor_list(_trainable_graph.getInputs(), _input_tensors); + build_tensor_list(_trainable_graph.getOutputs(), _output_tensors); + + for (auto &&index : order) + { + auto &trainable_code = code_map.at(index); + _code.emplace_back(std::move(trainable_code)); + } +} + +void TrainableExecutor::execute(const std::vector<backend::IPortableTensor *> &, + const std::vector<backend::IPortableTensor *> &) +{ + throw std::runtime_error("TrainableExecutor does not support multiple subgraphs yet"); +} + +void TrainableExecutor::forward(const IODescription &desc, bool training) +{ + // For thread-safe, use mutex + // TODO: if all used backends on this executor are thread-safe, + // do not need to use mutex (otherwise, use mutex) + std::lock_guard<std::mutex> lock(_mutex); + + // TODO Update IO tensors if desc has dynamic input + // Set input(s) + assert(_input_tensors.size() == desc.inputs.size()); + for (uint32_t i = 0; i < _input_tensors.size(); ++i) + { + auto tensor = _input_tensors[i]; + + // TODO Check if (desc.inputs[i] == nullptr) + // TODO Better design for ITensor? 
(we need const_cast as ITensor is writable) + tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)), + desc.inputs[i]->size); + } + + if (!training) + { + // Set output(s) + assert(_output_tensors.size() == desc.outputs.size()); + for (uint32_t i = 0; i < _output_tensors.size(); ++i) + { + auto tensor = _output_tensors[i]; + + if (desc.outputs[i] == nullptr) + throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."}; + tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size); + } + } + + forwardImpl(training); + + // TODO Update output(s) desc if desc has dynamic input +} + +void TrainableExecutor::forwardImpl(bool training) +{ + if (_tracing_ctx) + { + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_trainable_graph.graph()); + + _subject.notifySubgraphBegin(profiling_subg_index); + for (auto &&code : _code) + { + const auto backend = code.lower_info->backend(); +// TODO : Move ruy profiler into ExecutionObserver +#ifdef RUY_PROFILER + ruy::profiler::ScopeLabel label(code.op->name()); +#endif + _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend); + + auto &tn_seq = code.tn_seq; + tn_seq->forward(training); + + _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend); + } + _subject.notifySubgraphEnd(profiling_subg_index); + } + else + { + for (auto &&code : _code) + { +// TODO : Move ruy profiler into ExecutionObserver +#ifdef RUY_PROFILER + ruy::profiler::ScopeLabel label(code.op->name()); +#endif + auto &tn_seq = code.tn_seq; + tn_seq->forward(training); + } + } +} + +void TrainableExecutor::backward(const IODescription &, uint32_t training_step) +{ + // For thread-safe, use mutex + // TODO: if all used backends on this executor are thread-safe, + // do not need to use mutex (otherwise, use mutex) + std::lock_guard<std::mutex> lock(_mutex); + + backwardImpl(training_step); +} + +void TrainableExecutor::backwardImpl(uint32_t training_step) +{ + if (_tracing_ctx) + { + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_trainable_graph.graph()); + + _subject.notifySubgraphBegin(profiling_subg_index); + for (auto it = _code.rbegin(); it != _code.rend(); ++it) + { + const auto &code = *it; + const auto backend = code.lower_info->backend(); +// TODO : Move ruy profiler into ExecutionObserver +#ifdef RUY_PROFILER + ruy::profiler::ScopeLabel label(code.op->name()); +#endif + _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend); + + auto &tn_seq = code.tn_seq; + tn_seq->backward(training_step); + + _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend); + } + _subject.notifySubgraphEnd(profiling_subg_index); + } + else + { + for (auto it = _code.rbegin(); it != _code.rend(); ++it) + { + const auto &code = *it; +// TODO : Move ruy profiler into ExecutionObserver +#ifdef RUY_PROFILER + ruy::profiler::ScopeLabel label(code.op->name()); +#endif + auto &tn_seq = code.tn_seq; + tn_seq->backward(training_step); + } + } +} + +float TrainableExecutor::getLoss(const ir::IOIndex &pred_io_ind) const +{ + const auto &loss_ind = _trainable_graph.getLossIndex(pred_io_ind); + if (loss_ind.undefined()) + throw std::runtime_error{"Loss " + std::to_string(loss_ind.value()) + " is not defined."}; + backend::ITensor *tensor = _tensor_regs.getITensor(loss_ind); + auto loss_buf = reinterpret_cast<float *>(tensor->buffer()); + return *loss_buf; +} + +} // namespace train +} // namespace exec +} // namespace onert diff --git 
a/runtime/onert/core/src/exec/train/TrainableExecutor.h b/runtime/onert/core/src/exec/train/TrainableExecutor.h new file mode 100644 index 000000000..6b645305f --- /dev/null +++ b/runtime/onert/core/src/exec/train/TrainableExecutor.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_ +#define __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_ + +#include "exec/IExecutor.h" + +#include "../ExecutionObservee.h" +#include "../../compiler/train/TensorRegistries.h" + +#include "backend/train/TrainableBackendContext.h" +#include "compiler/train/TrainableCodeMap.h" +#include "compiler/train/LoweredTrainableGraph.h" +#include "ir/Index.h" +#include "util/TracingCtx.h" + +namespace onert +{ +namespace exec +{ +namespace train +{ + +class TrainableExecutor : public IExecutor +{ +public: + /** + * @brief Construct a new TrainableExecutor object + * @param lowered_graph LoweredTrainableGraph object + * @param tensor_builders Tensor builders that are currently used + * @param code_map @c ir::Operation and its code map + */ + TrainableExecutor(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph, + backend::train::TrainableBackendContexts &&backend_contexts, + const compiler::train::TensorRegistries &tensor_regs, + compiler::train::TrainableCodeMap &&code_map, + const std::vector<ir::OperationIndex> &order, + const util::TracingCtx *tracing_ctx); + +public: + const ir::Graph &graph() const final { return _trainable_graph.graph(); } + + void execute(const IODescription &desc) override { forward(desc, false); }; + + void execute(const std::vector<backend::IPortableTensor *> &inputs, + const std::vector<backend::IPortableTensor *> &outputs) override; + + void forward(const IODescription &desc, bool training); + void backward(const IODescription &desc, uint32_t training_step); + + // Used only in Dataflow and Parallel Executors + void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final + { + _indexed_ranks = std::move(ranks); + }; + + void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); }; + + const std::vector<backend::builtin::IOTensor *> &getInputTensors() const override + { + return _input_tensors; + } + + const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override + { + return _output_tensors; + } + + float getLoss(const ir::IOIndex &pred_io_ind) const; + + backend::train::TrainableBackendContexts &getBackendContexts() { return _backend_contexts; } + +private: + void forwardImpl(bool training); + void backwardImpl(uint32_t training_step); + +private: + std::vector<compiler::train::TrainableCodeAndInfo> _code; + ExecutionObservee _subject; + std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; + std::unique_ptr<compiler::train::LoweredTrainableGraph> _lowered_graph; + backend::train::TrainableBackendContexts _backend_contexts; + const 
ir::train::TrainableGraph &_trainable_graph; + compiler::train::TensorRegistries _tensor_regs; + std::vector<backend::builtin::IOTensor *> _input_tensors; + std::vector<backend::builtin::IOTensor *> _output_tensors; + std::mutex _mutex; + const util::TracingCtx *_tracing_ctx; +}; + +} // namespace train +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_ diff --git a/runtime/onert/core/src/exec/train/TrainableExecutors.cc b/runtime/onert/core/src/exec/train/TrainableExecutors.cc new file mode 100644 index 000000000..ba39bf0f0 --- /dev/null +++ b/runtime/onert/core/src/exec/train/TrainableExecutors.cc @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TrainableExecutors.h" + +#include "../../backend/builtin/IOTensor.h" + +#include <misc/polymorphic_downcast.h> + +namespace onert +{ +namespace exec +{ +namespace train +{ + +void TrainableExecutors::emplace(const ir::ModelIndex &, const ir::SubgraphIndex &subg_index, + std::unique_ptr<IExecutor> exec) +{ + std::unique_ptr<TrainableExecutor> t_exec{ + nnfw::misc::polymorphic_downcast<TrainableExecutor *>(exec.release())}; + _executors.emplace(subg_index, std::move(t_exec)); +} + +TrainableExecutor *TrainableExecutors::at(const ir::ModelIndex &, + const ir::SubgraphIndex &subg_index) const +{ + return _executors.at(subg_index).get(); +} + +uint32_t TrainableExecutors::inputSize() const { return entryExecutor()->getInputTensors().size(); } + +uint32_t TrainableExecutors::outputSize() const +{ + return entryExecutor()->getOutputTensors().size(); +} + +const ir::OperandInfo &TrainableExecutors::inputInfo(const ir::IOIndex &index) const +{ + return entryExecutor()->getInputTensors().at(index.value())->orig_info(); +} + +const ir::OperandInfo &TrainableExecutors::outputInfo(const ir::IOIndex &index) const +{ + return entryExecutor()->getOutputTensors().at(index.value())->orig_info(); +} + +void TrainableExecutors::execute(const IODescription &desc) +{ + if (_executors.size() > 1) + throw std::runtime_error("TrainableExecutors does not support multiple executors yet"); + entryExecutor()->forward(desc, false); + + // TODO Support multple executors +} + +void TrainableExecutors::train(const IODescription &desc, uint32_t training_step) +{ + if (_executors.size() > 1) + throw std::runtime_error("TrainableExecutors does not support multiple executors yet"); + entryExecutor()->forward(desc, true); + entryExecutor()->backward(desc, training_step); + + // TODO Support multple executors +} + +float TrainableExecutors::getLoss(const ir::IOIndex &index) const +{ + if (_executors.size() > 1) + throw std::runtime_error("TrainableExecutors does not support multiple executors yet"); + return entryExecutor()->getLoss(index); +} + +} // namespace train +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/train/TrainableExecutors.h 
b/runtime/onert/core/src/exec/train/TrainableExecutors.h new file mode 100644 index 000000000..db6d198b1 --- /dev/null +++ b/runtime/onert/core/src/exec/train/TrainableExecutors.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__ +#define __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__ + +#include "TrainableExecutor.h" +#include "exec/IExecutors.h" +#include "ir/NNPkg.h" + +namespace onert +{ +namespace exec +{ +namespace train +{ + +/** + * @brief Class to gather executor set for trainable model NN package + */ +class TrainableExecutors : public IExecutors +{ +public: + /** + * @brief Construct a new TrainableExecutors object + */ + TrainableExecutors(void) = default; + TrainableExecutors(const TrainableExecutors &) = delete; + TrainableExecutors(TrainableExecutors &&) = default; + + /** + * @brief Destroy the TrainableExecutors object + */ + ~TrainableExecutors() = default; + +public: + TrainableExecutors &operator=(const TrainableExecutors &) = delete; + TrainableExecutors &operator=(TrainableExecutors &&) = default; + +public: + void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index, + std::unique_ptr<IExecutor> exec) override; + + TrainableExecutor *at(const ir::ModelIndex &model_index, + const ir::SubgraphIndex &subg_index) const override; + + TrainableExecutor *entryExecutor() const { return at(ir::ModelIndex{0}, ir::SubgraphIndex{0}); } + + uint32_t inputSize() const override; + + uint32_t outputSize() const override; + + const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override; + + const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override; + + void execute(const IODescription &desc) override; + + /** + * @brief Train + * + * @param desc IO information + * @param training_step The number of iterations of an training process. + * In other words, the number of gradient update. + */ + void train(const IODescription &desc, uint32_t training_step); + + float getLoss(const ir::IOIndex &index) const; + +private: + // TODO Append model index to ModelIndex + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<TrainableExecutor>> _executors; +}; + +} // namespace train +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__ diff --git a/runtime/onert/core/src/exec/train/TrainableFnSequence.cc b/runtime/onert/core/src/exec/train/TrainableFnSequence.cc new file mode 100644 index 000000000..084b3d708 --- /dev/null +++ b/runtime/onert/core/src/exec/train/TrainableFnSequence.cc @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "exec/train/TrainableFnSequence.h" + +namespace onert +{ +namespace exec +{ +namespace train +{ + +void TrainableFnSequence::forward(bool training) +{ + for (const auto &function : _functions) + { + function->forward(training); + } +} + +void TrainableFnSequence::backward(uint32_t training_step) +{ + for (auto it = _functions.rbegin(); it != _functions.rend(); ++it) + { + (*it)->backward(); + } + + for (const auto &applier : _appliers) + { + applier->applyGradient(training_step); + } +} + +void TrainableFnSequence::append(std::unique_ptr<ITrainableFunction> &&function) +{ + _functions.push_back(std::move(function)); +} + +void TrainableFnSequence::append(std::unique_ptr<IGradientApplier> &&applier) +{ + _appliers.push_back(std::move(applier)); +} + +void TrainableFnSequence::iterate(const std::function<void(ITrainableFunction &)> &fn) +{ + for (const auto &func : _functions) + { + fn(*func); + } +} + +} // namespace train +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc b/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc new file mode 100644 index 000000000..72b581bf6 --- /dev/null +++ b/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "exec/train/optimizer/OptimizerCode.h" + +#include <unordered_map> + +namespace onert +{ +namespace exec +{ +namespace train +{ +namespace optimizer +{ + +std::string toString(OptimizerCode code) +{ + static const std::unordered_map<OptimizerCode, const char *> map{ + {OptimizerCode::Invalid, "Invalid"}, + {OptimizerCode::SGD, "SGD"}, + {OptimizerCode::Adam, "Adam"}}; + return map.at(code); +} + +} // namespace optimizer +} // namespace train +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h b/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h new file mode 100644 index 000000000..66a08b50f --- /dev/null +++ b/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__ +#define __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__ + +#include "backend/IPortableTensor.h" + +namespace onert +{ +namespace exec +{ +namespace train +{ +namespace optimizer +{ + +template <typename T, typename L> +void elementwise(const ir::Shape &shape, const backend::ITensor &src, backend::ITensor &dst, + const L &f) +{ + ShapeLoop(shape, [&](const ir::Coordinates &coords) { + const T src_val = *reinterpret_cast<const T *>(src.buffer() + src.calcOffset(coords)); + T *dst_data = reinterpret_cast<T *>(dst.buffer() + dst.calcOffset(coords)); + *dst_data = f(src_val, *dst_data); + }); +} + +} // namespace optimizer +} // namespace train +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__ diff --git a/runtime/onert/core/src/exec/train/optimizer/SGD.cc b/runtime/onert/core/src/exec/train/optimizer/SGD.cc new file mode 100644 index 000000000..abfbc1b4b --- /dev/null +++ b/runtime/onert/core/src/exec/train/optimizer/SGD.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <exec/train/optimizer/SGD.h> + +#include "OptimizerHelpers.h" + +namespace onert +{ +namespace exec +{ +namespace train +{ +namespace optimizer +{ + +double SGD::getLearningRate(uint32_t) const +{ + // TODO Use iteration, momentum, and nesterov + return _learning_rate; +} + +void SGD::applyGradient(const UpdateFactors &factors) const +{ + const auto lr = getLearningRate(std::get<size_t>(factors)); + const auto &grad_tensor = std::get<const backend::IPortableTensor &>(factors); + auto &trainable_tensor = std::get<backend::train::ITrainableTensor &>(factors); + assert(trainable_tensor.data_type() == grad_tensor.data_type()); + + const auto shape = trainable_tensor.getShape(); + const auto &grad_shape = grad_tensor.get_info().shape(); + + // TODO Support for different shapes + if (shape != grad_shape) + { + throw std::runtime_error("SGD: Invalid gradient tensor"); + } + + switch (grad_tensor.data_type()) + { + case ir::DataType::FLOAT32: + elementwise<float>(shape, grad_tensor, trainable_tensor, + [&](float src, float dst) -> float { return dst - src * lr; }); + break; + default: + throw std::runtime_error("SGD: Not supported data type"); + } +} + +} // namespace optimizer +} // namespace train +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/interp/Buffer.h b/runtime/onert/core/src/interp/Buffer.h deleted file mode 100644 index 24938f74f..000000000 --- a/runtime/onert/core/src/interp/Buffer.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file Buffer.h - * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class - */ -#ifndef __ONERT_INTERP_BUFFER_H__ -#define __ONERT_INTERP_BUFFER_H__ - -#include <memory> - -#include "ir/Data.h" - -namespace onert -{ -namespace interp -{ - -/** - * @brief Interface for writable data area - */ -class Buffer : public ir::Data -{ -public: - /** - * @brief Return writable pointer for data area - * @return Writable pointer - */ - virtual uint8_t *baseWritable(void) const = 0; -}; - -/** - * @brief Class for internally allocated data area - */ -class InternalBuffer final : public Buffer -{ -public: - InternalBuffer(size_t size) : _base{std::make_unique<uint8_t[]>(size)}, _size{size} - { - // DO NOTHING - } - -public: - size_t size(void) const override { return _size; } - const uint8_t *base(void) const override { return _base.get(); } - uint8_t *baseWritable(void) const override { return _base.get(); } - -private: - std::unique_ptr<uint8_t[]> _base; - size_t _size; -}; - -/** - * @brief Class for data area from outside - */ -class ExternalBuffer final : public Buffer -{ -public: - ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size} - { - // DO NOTHING - } - -public: - size_t size(void) const override { return _size; } - const uint8_t *base(void) const override { return _base; } - uint8_t *baseWritable(void) const override { return _base; } - -private: - uint8_t *_base; - size_t _size; -}; - -} // namespace interp -} // namespace onert - -#endif // __ONERT_INTERP_BUFFER_H__ diff --git a/runtime/onert/core/src/interp/ExecEnv.h b/runtime/onert/core/src/interp/ExecEnv.h deleted file mode 100644 index 7f577ea6e..000000000 --- a/runtime/onert/core/src/interp/ExecEnv.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file ExecEnv.h - * @brief This file contains ExecEnv to access interpreter tensor and execution status - */ -#ifndef __ONERT_INTERP_EXEC_ENV_H_ -#define __ONERT_INTERP_EXEC_ENV_H_ - -#include <unordered_set> - -#include "ir/Graph.h" -#include "Tensor.h" - -namespace onert -{ -namespace interp -{ - -/** - * @brief Class to gather interpreter execution environment - * Each interpreter instance own execution environment - */ -class ExecEnv -{ -public: - /** - * @brief Construct a new Exec Env object (deleted) - */ - ExecEnv(void) = delete; - /** - * @brief Construct a new ExecEnv object - * @param[in] graph Graph to execute by interpreter - */ - explicit ExecEnv(const ir::Graph &graph) : _graph(graph) - { - // DO NOTHING - } - -public: - /** - * @brief Return graph to execute - * @return Graph - */ - const ir::Graph &graph(void) const { return _graph; } - /** - * @brief Assign tensor to environment which have allocated or assigned buffer - * @param[in] index Tensor index - * @param[in] tensor Tensor - */ - void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor) - { - assert(tensor->bufferRO() != nullptr); - _tensors.emplace(index, tensor); - } - - /** - * @brief Return tensor pointer in environment - * @param[in] index Tensor index - * can_optional @c True if tensor can be optional input, otherwise @c false - * @return Tensor pointer - */ - const ITensor *tensorAt(const ir::OperandIndex index, bool can_optional = false) const - { - if (_tensors.find(index) == _tensors.end()) - { - // It may optional input, - // otherwise input is not set by runtime user - if (can_optional) - { - return nullptr; - } - - throw std::runtime_error{"ExecEnv: Input is not set"}; - } - - return _tensors.at(index).get(); - } - - /** - * @brief Check environment contains tensor - * @param[in] index Tensor index - * @return @c true if environment contain tensor, otherwise @c false - */ - bool contains(const ir::OperandIndex index) const - { - return (_tensors.find(index) != _tensors.end()); - } - - /** - * @brief Allocate tensor using operand info - * @param[in] index Tensor index - * @param[in] info Operand info - * @note If already allocated, just return - * @TODO More smart allocation policy - */ - void allocateIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info) - { - // already allocated, or constant - if (contains(index)) - { - return; - } - - // Buffer from external (ex. 
model output) - auto tensor = std::make_shared<Tensor>(info); - if (isExtBuffer(index)) - { - tensor->setBuffer(_external_buffers.at(index)); - assignTensor(index, tensor); - - return; - } - - tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size())); - assignTensor(index, tensor); - _buffers.insert(index); - } - - /** - * @brief Allocate read-only tensor and share data with other tensor - * @param[in] index Tensor index - * @param[in] info Operand info - * @param[in] index_to_share Tensor index that have data to share - */ - void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info, - const ir::OperandIndex index_to_share) - { - if (!contains(index_to_share)) - { - throw std::runtime_error{"Cannot find tensor to share data"}; - } - - // already allocated - if (contains(index)) - { - return; - } - - if (isExtBuffer(index)) - { - auto tensor = std::make_shared<Tensor>(info); - tensor->setBuffer(_external_buffers.at(index)); - assignTensor(index, tensor); - } - else - { - auto tensor = std::make_shared<ROTensor>(info); - tensor->setData(tensorAt(index_to_share)->shareData()); - assignTensor(index, tensor); - _buffers.insert(index); - } - } - - /** - * @brief Free buffer if allocated by allocateIfNeed - * @param[in] index Tensor index - * @note If allocated by outside, just return - */ - void freeIfAllocated(const ir::OperandIndex index) - { - if (_buffers.find(index) != _buffers.end()) - { - _tensors.at(index)->releaseData(); - } - } - - /** - * @brief Assign ExternalBuffer into external buffer map - * @param[in] index Tensor index - * @param[in] buffer External buffer - */ - void assignExternalBuffer(const ir::OperandIndex index, std::shared_ptr<ExternalBuffer> buffer) - { - _external_buffers.emplace(index, buffer); - } - -private: - bool isExtBuffer(const ir::OperandIndex index) - { - return (_external_buffers.find(index) != _external_buffers.end()); - } - -private: - const ir::Graph &_graph; - // Tensor map to use in interpreter - // It should map tensors that have allocated or assigned buffer pointer - std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors; - // Tensors allocated by allocateIfNeed (buffer) - std::unordered_set<ir::OperandIndex> _buffers; - // Tensor buffer from external - std::unordered_map<ir::OperandIndex, std::shared_ptr<ExternalBuffer>> _external_buffers; -}; - -} // namespace interp -} // namespace onert - -#endif // __ONERT_INTERP_EXEC_ENV_H_ diff --git a/runtime/onert/core/src/interp/InterpExecutor.cc b/runtime/onert/core/src/interp/InterpExecutor.cc deleted file mode 100644 index cd31a4dca..000000000 --- a/runtime/onert/core/src/interp/InterpExecutor.cc +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "interp/InterpExecutor.h" -#include "interp/ExecEnv.h" -#include "interp/Interpreter.h" - -#include "util/logging.h" - -#include <memory> - -namespace onert -{ -namespace interp -{ - -void InterpExecutor::execute(const exec::IODescription &desc) -{ - /************************************************************************ - * Prepare execution model (submodel) - It may execute divided model - but now consider model inference is done at interpreter - ***********************************************************************/ - ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map; - - for (uint32_t n = 0; n < _graph.getInputs().size(); n++) - { - ir::IOIndex index{n}; - const auto input_index = _graph.getInputs().at(index); - - const auto input = desc.inputs.at(n).get(); - if (input == nullptr) - { - // Optional input - continue; - } - - auto input_tensor = std::make_shared<ROTensor>(input->info); - input_tensor->setData(std::make_shared<const ir::ExternalData>( - reinterpret_cast<const uint8_t *>(input->buffer), input->size)); - tensor_map[input_index] = input_tensor; - } - - /************************************************************************ - * Prepare execution environment - Execution environment will be assigned to invoked interpreter instance - ***********************************************************************/ - - std::unique_ptr<ExecEnv> interp_env = std::make_unique<ExecEnv>(_graph); - - // Assign input/output tensor into interpreter execution environment - for (auto index : _graph.getInputs()) - { - if (tensor_map.find(index) != tensor_map.end()) - { - VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index.value() << std::endl; - interp_env->assignTensor(index, tensor_map.at(index)); - } - } - - for (uint32_t n = 0; n < _graph.getOutputs().size(); n++) - { - ir::IOIndex index{n}; - const auto output_index = _graph.getOutputs().at(index); - const auto output = desc.outputs.at(n).get(); - if (output == nullptr) - { - // Optional output - continue; - } - - VERBOSE(INTERPRETER) << "Set out buffer to ExecEnv. operand index:" << output_index.value() - << std::endl; - - interp_env->assignExternalBuffer( - output_index, std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output->buffer), - output->size)); - } - - // Allocate constant tensor - _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (obj.isConstant()) - { - VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind.value() - << std::endl; - - assert(obj.data()); - auto const_tensor = std::make_shared<ROTensor>(obj.info()); - // Assume that interpreter's tensor layout is same with model (NHWC) - const_tensor->setData( - std::make_shared<ir::ExternalData>(obj.data()->base(), obj.info().total_size())); - interp_env->assignTensor(ind, const_tensor); - } - }); - - /***************************************************************************** - * Invoke interpreter - ****************************************************************************/ - - interp::Interpreter interp(std::move(interp_env)); - interp.run(); - - /***************************************************************************** - * Invoked interpreter run is finished - ****************************************************************************/ - - // If interpreter execute submodel - // 1. Get tensor output of submodel into tensor_map to save result - // 2. 
Generate new ExecEnv for next interpretation -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h deleted file mode 100644 index 2e3f3ca54..000000000 --- a/runtime/onert/core/src/interp/InterpExecutor.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file InterpExecutor.h - * @brief This file contains InterpExecutor class\n - * to manage interpreter execution and environment - */ -#ifndef __ONERT_INTERP_INTERP_EXECUTOR_H__ -#define __ONERT_INTERP_INTERP_EXECUTOR_H__ - -#include "ir/OperandIndexMap.h" -#include "ir/Graph.h" -#include "exec/IExecutor.h" - -namespace onert -{ -namespace interp -{ - -class ITensor; - -/** - * @brief Class to execute model using interpreter - */ -class InterpExecutor final : public exec::IExecutor -{ -public: - explicit InterpExecutor(const ir::Graph &graph) : _graph(graph) - { - // DO NOTHING - } - -public: - /** - * @brief Return graph object - * @return Graph object - */ - const ir::Graph &graph() final { return _graph; } - void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{ - // Not implemented - }; - /** - * @brief Start execution - * @note It should be called after setting input and output buffer - */ - void execute(const exec::IODescription &desc) final; - -private: - const ir::Graph &_graph; - ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map; -}; - -} // namespace interp -} // namespace onert - -#endif // __ONERT_INTERP_INTERP_EXECUTOR_H__ diff --git a/runtime/onert/core/src/interp/InterpOps.lst b/runtime/onert/core/src/interp/InterpOps.lst deleted file mode 100644 index 0714df38a..000000000 --- a/runtime/onert/core/src/interp/InterpOps.lst +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INTERP_OP -#error Define INTERP_OP before including this file -#endif - -// Supported operation name in interpreter -// -// Same list with Operations.lst -// Make comment out if operation is not supported in interpreter -INTERP_OP(BinaryArithmetic) -//INTERP_OP(BatchToSpaceND) -//INTERP_OP(Cast) -INTERP_OP(Conv2D) -INTERP_OP(DepthwiseConv2D) -INTERP_OP(Pool2D) -INTERP_OP(Concat) -INTERP_OP(FullyConnected) -//INTERP_OP(Reduce) -INTERP_OP(Reshape) -INTERP_OP(Softmax) -//INTERP_OP(Squeeze) -//INTERP_OP(Slice) -//INTERP_OP(StridedSlice) -INTERP_OP(ElementwiseActivation) -//INTERP_OP(Transpose) -//INTERP_OP(Exp) -//INTERP_OP(Comparison) -//INTERP_OP(LogicalNot) -//INTERP_OP(LSTM) -//INTERP_OP(RSQRT) -//INTERP_OP(ResizeBilinear) -//INTERP_OP(RNN) -//INTERP_OP(Floor) -//INTERP_OP(SpaceToBatchND) -//INTERP_OP(SpaceToDepth) -//INTERP_OP(EmbeddingLookup) -//INTERP_OP(L2Normalization) -//INTERP_OP(HashtableLookup) -INTERP_OP(InstanceNorm) -//INTERP_OP(PReLU) -INTERP_OP(TransposeConv) -//INTERP_OP(SQRT) -//INTERP_OP(SquaredDifference) -//INTERP_OP(TopKV2) -INTERP_OP(Gather) -//INTERP_OP(Neg) -//INTERP_OP(Abs) -//INTERP_OP(ArgMax) -//INTERP_OP(Dequantize) -//INTERP_OP(LocalResponseNormalization) -//INTERP_OP(DepthToSpace) -//INTERP_OP(Pack) -//INTERP_OP(Split) -//INTERP_OP(Unpack) -INTERP_OP(Pad) -//INTERP_OP(Custom) -//INTERP_OP(Permute) -//INTERP_OP(OneHot) diff --git a/runtime/onert/core/src/interp/Interpreter.cc b/runtime/onert/core/src/interp/Interpreter.cc deleted file mode 100644 index b92afbe73..000000000 --- a/runtime/onert/core/src/interp/Interpreter.cc +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Interpreter.h" - -#include <stack> -#include <unordered_set> - -#include "Registration.h" - -#include "ir/OperandIndexMap.h" -#include "util/logging.h" -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace interp -{ - -// TODO more structured execution kernel implementation -// TODO use cker for execution -// TODO divide tensor prepare and execution -// TODO introduce memory manager (buffer allocate and free) -class OperationExecutor -{ -public: - OperationExecutor(ExecEnv *env) : _env{env} - { -#define INTERP_OP(InternalName) _kernels[ir::OpCode::InternalName] = get##InternalName(); -#include "InterpOps.lst" -#undef INTERP_OP - } - - void execute(const ir::OperationIndex &idx) - { - const ir::Operation &node = _env->graph().operations().at(idx); - const auto nodeName = node.name(); - VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName - << " operation (id: " << idx.value() << ")" << std::endl; - - const auto nodeOpCode = node.opcode(); - if (_kernels.find(nodeOpCode) == _kernels.end()) - { - throw std::runtime_error{"Interpreter: Operation " + nodeName + " is not yet implemented"}; - } - - if (_kernels[nodeOpCode]->prepare != nullptr) - { - _kernels[nodeOpCode]->prepare(_env, node); - } - _kernels[nodeOpCode]->invoke(_env, node); - } - -private: - ExecEnv *_env; - std::unordered_map<ir::OpCode, OpKernel *> _kernels; -}; - -void Interpreter::run() -{ - VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl; - - // operand_stack: save operands prepared to use - std::stack<ir::OperandIndex> operand_stack; - - // Note: We should push input first, then constant. - // We use use-def for find operators ready to execution, - // but Use-Def cannot handle parameters (maybe constant, but not always) - // Note: If all model inputs are constant, it may not work (depend on tensors' order). 
- // But that scenario may not exist - for (auto ind : _env->graph().getInputs()) - { - VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl; - - operand_stack.push(ind); - } - - _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (obj.isConstant()) - { - VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl; - - operand_stack.push(ind); - } - }); - - // Execution - std::unordered_set<ir::OperandIndex> ready_check; - std::unordered_set<ir::OperationIndex> executed; - OperationExecutor executor{_env.get()}; - while (!operand_stack.empty()) - { - const auto current_operand_index = operand_stack.top(); - operand_stack.pop(); - VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value() - << " is checked ready to use" << std::endl; - - assert(ready_check.find(current_operand_index) == ready_check.end()); - ready_check.insert(current_operand_index); - - // Find prepared operations by scan use of current operand - std::stack<ir::OperationIndex> operation_stack; - const auto use_operators = _env->graph().operands().at(current_operand_index).getUses(); - for (const auto &use_operator : use_operators) - { - // Assumption: all parameters are ready to use - bool operator_ready = true; - for (auto input_index : _env->graph().operations().at(use_operator).getInputs()) - { - if (ready_check.find(input_index) == ready_check.end()) - { - operator_ready = false; - break; - } - } - - if (operator_ready) - { - VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl; - operation_stack.push(use_operator); - } - } - - while (!operation_stack.empty()) - { - const auto current_operation_index = operation_stack.top(); - operation_stack.pop(); - VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "(" - << _env->graph().operations().at(current_operation_index).name() << ")" - << std::endl; - - // execution - // 1. Prepare output tensor - // 2. Call operation kernel - executor.execute(current_operation_index); - executed.insert(current_operation_index); - - // 3. Push each output into operand stack - const auto def_operands = _env->graph().operations().at(current_operation_index).getOutputs(); - for (auto def_operand : def_operands) - { - VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value() - << std::endl; - operand_stack.push(def_operand); - } - - // 4. Free if lifetime of buffer operands used by input is finished - for (auto input_index : _env->graph().operations().at(current_operation_index).getInputs()) - { - const auto use_operators = _env->graph().operands().at(input_index).getUses(); - bool dead_buffer = true; - for (const auto &use_operator : use_operators) - { - if (executed.find(use_operator) == executed.end()) - { - dead_buffer = false; - break; - } - } - - if (dead_buffer) - { - _env->freeIfAllocated(input_index); - } - } - } - } -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/Interpreter.h b/runtime/onert/core/src/interp/Interpreter.h deleted file mode 100644 index d2165f538..000000000 --- a/runtime/onert/core/src/interp/Interpreter.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Interpreter.h - * @brief This file contains Interpreter class for interpretation - */ -#ifndef __ONERT_INTERP_INTERPRETER_H__ -#define __ONERT_INTERP_INTERPRETER_H__ - -#include "ExecEnv.h" - -namespace onert -{ -namespace interp -{ - -/** - * @brief Class for interpretation - */ -class Interpreter -{ - -public: - /** - * @brief Construct a new Interpreter object (deleted) - */ - Interpreter() = delete; - /** - * @brief Construct a new Interpreter object - * @param[in] env Execution environment variable for interpreter object - */ - Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)} - { - // DO NOTHING - } - -public: - /** - * @brief Run interpreter until there is no operation to execute - */ - void run(); - -private: - std::unique_ptr<ExecEnv> _env; -}; - -} // namespace interp -} // namespace onert - -#endif // __ONERT_INTERP_INTERPRETER_H__ diff --git a/runtime/onert/core/src/interp/Registration.h b/runtime/onert/core/src/interp/Registration.h deleted file mode 100644 index 956b92a53..000000000 --- a/runtime/onert/core/src/interp/Registration.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_INTERP_REGISTRATION_H__ -#define __ONERT_INTERP_REGISTRATION_H__ - -#include "ExecEnv.h" - -#include "ir/Operation.h" - -namespace onert -{ -namespace interp -{ - -struct OpKernel -{ - std::function<void(ExecEnv *, const ir::Operation &)> prepare; - std::function<void(const ExecEnv *, const ir::Operation &)> invoke; -}; - -// Defined in operations/ directory -#define INTERP_OP(InternalName) OpKernel *get##InternalName(); -#include "InterpOps.lst" -#undef INTERP_OP - -} // namespace interp -} // namespace onert - -#endif // __ONERT_INTERP_REGISTRATION_H__ diff --git a/runtime/onert/core/src/interp/Tensor.cc b/runtime/onert/core/src/interp/Tensor.cc deleted file mode 100644 index 07f8b75dc..000000000 --- a/runtime/onert/core/src/interp/Tensor.cc +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Tensor.h" - -#define NO_USE(a) (void)(a) - -namespace onert -{ -namespace interp -{ - -void ITensor::access(const std::function<void(backend::ITensor &tensor)> &fn) { fn(*this); } - -size_t ROTensor::calcOffset(const ir::Coordinates &coords) const -{ - NO_USE(coords); - throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now."); -} - -size_t Tensor::calcOffset(const ir::Coordinates &coords) const -{ - NO_USE(coords); - throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now."); -} - -ir::Layout ROTensor::layout() const -{ - // TODO Changes to return frontend layout - return ir::Layout::NHWC; -} - -ir::Layout Tensor::layout() const -{ - // TODO Changes to return frontend layout - return ir::Layout::NHWC; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/Tensor.h b/runtime/onert/core/src/interp/Tensor.h deleted file mode 100644 index 008a4b9d4..000000000 --- a/runtime/onert/core/src/interp/Tensor.h +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file Tensor.h - * @brief This file contains ITensor interface, ROTensor class, and Tensor class - */ -#ifndef __ONERT_INTERP_TENSOR_H__ -#define __ONERT_INTERP_TENSOR_H__ - -#include "Buffer.h" - -#include "ir/OperandInfo.h" -#include "backend/ITensor.h" -#include "ir/Layout.h" - -namespace onert -{ -namespace interp -{ - -/** - * @brief Interface to handle Tensor in interpreter - */ -class ITensor : public backend::ITensor -{ -public: - virtual ~ITensor() = default; - -public: - virtual uint8_t *buffer() const = 0; - /** - * @brief Return shared pointer for buffer - * @return Buffer shared pointer - */ - virtual std::shared_ptr<const Buffer> shareBuffer() const = 0; - /** - * @brief Return read-only buffer pointer - * @return Read-only buffer pointer - */ - virtual const uint8_t *bufferRO() const = 0; - /** - * @brief Return shared pointer for data - * @return Data shared pointer - */ - virtual std::shared_ptr<const ir::Data> shareData() const = 0; - /** - * @brief Set internal/external buffer - * @param[in] buffer Buffer pointer - */ - virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0; - /** - * @brief Set data reference (including constant, input) - * @param[in] data Data pointer - */ - virtual void setData(std::shared_ptr<const ir::Data> data) = 0; - virtual void releaseData() = 0; - - virtual size_t total_size() const = 0; - virtual size_t dimension(size_t index) const = 0; - virtual size_t num_dimensions() const = 0; - virtual size_t calcOffset(const ir::Coordinates &coords) const = 0; - - virtual bool has_padding() const = 0; - /** - * @brief Return data type of tensor - * @return Data type of tensor - */ - virtual ir::DataType data_type() const = 0; - /** - * @brief Return TensorInfo - * @return TensorInfo - */ - virtual const ir::OperandInfo &tensorInfo() const = 0; - /** - * @brief Return number of elements - * @return Number of elements - */ - virtual uint64_t num_elements() const = 0; - void access(const std::function<void(backend::ITensor &tensor)> &fn) final; -}; - -/** - * @brief Class to handle tensor in interpreter as read-only - */ -class ROTensor final : public ITensor -{ -public: - ROTensor() = delete; - ROTensor(const ir::OperandInfo &info) : _info(info) - { - // DO NOTHING - } - -public: - uint8_t *buffer() const override { throw std::runtime_error{"Read only tensor"}; } - std::shared_ptr<const Buffer> shareBuffer() const override - { - throw std::runtime_error{"Read only tensor"}; - } - const uint8_t *bufferRO() const override { return _data->base(); } - std::shared_ptr<const ir::Data> shareData() const override { return _data; } - void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; } - void setData(std::shared_ptr<const ir::Data> data) override { _data = data; } - void releaseData() override { _data = nullptr; } - - size_t total_size() const override { return _info.total_size(); } - size_t dimension(size_t index) const override { return _info.shape().dim(index); } - size_t num_dimensions() const override { return _info.shape().rank(); } - size_t calcOffset(const ir::Coordinates &coords) const override; - ir::Layout layout() const override; - bool is_dynamic() const override { return false; } - bool has_padding() const override { return false; } - ir::DataType data_type() const override { return _info.typeInfo().type(); } - float data_scale() const override { return _info.typeInfo().scale(); } - int32_t data_offset() const override { return _info.typeInfo().offset(); } - const ir::OperandInfo &tensorInfo() 
const override { return _info; } - uint64_t num_elements() const override { return _info.shape().num_elements(); }; - -private: - const ir::OperandInfo _info; - std::shared_ptr<const ir::Data> _data{nullptr}; -}; - -/** - * @brief Class to handle tensor in interpreter as writable - */ -class Tensor final : public ITensor -{ -public: - Tensor() = delete; - Tensor(const ir::OperandInfo &info) : _info(info) - { - // DO NOTHING - } - -public: - uint8_t *buffer() const override { return _buffer->baseWritable(); } - std::shared_ptr<const Buffer> shareBuffer() const override { return _buffer; }; - const uint8_t *bufferRO() const override { return _buffer->base(); } - std::shared_ptr<const ir::Data> shareData() const override { return _buffer; } - void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; } - void setData(std::shared_ptr<const ir::Data>) override - { - throw std::runtime_error{"Passed data may read-only"}; - } - void releaseData() override { _buffer = nullptr; } - - size_t total_size() const override { return _info.total_size(); } - size_t dimension(size_t index) const override { return _info.shape().dim(index); } - size_t num_dimensions() const override { return _info.shape().rank(); } - size_t calcOffset(const ir::Coordinates &coords) const override; - ir::Layout layout() const override; - bool is_dynamic() const override { return false; } - bool has_padding() const override { return false; } - ir::DataType data_type() const override { return _info.typeInfo().type(); } - float data_scale() const override { return _info.typeInfo().scale(); } - int32_t data_offset() const override { return _info.typeInfo().offset(); } - const ir::OperandInfo &tensorInfo() const override { return _info; } - uint64_t num_elements() const override { return _info.shape().num_elements(); }; - backend::IDynamicTensorManager *dynamic_tensor_manager() override { return nullptr; } - -private: - const ir::OperandInfo _info; - std::shared_ptr<const Buffer> _buffer{nullptr}; -}; - -} // namespace interp -} // namespace onert - -#endif // __ONERT_INTERP_TENSOR_H__ diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc deleted file mode 100644 index 86e883524..000000000 --- a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <cker/operation/BinaryArithmeticOps.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/BinaryArithmetic.h" -#include "misc/polymorphic_downcast.h" -#include "cker/Types.h" - -namespace onert -{ -namespace interp -{ -namespace -{ - -enum class OpType -{ - ADD, - SUB, - MUL -}; - -void prepare(ExecEnv *env, const ir::Operation &node) -{ - const auto &arithmetic_node = - nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node); - - const auto lhs_index = node.getInputs().at(arithmetic_node.LHS); - const auto rhs_index = node.getInputs().at(arithmetic_node.RHS); - const auto out_index = node.getOutputs().at(0); - - const auto lhs_tensor = env->tensorAt(lhs_index); - const auto rhs_tensor = env->tensorAt(rhs_index); - - // Check shape and type lhs is same with rhs - // TODO Util function to compare TensorInfo - if (lhs_tensor->data_type() != rhs_tensor->data_type()) - { - throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Different input types"}; - } - - bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape()); - if (try_broadcast) - { - bool success = true; - auto out_shape = calcBroadcastShape(lhs_tensor->tensorInfo().shape(), - rhs_tensor->tensorInfo().shape(), success); - if (!success) - { - throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Fail to brodcasting"}; - } - - auto output_info = - ir::OperandInfo::createStaticInfo(out_shape, lhs_tensor->tensorInfo().typeInfo()); - // We can handle already allocated (ex. model output) - env->allocateIfNeeded(out_index, output_info); - } - else - { - // Output's shape and type is same with input - auto output_info = lhs_tensor->tensorInfo(); - // We can handle already allocated (ex. model output) - env->allocateIfNeeded(out_index, output_info); - } - - auto out_tensor = env->tensorAt(out_index); - // Check shape and type lhs is same with output - // TODO Util function to compare TensorInfo - if (lhs_tensor->data_type() != out_tensor->data_type()) - { - throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"}; - } -} - -inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params) -{ - params->float_activation_min = min; - params->float_activation_max = max; -} - -inline void setActivationParams(int32_t min, int32_t max, - nnfw::cker::BinaryArithmeticOpParam *params) -{ - params->quantized_activation_min = min; - params->quantized_activation_max = max; -} - -template <typename raw_type, OpType op_type> -void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor, - const ir::operation::BinaryArithmetic::Param ¶m) -{ - const auto lhs_buffer = lhs_tensor->bufferRO(); - const auto rhs_buffer = rhs_tensor->bufferRO(); - auto out_buffer = out_tensor->buffer(); - - nnfw::cker::BinaryArithmeticOpParam cker_param; - raw_type activation_min, activation_max; - calculateActivationRange(param.activation, &activation_min, &activation_max); - setActivationParams(activation_min, activation_max, &cker_param); - const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer); - const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer); - raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer); - - const auto cker_op_type = - (op_type == OpType::ADD) - ? nnfw::cker::BinaryArithmeticOpType::ADD - : ((op_type == OpType::SUB) ? 
nnfw::cker::BinaryArithmeticOpType::SUB - : nnfw::cker::BinaryArithmeticOpType::MUL); - - const bool need_broadcast = nnfw::cker::ProcessBroadcastShapes( - convertShape(lhs_tensor->tensorInfo().shape()), - convertShape(rhs_tensor->tensorInfo().shape()), &cker_param); - - if (need_broadcast) - { - const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape()); - const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape()); - const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); - nnfw::cker::BroadcastBinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape, - rhs_ptr, out_shape, out_ptr); - return; - } - - const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape()); - const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape()); - const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); - nnfw::cker::BinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, - out_shape, out_ptr); -} - -template <OpType op_type> -void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node) -{ - const auto lhs_index = node.getInputs().at(node.LHS); - const auto rhs_index = node.getInputs().at(node.RHS); - const auto out_index = node.getOutputs().at(0); - const auto lhs_tensor = env->tensorAt(lhs_index); - const auto rhs_tensor = env->tensorAt(rhs_index); - const auto out_tensor = env->tensorAt(out_index); - const auto data_type = lhs_tensor->data_type(); - - if (data_type == ir::DataType::INT32) - { - invoke<int32_t, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param()); - } - else if (data_type == ir::DataType::FLOAT32) - { - invoke<float, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param()); - } - else - { - throw std::runtime_error{"NYI: Unsupported data type"}; - } -} - -void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node) -{ - const auto &arithmetic_node = - nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node); - - switch (arithmetic_node.param().arithmetic_type) - { - case ir::operation::BinaryArithmetic::ArithmeticType::ADD: - invokeBinaryArithmetic<OpType::ADD>(env, arithmetic_node); - break; - case ir::operation::BinaryArithmetic::ArithmeticType::SUB: - invokeBinaryArithmetic<OpType::SUB>(env, arithmetic_node); - break; - case ir::operation::BinaryArithmetic::ArithmeticType::MUL: - invokeBinaryArithmetic<OpType::MUL>(env, arithmetic_node); - break; - default: - throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " + - arithmetic_node.name()}; - break; - } -} - -} // namespace - -OpKernel *getBinaryArithmetic() -{ - static OpKernel kernel = {prepare, invokeBinaryArithmeticOps}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/Concat.cc b/runtime/onert/core/src/interp/operations/Concat.cc deleted file mode 100644 index efc46c66b..000000000 --- a/runtime/onert/core/src/interp/operations/Concat.cc +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <cker/operation/Concatenation.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/Concat.h" -#include "misc/polymorphic_downcast.h" - -namespace onert -{ -namespace interp -{ -namespace concat -{ - -void prepareConcat(ExecEnv *env, const ir::Operation &node) -{ - const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node); - - const auto first_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - const auto first_tensor = env->tensorAt(first_index); - uint32_t out_axis_dimension = 0; - const int32_t axis_raw = concat_node.param().axis; - const uint32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->num_dimensions()) : axis_raw; - - // All inputs shape should be same except axis dimension - // All inputs type should be same - for (auto input : node.getInputs()) - { - assert(first_tensor->num_dimensions() == env->tensorAt(input)->num_dimensions()); - assert(first_tensor->data_type() == env->tensorAt(input)->data_type()); - for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++) - { - if (i == axis) - { - out_axis_dimension += env->tensorAt(input)->dimension(i); - continue; - } - assert(first_tensor->dimension(i) == env->tensorAt(input)->dimension(i)); - } - } - - // Make output tensor info using first input tensor info, and accumulated axis dimension value - auto out_shape = first_tensor->tensorInfo().shape(); - out_shape.dim(axis) = out_axis_dimension; - env->allocateIfNeeded(out_index, ir::OperandInfo::createStaticInfo( - out_shape, first_tensor->tensorInfo().typeInfo())); - - auto out_tensor = env->tensorAt(out_index); - UNUSED_RELEASE(out_tensor); - - // Output shape should be same with input except axis dimension - // Output type should be same with input - assert(first_tensor->data_type() == out_tensor->data_type()); - for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++) - { - if (i == axis) - { - continue; - } - assert(first_tensor->dimension(i) == out_tensor->dimension(i)); - } -} - -void invoke(const std::vector<const ITensor *> in_tensors, const ITensor *out_tensor, uint32_t axis) -{ - const uint32_t count = in_tensors.size(); - - // Calculate - nnfw::cker::ConcatenationParams cker_param; - cker_param.axis = (int8_t)axis; - cker_param.inputs_count = count; - - const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); - - std::vector<nnfw::cker::Shape> in_shapes; - std::vector<const nnfw::cker::Shape *> in_shape_ptrs; - in_shapes.reserve(count); - in_shape_ptrs.reserve(count); - std::vector<const float *> in_ptrs; - for (uint32_t i = 0; i < count; i++) - { - in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape())); - in_shape_ptrs.push_back(&in_shapes[i]); - in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO())); - } - - auto out_buffer = out_tensor->buffer(); - float *out_ptr = reinterpret_cast<float *>(out_buffer); - - nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape, - out_ptr); -} - -void invokeConcat(const ExecEnv *env, const 
ir::Operation &node) -{ - const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node); - const int32_t axis_raw = concat_node.param().axis; - - std::vector<const ITensor *> in_tensors; - for (const auto &e : concat_node.getInputs()) - { - in_tensors.emplace_back(env->tensorAt(e)); - } - - const auto out_index = node.getOutputs().at(0); - const auto out_tensor = env->tensorAt(out_index); - const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->num_dimensions()) : axis_raw; - - const auto data_type = in_tensors[0]->data_type(); - if (data_type == ir::DataType::FLOAT32) - { - invoke(in_tensors, out_tensor, axis); - } - else - { - throw std::runtime_error{"NYI: Support float32 only"}; - } -} -} // namespace concat - -OpKernel *getConcat() -{ - static OpKernel kernel = {concat::prepareConcat, concat::invokeConcat}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/Conv2D.cc b/runtime/onert/core/src/interp/operations/Conv2D.cc deleted file mode 100644 index bb00b828c..000000000 --- a/runtime/onert/core/src/interp/operations/Conv2D.cc +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <cker/operation/Conv.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/Conv2D.h" -#include "util/Utils.h" -#include "util/ShapeInference.h" -#include "misc/polymorphic_downcast.h" - -namespace onert -{ -namespace interp -{ -namespace conv2d -{ - -void prepareConv2D(ExecEnv *env, const ir::Operation &node) -{ - const auto in_index = node.getInputs().at(ir::operation::Conv2D::INPUT); - const auto kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); - const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); - const auto out_index = node.getOutputs().at(0); - - const auto in_tensor = env->tensorAt(in_index); - const auto kernel_tensor = env->tensorAt(kernel_index); - const auto bias_tensor = env->tensorAt(bias_index); - - assert(in_tensor->num_dimensions() == 4); - assert(kernel_tensor->num_dimensions() == 4); - assert(bias_tensor->num_dimensions() == 1); - - UNUSED_RELEASE(in_tensor); - UNUSED_RELEASE(kernel_tensor); - UNUSED_RELEASE(bias_tensor); - - const auto output_info = env->graph().operands().at(out_index).info(); - if (output_info.total_size() == 0) - { - // Handle unspecified output shape - const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node); - const auto infered_output_shape = shape_inference::inferConv2DShape( - in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param()); - env->allocateIfNeeded( - out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo())); - } - else - { - env->allocateIfNeeded(out_index, output_info); - } - - auto out_tensor = env->tensorAt(out_index); - UNUSED_RELEASE(out_tensor); - - // Handle same ifm & ofm data type only - assert(in_tensor->data_type() == out_tensor->data_type()); - assert(out_tensor->num_dimensions() == 4); -} - -void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, - const ITensor *ofm_tensor, const ir::operation::Conv2D::Param ¶m) -{ - // TODO Support NCHW frontned - const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
- const auto &ker_shape = ker_tensor->tensorInfo().shape(); - const auto ker_height = ker_shape.dim(1); - const auto ker_width = ker_shape.dim(2); - const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, - ker_width, ker_height); - - // Calculate - float activation_min, activation_max; - calculateActivationRange(param.activation, &activation_min, &activation_max); - - nnfw::cker::ConvParams cker_param; - cker_param.padding_type = convertPaddingType(param.padding.type); - cker_param.padding_values.width = padding.left; - cker_param.padding_values.height = padding.top; - cker_param.stride_width = param.stride.horizontal; - cker_param.stride_height = param.stride.vertical; - cker_param.dilation_width_factor = 1; - cker_param.dilation_height_factor = 1; - cker_param.float_activation_min = activation_min; - cker_param.float_activation_max = activation_max; - - const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); - const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); - const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape()); - const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape()); - const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO()); - const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO()); - const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO()); - float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); - - nnfw::cker::Conv conv_kernel; - conv_kernel(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape, - bias_ptr, cker_ofm_shape, ofm_ptr); -} - -void invokeConv2D(const ExecEnv *env, const ir::Operation &node) -{ - const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node); - - const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT); - const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); - const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); - const auto ofm_index = node.getOutputs().at(0); - - const auto ifm_tensor = env->tensorAt(ifm_index); - const auto ker_tensor = env->tensorAt(ker_index); - const auto bias_tensor = env->tensorAt(bias_index); - const auto ofm_tensor = env->tensorAt(ofm_index); - - const auto data_type = ifm_tensor->data_type(); - if (data_type == ir::DataType::FLOAT32) - { - invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); - } - else - { - throw std::runtime_error{"NYI: Support float32 only"}; - } -} -} // namespace conv2d - -OpKernel *getConv2D() -{ - static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc deleted file mode 100644 index 0473855d9..000000000 --- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <cker/operation/DepthwiseConv.h> -#include <misc/polymorphic_downcast.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/DepthwiseConv2D.h" -#include "util/Utils.h" -#include "util/ShapeInference.h" - -namespace onert -{ -namespace interp -{ - -namespace -{ - -void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node) -{ - const auto in_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT); - const auto kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); - const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); - const auto out_index = node.getOutputs().at(0); - - const auto in_tensor = env->tensorAt(in_index); - const auto kernel_tensor = env->tensorAt(kernel_index); - const auto bias_tensor = env->tensorAt(bias_index); - - assert(in_tensor->num_dimensions() == 4); - assert(kernel_tensor->num_dimensions() == 4); - assert(bias_tensor->num_dimensions() == 1); - - UNUSED_RELEASE(in_tensor); - UNUSED_RELEASE(kernel_tensor); - UNUSED_RELEASE(bias_tensor); - - // TODO handle unspecified output shape: - // calculate output shape using ifm shape, kernel shape, padding, stride - const auto output_info = env->graph().operands().at(out_index).info(); - if (output_info.total_size() == 0) - { - // Handle unspecified output shape - const auto &depth_conv_node = - nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node); - const auto infered_output_shape = shape_inference::inferDepthwiseConv2DShape( - in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), - depth_conv_node.param()); - env->allocateIfNeeded( - out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo())); - } - else - { - env->allocateIfNeeded(out_index, output_info); - } - - auto out_tensor = env->tensorAt(out_index); - UNUSED_RELEASE(out_tensor); - - // Handle same ifm & ofm data type only - assert(in_tensor->data_type() == out_tensor->data_type()); - assert(out_tensor->num_dimensions() == 4); -} - -void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, - const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param ¶m) -{ - // TODO Support NCHW frontend - const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
- const auto &ker_shape = ker_tensor->tensorInfo().shape(); - const auto ker_height = ker_shape.dim(1); - const auto ker_width = ker_shape.dim(2); - const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, - ker_width, ker_height); - - // Calculate - float activation_min, activation_max; - calculateActivationRange(param.activation, &activation_min, &activation_max); - - nnfw::cker::DepthwiseConvParams cker_param; - cker_param.padding_values.width = padding.left; - cker_param.padding_values.height = padding.top; - cker_param.depth_multiplier = param.multiplier; - cker_param.stride_width = param.stride.horizontal; - cker_param.stride_height = param.stride.vertical; - cker_param.dilation_width_factor = 1; - cker_param.dilation_height_factor = 1; - cker_param.float_activation_min = activation_min; - cker_param.float_activation_max = activation_max; - - const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); - const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); - const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape()); - const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape()); - const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO()); - const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO()); - const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO()); - float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); - - nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, - cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); -} - -void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node) -{ - const auto &conv_node = static_cast<const ir::operation::DepthwiseConv2D &>(node); - - const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT); - const auto ker_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); - const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); - const auto ofm_index = node.getOutputs().at(0); - - const auto ifm_tensor = env->tensorAt(ifm_index); - const auto ker_tensor = env->tensorAt(ker_index); - const auto bias_tensor = env->tensorAt(bias_index); - const auto ofm_tensor = env->tensorAt(ofm_index); - - const auto data_type = ifm_tensor->data_type(); - if (data_type == ir::DataType::FLOAT32) - { - invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); - } - else - { - throw std::runtime_error{"NYI: Support float32 only"}; - } -} - -} // namespace - -OpKernel *getDepthwiseConv2D() -{ - static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc deleted file mode 100644 index c8773bef4..000000000 --- a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <cmath> - -#include "OperationUtil.h" - -#include "interp/Registration.h" - -#include "ir/operation/ElementwiseActivation.h" - -#include <misc/polymorphic_downcast.h> -#include <cker/operation/Logistic.h> -#include <cker/operation/Tanh.h> - -namespace onert -{ -namespace interp -{ -namespace -{ - -enum class ActivationType -{ - Logistic, - ReLU, - Tanh -}; - -void prepare(ExecEnv *env, const ir::Operation &node) -{ - const auto input_index = node.getInputs().at(0); - const auto output_index = node.getOutputs().at(0); - - const auto input_tensor = env->tensorAt(input_index); - - const auto output_info = env->graph().operands().at(output_index).info(); - if (output_info.total_size() == 0) - { - // Output's shape and type is same with input - auto input_info = input_tensor->tensorInfo(); - // We can handle already allocated (ex. model output) - env->allocateIfNeeded(output_index, input_info); - } - else - { - env->allocateIfNeeded(output_index, output_info); - } - - const auto output_tensor = env->tensorAt(output_index); - // Check shape and type lhs is same with output - // TODO Util function to compare TensorInfo - if (input_tensor->data_type() != output_tensor->data_type()) - { - throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"}; - } -} - -template <ActivationType act_type> -void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha, - float beta) -{ - std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); }; - switch (act_type) - { - case ActivationType::ReLU: - fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); }; - break; - case ActivationType::Tanh: - fn = [](const float &in) { return std::tanh(in); }; - break; - default: - throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"}; - break; - } - - const float *input_end = input_ptr + num_elements; - for (; input_ptr < input_end; input_ptr++, output_ptr++) - { - *output_ptr = fn(*input_ptr); - } -} - -template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node) -{ - const auto input_index = node.getInputs().at(0); - const auto output_index = node.getOutputs().at(0); - - // Check lhs shape is same with rhs (with broadcast) - const auto input_tensor = env->tensorAt(input_index); - const auto output_tensor = env->tensorAt(output_index); - - const auto data_type = input_tensor->data_type(); - if (data_type == ir::DataType::FLOAT32) - { - uint64_t elements = input_tensor->num_elements(); - const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO()); - float *out = reinterpret_cast<float *>(output_tensor->buffer()); - if (act_type == ActivationType::Logistic) - { - const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); - const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); - nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out); - } - else - { - const auto &act_node = - nnfw::misc::polymorphic_downcast<const 
ir::operation::ElementwiseActivation &>(node); - evalFloat<act_type>(input_start, out, elements, act_node.param().alpha, - act_node.param().beta); - } - } - else - { - throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"}; - } -} - -void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node) -{ - const auto &act_node = - nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node); - switch (act_node.param().op_type) - { - case ir::operation::ElementwiseActivation::Type::LOGISTIC: - invoke<ActivationType::Logistic>(env, node); - break; - case ir::operation::ElementwiseActivation::Type::RELU: - invoke<ActivationType::ReLU>(env, node); - break; - case ir::operation::ElementwiseActivation::Type::TANH: - invoke<ActivationType::Tanh>(env, node); - break; - default: - throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation"); - } -} - -} // namespace - -OpKernel *getElementwiseActivation() -{ - static OpKernel kernel = {prepare, invokeElementwiseActivation}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/FullyConnected.cc b/runtime/onert/core/src/interp/operations/FullyConnected.cc deleted file mode 100644 index 12f529dab..000000000 --- a/runtime/onert/core/src/interp/operations/FullyConnected.cc +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <cker/operation/FullyConnected.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/FullyConnected.h" -#include "misc/polymorphic_downcast.h" - -namespace onert -{ -namespace interp -{ -namespace fc -{ - -void prepareFC(ExecEnv *env, const ir::Operation &node) -{ - const auto in_index = node.getInputs().at(ir::operation::FullyConnected::INPUT); - const auto kernel_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); - const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); - const auto out_index = node.getOutputs().at(0); - - const auto in_tensor = env->tensorAt(in_index); - const auto kernel_tensor = env->tensorAt(kernel_index); - const auto bias_tensor = env->tensorAt(bias_index); - - UNUSED_RELEASE(in_tensor); - UNUSED_RELEASE(kernel_tensor); - UNUSED_RELEASE(bias_tensor); - - assert(in_tensor->num_dimensions() >= 2); - assert(kernel_tensor->num_dimensions() == 2); - assert(bias_tensor->num_dimensions() == 1); - - const auto input_size_with_batch = in_tensor->num_elements(); - const auto num_units = kernel_tensor->dimension(0); - const auto input_size = kernel_tensor->dimension(1); - const auto batch_size = input_size_with_batch / input_size; - assert(input_size_with_batch % input_size == 0); - assert(num_units == bias_tensor->dimension(0)); - - // Make output tensor info - ir::Shape output_shape(2); - output_shape.dim(0) = batch_size; - output_shape.dim(1) = num_units; - const auto out_info = - ir::OperandInfo::createStaticInfo(output_shape, in_tensor->tensorInfo().typeInfo()); - env->allocateIfNeeded(out_index, out_info); - - auto out_tensor = env->tensorAt(out_index); - UNUSED_RELEASE(out_tensor); - - // Handle same ifm & ofm data type only - assert(in_tensor->data_type() == out_tensor->data_type()); - assert(out_tensor->num_dimensions() == 2); - assert(out_tensor->dimension(0) == batch_size); - assert(out_tensor->dimension(1) == num_units); -} - -void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor, - const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param ¶m) -{ - const auto ifm_buffer = ifm_tensor->bufferRO(); - const auto ker_buffer = ker_tensor->bufferRO(); - const auto bias_buffer = bias_tensor->bufferRO(); - auto ofm_buffer = ofm_tensor->buffer(); - - // Calculate - nnfw::cker::FullyConnectedParams cker_param; - cker_param.activation = convertActivationType(param.activation); - calculateActivationRange(param.activation, &cker_param.float_activation_min, - &cker_param.float_activation_max); - const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); - const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); - const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape()); - const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape()); - const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer); - const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer); - const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer); - float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer); - - nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, - cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); -} - -void invokeFC(const ExecEnv *env, const ir::Operation &node) -{ - const auto &conv_node = - nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node); - - const auto ifm_index = 
node.getInputs().at(ir::operation::FullyConnected::INPUT); - const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); - const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); - const auto ofm_index = node.getOutputs().at(0); - - const auto ifm_tensor = env->tensorAt(ifm_index); - const auto ker_tensor = env->tensorAt(ker_index); - const auto bias_tensor = env->tensorAt(bias_index); - const auto ofm_tensor = env->tensorAt(ofm_index); - - const auto data_type = ifm_tensor->data_type(); - if (data_type == ir::DataType::FLOAT32) - { - invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param()); - } - else - { - throw std::runtime_error{"NYI: Support float only"}; - } -} -} // namespace fc - -OpKernel *getFullyConnected() -{ - static OpKernel kernel = {fc::prepareFC, fc::invokeFC}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/Gather.cc b/runtime/onert/core/src/interp/operations/Gather.cc deleted file mode 100644 index 9e82def5f..000000000 --- a/runtime/onert/core/src/interp/operations/Gather.cc +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <cker/operation/Gather.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/Gather.h" -#include "misc/polymorphic_downcast.h" - -namespace onert -{ -namespace interp -{ -namespace -{ - -void prepareGather(ExecEnv *env, const ir::Operation &node) -{ - const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT); - const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES); - const auto output_index = node.getOutputs().at(0); - - const auto input_tensor = env->tensorAt(input_index); - const auto indices_tensor = env->tensorAt(indices_index); - - // TODO handle unspecified output shape: - // calculate output shape using ifm shape, kernel shape, padding, stride - const auto output_info = env->graph().operands().at(output_index).info(); - if (output_info.total_size() == 0) - { - throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"}; - } - else - { - env->allocateIfNeeded(output_index, output_info); - } - - if (indices_tensor->data_type() != ir::DataType::INT32) - { - throw std::runtime_error{"Interp(Gather): Invalid indices data type"}; - } - - auto output_tensor = env->tensorAt(output_index); - auto output_rank = input_tensor->num_dimensions() + indices_tensor->num_dimensions() - 1; - - if (output_rank != output_tensor->num_dimensions()) - { - throw std::runtime_error{"Interp(Gather): Invalid output rank"}; - } - if (output_tensor->data_type() != input_tensor->data_type()) - { - throw std::runtime_error{"Interp(Gather): Invalid output data type"}; - } - - if (input_tensor->data_type() == ir::DataType::QUANT_UINT8_ASYMM && - input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo()) - { - throw std::runtime_error{ - "Interp(Gather): Cannot handle different I/O QUANT_UINT8_ASYMM scale/offset"}; - } -} - -template <typename raw_type> -void invoke(const ITensor *input_tensors, const ITensor *indices_tensors, - const ITensor *output_tensor, uint32_t axis) -{ - // Calculate - nnfw::cker::GatherParams cker_param; - cker_param.axis = (int8_t)axis; - - const auto cker_input_shapes = convertShape(input_tensors->tensorInfo().shape()); - const auto cker_indices_shape = convertShape(indices_tensors->tensorInfo().shape()); - const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); - const raw_type *input_ptr = reinterpret_cast<const raw_type *>(input_tensors->bufferRO()); - const int32_t *indices_ptr = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO()); - raw_type *output_ptr = reinterpret_cast<raw_type *>(output_tensor->buffer()); - - nnfw::cker::Gather<raw_type>(cker_param, cker_input_shapes, input_ptr, cker_indices_shape, - indices_ptr, cker_output_shape, output_ptr); -} - -void invokeGather(const ExecEnv *env, const ir::Operation &node) -{ - const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node); - const int32_t axis_raw = gather_node.param().axis; - - const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT); - const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES); - const auto output_index = node.getOutputs().at(0); - - const auto input_tensor = env->tensorAt(input_index); - const auto indices_tensor = env->tensorAt(indices_index); - const auto output_tensor = env->tensorAt(output_index); - const uint32_t axis = (axis_raw < 0) ? 
(axis_raw + input_tensor->num_dimensions()) : axis_raw; - - const auto data_type = input_tensor->data_type(); - - switch (data_type) - { - case ir::DataType::FLOAT32: - invoke<float>(input_tensor, indices_tensor, output_tensor, axis); - break; - case ir::DataType::INT32: - invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis); - break; - case ir::DataType::QUANT_UINT8_ASYMM: - invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis); - break; - default: - throw std::runtime_error{"Interp(Gather): NYI - Not supported type"}; - } -} - -} // namespace - -OpKernel *getGather() -{ - static OpKernel kernel = {prepareGather, invokeGather}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/InstanceNorm.cc b/runtime/onert/core/src/interp/operations/InstanceNorm.cc deleted file mode 100644 index 2538bcc39..000000000 --- a/runtime/onert/core/src/interp/operations/InstanceNorm.cc +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <cker/operation/InstanceNorm.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/InstanceNorm.h" -#include "misc/polymorphic_downcast.h" - -namespace onert -{ -namespace interp -{ -namespace instancenorm -{ - -void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node) -{ - const auto &instancenorm_node = - nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node); - - const auto input_index = node.getInputs().at(instancenorm_node.INPUT); - const auto output_index = node.getOutputs().at(0); - const auto input_tensor = env->tensorAt(input_index); - - if (input_tensor->num_dimensions() != 4) - { - throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"}; - } - - // Output shape should be same with input - env->allocateIfNeeded(output_index, input_tensor->tensorInfo()); - - auto output_tensor = env->tensorAt(output_index); - UNUSED_RELEASE(output_tensor); - - // Handle same ifm & ofm data type only - assert(input_tensor->data_type() == output_tensor->data_type()); - assert(input_tensor->tensorInfo().shape() == output_tensor->tensorInfo().shape()); -} - -inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params) -{ - params->float_activation_min = min; - params->float_activation_max = max; -} - -void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor, - const ITensor *output_tensor, const ir::operation::InstanceNorm::Param ¶m) -{ - // Calculate - float activation_min, activation_max; - calculateActivationRange(param.activation, &activation_min, &activation_max); - - nnfw::cker::InstanceNormParams cker_param; - cker_param.epsilon = param.epsilon; - cker_param.float_activation_min = activation_min; - cker_param.float_activation_max = activation_max; - - const auto cker_input_shape = 
convertShape(input_tensor->tensorInfo().shape()); - const auto cker_gamma_shape = convertShape(gamma_tensor->tensorInfo().shape()); - const auto cker_beta_shape = convertShape(beta_tensor->tensorInfo().shape()); - const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); - const float *input_ptr = reinterpret_cast<const float *>(input_tensor->bufferRO()); - const float *gamma_ptr = reinterpret_cast<const float *>(gamma_tensor->bufferRO()); - const float *beta_ptr = reinterpret_cast<const float *>(beta_tensor->bufferRO()); - float *output_ptr = reinterpret_cast<float *>(output_tensor->buffer()); - - nnfw::cker::InstanceNorm(cker_param, cker_input_shape, input_ptr, cker_gamma_shape, gamma_ptr, - cker_beta_shape, beta_ptr, cker_output_shape, output_ptr); -} - -void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node) -{ - const auto &instancenorm_node = - nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node); - - const auto input_index = node.getInputs().at(instancenorm_node.INPUT); - const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA); - const auto beta_index = node.getInputs().at(instancenorm_node.BETA); - const auto out_index = node.getOutputs().at(0); - const auto input_tensor = env->tensorAt(input_index); - const auto gamma_tensor = env->tensorAt(gamma_index); - const auto beta_tensor = env->tensorAt(beta_index); - const auto out_tensor = env->tensorAt(out_index); - const auto data_type = input_tensor->data_type(); - - if (data_type == ir::DataType::FLOAT32) - { - invoke(input_tensor, gamma_tensor, beta_tensor, out_tensor, instancenorm_node.param()); - } - else - { - throw std::runtime_error{"NYI: Unsupported data type"}; - } -} -} // namespace instancenorm - -OpKernel *getInstanceNorm() -{ - static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/OperationUtil.h b/runtime/onert/core/src/interp/operations/OperationUtil.h deleted file mode 100644 index 2fdf098f0..000000000 --- a/runtime/onert/core/src/interp/operations/OperationUtil.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_ -#define __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_ - -#include "ir/Shape.h" -#include "ir/InternalType.h" -#include "ir/Padding.h" - -#include <cker/Shape.h> -#include <cker/Types.h> - -namespace onert -{ -namespace interp -{ - -inline nnfw::cker::Shape convertShape(const ir::Shape &shape) -{ - auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end()); - - std::vector<int32_t> raw_shape; - raw_shape.resize(dimensions.size()); - - for (uint32_t i = 0; i < dimensions.size(); ++i) - { - raw_shape[i] = dimensions[i]; - } - - return nnfw::cker::GetShape(raw_shape); -} - -inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape) -{ - auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end()); - - const int32_t extended_rank = 4; - int32_t raw_shape[extended_rank]; - uint32_t start = extended_rank - dimensions.size(); - - for (uint32_t i = 0; i < extended_rank; ++i) - { - if (i < start) - { - raw_shape[i] = 1; - } - else - { - raw_shape[i] = dimensions[i - start]; - } - } - - return nnfw::cker::Shape(extended_rank, raw_shape); -} - -inline nnfw::cker::FusedActivationFunctionType -convertActivationType(const ir::Activation activation) -{ - switch (activation) - { - case ir::Activation::NONE: - return nnfw::cker::FusedActivationFunctionType::kNone; - case ir::Activation::RELU: - return nnfw::cker::FusedActivationFunctionType::kRelu; - case ir::Activation::RELU1: - return nnfw::cker::FusedActivationFunctionType::kRelu1; - case ir::Activation::RELU6: - return nnfw::cker::FusedActivationFunctionType::kRelu6; - default: - throw std::runtime_error{"CPU backend: Cannot convert activation type"}; - } -} - -template <typename T> -void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) -{ - if (activation == ir::Activation::RELU) - { - *activation_min = 0; - *activation_max = std::numeric_limits<T>::max(); - } - else if (activation == ir::Activation::RELU6) - { - *activation_min = 0; - *activation_max = 6; - } - else if (activation == ir::Activation::RELU1) - { - *activation_min = -1; - *activation_max = 1; - } - else if (activation == ir::Activation::NONE) - { - *activation_min = std::numeric_limits<T>::lowest(); - *activation_max = std::numeric_limits<T>::max(); - } - else - { - throw std::runtime_error{"Unsupported activation type"}; - } -} - -inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success) -{ - int lhs_rank = lhs.rank(); - int rhs_rank = rhs.rank(); - - int out_rank = (lhs_rank > rhs_rank ? 
lhs_rank : rhs_rank); - ir::Shape out_shape(out_rank); - - int lhs_idim = lhs_rank - 1; - int rhs_idim = rhs_rank - 1; - success = true; - for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--) - { - if (lhs_idim == -1 && rhs_idim == -1) - { - // invalid result - success = false; - break; - } - - if (lhs_idim == -1) - { - out_shape.dim(out_idim) = rhs.dim(rhs_idim); - rhs_idim--; - } - else if (rhs_idim == -1) - { - out_shape.dim(out_idim) = lhs.dim(lhs_idim); - lhs_idim--; - } - else - { - if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim)) - { - out_shape.dim(out_idim) = lhs.dim(lhs_idim); - lhs_idim--; - rhs_idim--; - } - else if (lhs.dim(lhs_idim) == 1) - { - out_shape.dim(out_idim) = rhs.dim(rhs_idim); - lhs_idim--; - rhs_idim--; - } - else if (rhs.dim(rhs_idim) == 1) - { - out_shape.dim(out_idim) = lhs.dim(lhs_idim); - lhs_idim--; - rhs_idim--; - } - else - { - // invalid result - success = false; - break; - } - } - } - - if (lhs_idim != -1 || rhs_idim != -1) - { - // invalid result - success = false; - } - return out_shape; -} - -inline nnfw::cker::PaddingType convertPaddingType(ir::PaddingType ir_padding_type) -{ - switch (ir_padding_type) - { - case ir::PaddingType::EXPLICIT: - return nnfw::cker::PaddingType::kNone; - case ir::PaddingType::SAME: - return nnfw::cker::PaddingType::kSame; - case ir::PaddingType::VALID: - return nnfw::cker::PaddingType::kValid; - default: - throw std::runtime_error("Wrong padding type."); - break; - } -} - -} // namespace interp -} // namespace onert - -#endif // __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_ diff --git a/runtime/onert/core/src/interp/operations/Pad.cc b/runtime/onert/core/src/interp/operations/Pad.cc deleted file mode 100644 index c8dce698d..000000000 --- a/runtime/onert/core/src/interp/operations/Pad.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <cker/operation/Pad.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/Pad.h" - -namespace onert -{ -namespace interp -{ -namespace -{ - -void preparePad(ExecEnv *env, const ir::Operation &node) -{ - const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT); - const auto output_index = node.getOutputs().at(0); - - const auto input_tensor = env->tensorAt(input_index); - - const auto output_info = env->graph().operands().at(output_index).info(); - - // Check shape and type lhs is same with rhs - // TODO Util function to compare TensorInfo - if (output_info.total_size() == 0) - { - throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"}; - } - else - { - env->allocateIfNeeded(output_index, output_info); - } - - const auto output_tensor = env->tensorAt(output_index); - if (input_tensor->data_type() != output_tensor->data_type()) - { - throw std::runtime_error{"Interp(Pad): Invalid output type"}; - } -} - -void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor) -{ - const auto input_buffer = input_tensor->bufferRO(); - const auto pad_buffer = pad_tensor->bufferRO(); - auto output_buffer = output_tensor->buffer(); - - int32_t pad_rank = pad_tensor->dimension(0); - - const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); - const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); - const float *input_ptr = reinterpret_cast<const float *>(input_buffer); - const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer); - float *output_ptr = reinterpret_cast<float *>(output_buffer); - - nnfw::cker::Pad<float>(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape, - output_ptr, nullptr); -} - -void invokePad(const ExecEnv *env, const ir::Operation &node) -{ - const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT); - const auto pad_index = node.getInputs().at(ir::operation::Pad::PAD); - const auto output_index = node.getOutputs().at(0); - - const auto input_tensor = env->tensorAt(input_index); - const auto pad_tensor = env->tensorAt(pad_index); - const auto output_tensor = env->tensorAt(output_index); - - const auto data_type = input_tensor->data_type(); - - if (data_type == ir::DataType::FLOAT32) - { - invoke(input_tensor, pad_tensor, output_tensor); - } - else - { - throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"}; - } -} -} // namespace - -OpKernel *getPad() -{ - static OpKernel kernel = {preparePad, invokePad}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/Pool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc deleted file mode 100644 index 92f9d70b2..000000000 --- a/runtime/onert/core/src/interp/operations/Pool2D.cc +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <cker/operation/AveragePool.h> -#include <cker/operation/MaxPool.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/Pool2D.h" -#include "util/Utils.h" -#include "util/ShapeInference.h" -#include "misc/polymorphic_downcast.h" - -namespace onert -{ -namespace interp -{ -namespace pool2d -{ - -void preparePool2D(ExecEnv *env, const ir::Operation &node) -{ - const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node); - const auto in_index = node.getInputs().at(pool_node.INPUT); - const auto out_index = node.getOutputs().at(0); - - const auto in_tensor = env->tensorAt(in_index); - UNUSED_RELEASE(in_tensor); - - assert(in_tensor->num_dimensions() == 4); - - const auto output_info = env->graph().operands().at(out_index).info(); - if (output_info.total_size() == 0) - { - // Handle unspecified output shape - const auto infered_output_shape = - shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param()); - env->allocateIfNeeded( - out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo())); - } - else - { - env->allocateIfNeeded(out_index, output_info); - } - - auto out_tensor = env->tensorAt(out_index); - UNUSED_RELEASE(out_tensor); - - // Handle same ifm & ofm data type only - assert(in_tensor->data_type() == out_tensor->data_type()); - assert(out_tensor->num_dimensions() == 4); -} - -template <typename T> -void invoke(const nnfw::cker::PoolParams ¶ms, const nnfw::cker::Shape &in_shape, - const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr, - ir::operation::Pool2D::PoolType op_type) -{ - switch (op_type) - { - case ir::operation::Pool2D::PoolType::AVG: - nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr); - break; - case ir::operation::Pool2D::PoolType::MAX: - nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr); - break; - default: - throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"}; - break; - } -} - -void invokePool2DOps(const ExecEnv *env, const ir::Operation &node) -{ - const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node); - - const auto in_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - // Check lhs shape is same with rhs (with broadcast) - const auto in_tensor = env->tensorAt(in_index); - const auto out_tensor = env->tensorAt(out_index); - - // TODO support NCHW frontend - const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - const auto param = pool_node.param(); - const auto padding = - ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh); - // Calculate - nnfw::cker::PoolParams cker_param; - cker_param.filter_width = param.kw; - cker_param.filter_height = param.kh; - cker_param.padding_values.width = padding.left; - cker_param.padding_values.height = padding.top; - cker_param.stride_width = param.stride.horizontal; - cker_param.stride_height = param.stride.vertical; - - const auto data_type = in_tensor->data_type(); - if (data_type == ir::DataType::FLOAT32) - { - calculateActivationRange(param.activation, &cker_param.float_activation_min, - &cker_param.float_activation_max); - - const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); - const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); - const float *in_ptr 
= reinterpret_cast<const float *>(in_tensor->bufferRO()); - float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); - // Now, invoke() supports only Pool2D in float - invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type); - } - else - { - throw std::runtime_error{"NYI: Support float only"}; - } -} -} // namespace pool2d - -OpKernel *getPool2D() -{ - static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/Reshape.cc b/runtime/onert/core/src/interp/operations/Reshape.cc deleted file mode 100644 index 3a118456b..000000000 --- a/runtime/onert/core/src/interp/operations/Reshape.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "interp/Registration.h" - -namespace onert -{ -namespace interp -{ -namespace -{ - -void prepare(ExecEnv *env, const ir::Operation &node) -{ - const auto in_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - // Unspecified shape is not supported in operation node spec now - const auto output_info = env->graph().operands().at(out_index).info(); - env->allocateAndShareIfNeeded(out_index, output_info, in_index); - - assert(output_info.total_size() == env->graph().operands().at(in_index).info().total_size()); -} - -void invoke(const ExecEnv *env, const ir::Operation &node) -{ - const auto in_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO()) - { - // Same data - return; - } - - const auto output_info = env->graph().operands().at(out_index).info(); - memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(), - output_info.total_size()); -} - -} // namespace - -OpKernel *getReshape() -{ - static OpKernel kernel = {prepare, invoke}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc deleted file mode 100644 index d30f78deb..000000000 --- a/runtime/onert/core/src/interp/operations/Softmax.cc +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <cker/operation/SoftMax.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/Softmax.h" -#include "misc/polymorphic_downcast.h" - -namespace onert -{ -namespace interp -{ -namespace -{ - -void prepareSoftMax(ExecEnv *env, const ir::Operation &node) -{ - const auto in_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - const auto in_tensor = env->tensorAt(in_index); - UNUSED_RELEASE(in_tensor); - - assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2)); - - // Output shape should be same with input - // Output type is pre-defined in model - const auto output_shape = env->graph().operands().at(in_index).info().shape(); - const auto output_type = env->graph().operands().at(out_index).info().typeInfo(); - - const auto output_info = ir::OperandInfo::createStaticInfo(output_shape, output_type); - env->allocateIfNeeded(out_index, output_info); - - auto out_tensor = env->tensorAt(out_index); - UNUSED_RELEASE(out_tensor); - - // Check output shape is same with input - assert(out_tensor->num_dimensions() == out_tensor->num_dimensions()); - for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++) - { - assert(in_tensor->dimension(i) == out_tensor->dimension(i)); - } -} - -void invoke(const ITensor *in_tensor, const ITensor *out_tensor, - const ir::operation::Softmax::Param ¶m) -{ - const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); - float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); - - float beta = param.beta; - - if (in_tensor->num_dimensions() == 2) - { - uint32_t batch_size = in_tensor->dimension(0); - uint32_t input_size = in_tensor->dimension(1); - - nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr); - } - else if (in_tensor->num_dimensions() == 4) - { - const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); - const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); - - nnfw::cker::SoftmaxParams cker_param; - cker_param.beta = beta; - - nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr); - } - else - { - throw std::runtime_error{"Unsuported input dimension: support 2D or 4D"}; - } -} - -void invokeSoftMax(const ExecEnv *env, const ir::Operation &node) -{ - const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node); - - const auto in_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - const auto in_tensor = env->tensorAt(in_index); - const auto out_tensor = env->tensorAt(out_index); - - const auto in_data_type = in_tensor->data_type(); - const auto out_data_type = out_tensor->data_type(); - if ((in_data_type == ir::DataType::FLOAT32) && (out_data_type == ir::DataType::FLOAT32)) - { - invoke(in_tensor, out_tensor, softmax_node.param()); - } - else - { - throw std::runtime_error{"NYI: Support float32 only"}; - } -} - -} // namespace - -OpKernel *getSoftmax() -{ - static OpKernel kernel = {prepareSoftMax, invokeSoftMax}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/TransposeConv.cc b/runtime/onert/core/src/interp/operations/TransposeConv.cc deleted file mode 100644 index cc2ced26b..000000000 --- a/runtime/onert/core/src/interp/operations/TransposeConv.cc +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <cker/operation/TransposeConv.h> -#include <misc/polymorphic_downcast.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/TransposeConv.h" - -namespace onert -{ -namespace interp -{ -namespace -{ - -void prepareTransposeConv(ExecEnv *env, const ir::Operation &node) -{ - const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT); - const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); - const auto ofm_shape_index = node.getInputs().at(ir::operation::TransposeConv::OUTPUT_SHAPE); - const auto ofm_index = node.getOutputs().at(0); - - const auto ifm_tensor = env->tensorAt(ifm_index); - const auto ker_tensor = env->tensorAt(ker_index); - const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index); - - assert(ifm_tensor->num_dimensions() == 4); - assert(ker_tensor->num_dimensions() == 4); - assert(ofm_shape_tensor->num_dimensions() == 1); - - UNUSED_RELEASE(ifm_tensor); - UNUSED_RELEASE(ker_tensor); - UNUSED_RELEASE(ofm_shape_tensor); - - const auto output_info = env->graph().operands().at(ofm_index).info(); - if (output_info.total_size() == 0) - { - // TODO: Handle unspecified output shape - throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"}; - } - else - { - env->allocateIfNeeded(ofm_index, output_info); - } - - auto ofm_tensor = env->tensorAt(ofm_index); - UNUSED_RELEASE(ofm_tensor); - - // Handle same ifm & ofm data type only - if (ifm_tensor->data_type() != ofm_tensor->data_type()) - { - throw std::runtime_error{"Interp(TConv): Different I/O data dype"}; - } - - if (ofm_tensor->num_dimensions() != 4) - { - throw std::runtime_error{"Interp(TConv): Invalid output rank"}; - } -} - -void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor, - const ir::operation::TransposeConv::Param ¶m) -{ - const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
- const auto ker_shape = ker_tensor->tensorInfo().shape(); - const auto ker_height = ker_shape.dim(1); - const auto ker_width = ker_shape.dim(2); - const auto padding = ir::calculatePadding(param.padding, ofm_shape, ifm_shape, param.stride, - ker_width, ker_height); - - nnfw::cker::TransposeConvParams cker_param; - cker_param.padding_values.width = padding.left; - cker_param.padding_values.height = padding.top; - cker_param.stride_width = param.stride.horizontal; - cker_param.stride_height = param.stride.vertical; - cker_param.dilation_width_factor = 1; - cker_param.dilation_height_factor = 1; - - const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape()); - const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape()); - const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape()); - const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO()); - const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO()); - float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); - - nnfw::cker::TransposeConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, - cker_ofm_shape, ofm_ptr); -} - -void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node) -{ - const auto &tconv_node = - nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node); - - const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT); - const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); - const auto ofm_index = node.getOutputs().at(0); - - const auto ifm_tensor = env->tensorAt(ifm_index); - const auto ker_tensor = env->tensorAt(ker_index); - const auto ofm_tensor = env->tensorAt(ofm_index); - - const auto data_type = ifm_tensor->data_type(); - if (data_type == ir::DataType::FLOAT32) - { - invoke(ifm_tensor, ker_tensor, ofm_tensor, tconv_node.param()); - } - else - { - throw std::runtime_error{"Interp(TConv): Support float32 only"}; - } -} - -} // namespace - -OpKernel *getTransposeConv() -{ - static OpKernel kernel = {prepareTransposeConv, invokeTransposeConv}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc index 80c659b3a..07670c720 100644 --- a/runtime/onert/core/src/ir/DataType.cc +++ b/runtime/onert/core/src/ir/DataType.cc @@ -41,11 +41,17 @@ size_t sizeOfDataType(DataType data_type) case DataType::UINT8: return sizeof(uint8_t); case DataType::QUANT_INT8_SYMM: + case DataType::QUANT_INT8_ASYMM: + case DataType::QUANT_INT8_SYMM_PER_CHANNEL: return sizeof(int8_t); case DataType::FLOAT16: return sizeof(float16); case DataType::INT64: return sizeof(int64_t); + case DataType::QUANT_INT16_ASYMM: + return sizeof(int16_t); + case DataType::QUANT_INT16_SYMM: + return sizeof(int16_t); default: throw std::runtime_error{"Unsupported type size"}; } diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc index fe8b1b443..306572c99 100644 --- a/runtime/onert/core/src/ir/Graph.cc +++ b/runtime/onert/core/src/ir/Graph.cc @@ -16,18 +16,10 @@ #include "ir/Graph.h" -#include <algorithm> -#include <bitset> -#include <sstream> - -#include "util/logging.h" +#include "OperationValidator.h" #include "verifier/Verifier.h" -#include "ir/operation/LowerInfo.h" -#include "ir/operand/LowerInfo.h" -#include "ir/operand/PermuteFactor.h" -#include "ir/OperandIndexMap.h" -#include "ir/GraphIterator.h" -#include "backend/IConfig.h" + +#include 
"util/Set.h" namespace onert { @@ -36,6 +28,8 @@ namespace ir Graph::Graph() = default; +Graph::Graph(const Graph &) = default; + Graph::~Graph(void) = default; OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type) @@ -43,22 +37,91 @@ OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type) return _operands.emplace(shape, type); } -OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&node) +OperandIndex Graph::addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand) +{ + return _operands.push(std::move(operand), index); +} + +bool Graph::checkOperandsForOperation(const IOperation &operation) { - assert(isBuildingPhase()); - return _operations.push(std::move(node)); + auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED; + auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED; + for (auto &&input : inputs) + if (!operands().exist(input)) + return false; + for (auto &&input : outputs) + if (!operands().exist(input)) + return false; + return true; +} + +void Graph::linkOperandToOperation(OperationIndex index, const IOperation &operation) +{ + auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED; + auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED; + + for (auto &&input : inputs) + operands().at(input).insertUse(index); + for (auto &&output : outputs) + operands().at(output).setDef(index); +} + +OperationIndex Graph::addOperation(std::unique_ptr<IOperation> &&operation) +{ + const IOperation &op_ref = *operation; + if (!checkOperandsForOperation(op_ref)) + return OperationIndex{}; + auto ind = _operations.push(std::move(operation)); + if (ind.valid()) + linkOperandToOperation(ind, op_ref); + return ind; +} + +OperationIndex Graph::addOperation(OperationIndex index, std::unique_ptr<IOperation> &&operation) +{ + const IOperation &op_ref = *operation; + if (!checkOperandsForOperation(op_ref)) + return OperationIndex{}; + auto ind_gen = _operations.push(std::move(operation), index); + if (ind_gen.valid()) + { + assert(ind_gen == index); + linkOperandToOperation(index, op_ref); + } + return index; +} + +OperationIndex Graph::replaceOperation(OperationIndex index, + std::unique_ptr<IOperation> &&operation) +{ + const IOperation &op_ref = *operation; + if (!checkOperandsForOperation(op_ref) || !_operations.exist(index)) + return OperationIndex{}; + + // Check the new operation has the same inputs/outputs as the existing operation + const auto &old_op = _operations.at(index); + if (!(old_op.getInputs() == op_ref.getInputs() && old_op.getOutputs() == op_ref.getOutputs())) + { + return OperationIndex{}; + } + + return _operations.set(index, std::move(operation)); } void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data) { - assert(isBuildingPhase()); assert(_operands.exist(ind)); _operands.at(ind).data(std::move(data)); } +void Graph::changeShape(const OperandIndex &ind, const ir::Shape &new_shape) +{ + assert(_operands.exist(ind)); + _operands.at(ind).info().shape(new_shape); +} + void Graph::addInput(const OperandIndex &ind, const std::string &name) { - assert(isBuildingPhase()); if (!name.empty()) _name_to_input.emplace(name, IOIndex{_inputs.size()}); _inputs.append(ind); @@ -66,7 +129,6 @@ void Graph::addInput(const OperandIndex &ind, const std::string &name) void Graph::addOutput(const OperandIndex &ind, const std::string &name) { - assert(isBuildingPhase()); if (!name.empty()) 
_name_to_output.emplace(name, IOIndex{_outputs.size()}); _outputs.append(ind); @@ -84,62 +146,70 @@ IOIndex Graph::getOutputIndex(const std::string &name) const return (itr == _name_to_output.end()) ? IOIndex{} : itr->second; } -void Graph::finishBuilding(void) +void Graph::verify(void) const { - assert(isBuildingPhase()); - _phase = Phase::MODEL; - - initializeUseDef(); - sweepGarbageOperands(); - // Call graph verifications for the MODEL phase { - assert(verifier::DAGChecker().verify(*this)); - assert(verifier::EdgeConsistencyChecker().verify(*this)); + // Except for edge consistency, the user might have been given a bad model + // so here it throws an execption rather than assertion. + if (!verifier::InputOutputChecker().verify(*this)) + throw std::runtime_error{"One of model input and output operands does not exist."}; + if (!verifier::DAGChecker().verify(*this)) + throw std::runtime_error{"The graph is cyclic."}; + assert(verifier::EdgeChecker().verify(*this)); } + + // Check shape independent operation feature + // - Operand type + // - Shape independent parameter + OperationValidator{*this}(); } void Graph::initializeUseDef() { - operations().iterate([&](const OperationIndex &index, const Operation &node) -> void { - auto outputs = node.getOutputs(); - for (auto output : outputs) + operations().iterate([&](const OperationIndex &index, const IOperation &node) -> void { + const auto &outputs = node.getOutputs(); + for (auto &&output : outputs | ir::Remove::UNDEFINED) { operands().at(output).setDef(index); } - for (auto input : node.getInputs() | ir::Remove::UNDEFINED) + for (auto &&input : node.getInputs() | ir::Remove::UNDEFINED) { operands().at(input).insertUse(index); } }); } -void Graph::sweepGarbageOperands() +std::vector<ir::OperationIndex> Graph::topolSortOperations() const { - // Remove operands that are not used by any operations, except Graph inputs/outputs - ir::OperandIndexMap<bool> visited; - - operations().iterate([&](const OperationIndex &, const Operation &node) { - for (auto ind : node.getInputs() + node.getOutputs()) - { - visited[ind] = true; - } - }); - - // Graph's inputs/outputs are always reachable - for (auto ind : getInputs() + getOutputs()) - { - visited[ind] = true; - } - - operands().iterate([&](const OperandIndex &ind, const Operand &) { - if (!visited[ind]) + std::vector<ir::OperationIndex> ret; + util::Set<ir::OperationIndex> unvisited; + operations().iterate( + [&](const ir::OperationIndex &index, const ir::IOperation &) { unvisited.add(index); }); + + std::function<void(const ir::OperationIndex &, const ir::IOperation &)> dfs = + [&](const ir::OperationIndex &index, const ir::IOperation &op) -> void { + if (!unvisited.contains(index)) + return; + unvisited.remove(index); + + for (const auto &output : op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { - VERBOSE(Graph::sweepGarbageOperands) << "Sweep garbage operand " << ind.value() << std::endl; - operands().remove(ind); + const auto &operand = operands().at(output); + for (const auto &use : operand.getUses()) + { + dfs(use, operations().at(use)); + } } - }); + ret.push_back(index); + }; + operations().iterate(dfs); + + assert(unvisited.empty()); // All of the nodes must have been visited + // Reversing Postorder DFS result to make it sorted in topoligical order + std::reverse(ret.begin(), ret.end()); + return ret; } } // namespace ir diff --git a/runtime/onert/core/src/ir/Graph.test.cc b/runtime/onert/core/src/ir/Graph.test.cc new file mode 100644 index 000000000..144500745 --- 
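A minimal caller-side sketch (editorial, not part of the patch) of the Graph API reworked above: addOperation() now returns an invalid OperationIndex instead of asserting when a referenced operand is missing, and verify() throws for bad model inputs/outputs or a cyclic graph. The helper name addNodeOrThrow is illustrative only.

#include "ir/Graph.h"

#include <memory>
#include <stdexcept>
#include <utility>

// Illustrative helper: add an operation and surface a failed operand check
// as an exception instead of silently dropping the node.
onert::ir::OperationIndex addNodeOrThrow(onert::ir::Graph &graph,
                                         std::unique_ptr<onert::ir::IOperation> op)
{
  auto index = graph.addOperation(std::move(op));
  if (!index.valid()) // operand check failed -> an invalid index is returned
    throw std::runtime_error{"operation refers to a non-existing operand"};
  return index;
}

// After building, graph.verify() (which takes over from the removed
// finishBuilding()) throws on missing model inputs/outputs or a cyclic graph.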
/dev/null +++ b/runtime/onert/core/src/ir/Graph.test.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Graph.h" +#include "ir/operation/BinaryArithmetic.h" + +#include <gtest/gtest.h> + +TEST(Graph, neg_inputs_and_outputs) +{ + onert::ir::Graph graph; + + onert::ir::OperandIndex index0{0u}; + onert::ir::OperandIndex index1{1u}; + + graph.addInput({index0}); + graph.addInput({index1}); + + onert::ir::OperandIndex index10{10u}; + onert::ir::OperandIndex index11{11u}; + onert::ir::OperandIndex index12{12u}; + + graph.addOutput({index10}); + graph.addOutput({index11}); + graph.addOutput({index12}); + + ASSERT_EQ(graph.getInputs().size(), 2); + ASSERT_EQ(graph.getOutputs().size(), 3); + + onert::ir::IOIndex io_index0{0}; + onert::ir::IOIndex io_index1{1}; + onert::ir::IOIndex io_index2{2}; + + ASSERT_EQ(graph.getInputs().at(io_index0), 0); + ASSERT_EQ(graph.getInputs().at(io_index1), 1); + + ASSERT_EQ(graph.getOutputs().at(io_index0), 10); + ASSERT_EQ(graph.getOutputs().at(io_index1), 11); + ASSERT_EQ(graph.getOutputs().at(io_index2), 12); + + EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range); +} + +using namespace onert::ir; + +OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs, + const OperandIndexSequence outputs) +{ + // Add "ADD" operation + operation::BinaryArithmetic::Param param; + param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param.activation = Activation::NONE; + return graph.addOperation(std::make_unique<operation::BinaryArithmetic>(inputs, outputs, param)); +} + +TEST(Graph, OneOpGraphSimpleValid) +{ + // Simple Graph with just one Add operation + + Graph graph; + + // Add tensors + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + auto lhs = graph.addOperand(shape, type); + auto rhs = graph.addOperand(shape, type); + auto res = graph.addOperand(shape, type); + + addAddOperation(graph, {lhs, rhs}, {res}); + + // Set model inputs/outputs + graph.addInput(lhs); + graph.addInput(rhs); + graph.addOutput(res); + + graph.verify(); + + SUCCEED(); +} + +TEST(Graph, neg_InvalidGraph_BadInput) +{ + Graph graph; + + // Add tensors + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + auto in = graph.addOperand(shape, type); + auto out = graph.addOperand(shape, type); + + // Set model inputs/outputs + graph.addInput(in); + graph.addOutput(out); + graph.addInput(OperandIndex{89}); // Non-exisiting operand! + + EXPECT_ANY_THROW(graph.verify()); +} + +TEST(Graph, neg_InvalidGraph_BadOutput) +{ + Graph graph; + + // Add tensors + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + auto in = graph.addOperand(shape, type); + auto out = graph.addOperand(shape, type); + + // Set model inputs/outputs + graph.addInput(in); + graph.addOutput(out); + graph.addOutput(OperandIndex{12}); // Non-exisiting operand! 
+ + EXPECT_ANY_THROW(graph.verify()); +} + +TEST(Graph, neg_InvalidAddOperation_BadInputIndex) +{ + Graph graph; + + // Add tensors + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + auto lhs = graph.addOperand(shape, type); + auto rhs = graph.addOperand(shape, type); + auto res = graph.addOperand(shape, type); + + // Set model inputs/outputs + graph.addInput(lhs); + graph.addInput(rhs); + graph.addOutput(res); + + ASSERT_FALSE(addAddOperation(graph, {lhs, OperandIndex{99}}, {res}).valid()); +} diff --git a/runtime/onert/core/src/ir/GraphIterator.cc b/runtime/onert/core/src/ir/GraphIterator.cc deleted file mode 100644 index 4bea1a55d..000000000 --- a/runtime/onert/core/src/ir/GraphIterator.cc +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GraphIterator.h" - -#include "ir/OperationIndexMap.h" -#include "compiler/LoweredGraph.h" - -namespace onert -{ -namespace ir -{ - -// -// Graph::DefaultIterator -// - -template <bool is_const> -void DefaultIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const -{ - graph.operations().iterate( - [&](const OperationIndex &index, NodeRef node) -> void { fn(index, node); }); -} - -// -// Graph::PostDfsIterator -// - -template <bool is_const> -void PostDfsIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const -{ - assert(!graph.isBuildingPhase()); // Restrict iteration condition - - OperationIndexMap<bool> visited; - graph.operations().iterate([&](const OperationIndex &index, NodeRef) { visited[index] = false; }); - - std::function<void(const OperationIndex &, NodeRef)> dfs_recursive = - [&](const OperationIndex &index, NodeRef node) -> void { - if (visited[index]) - return; - visited[index] = true; - - for (const auto output : node.getOutputs() | Remove::DUPLICATED) - { - const auto &operand = graph.operands().at(output); - for (const auto &use : operand.getUses()) - { - dfs_recursive(use, graph.operations().at(use)); - } - } - - fn(index, node); - }; - - graph.operations().iterate(dfs_recursive); - - // All of the operations(nodes) must have been visited. 
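An editorial sketch of how the new Graph::topolSortOperations() shown earlier in this patch can stand in for the PostDfsIterator removed below: it returns operation indices in topological order, so every producer appears before its consumers. The function and variable names here are illustrative.

#include "ir/Graph.h"

void visitInTopologicalOrder(const onert::ir::Graph &graph)
{
  // topolSortOperations() reverses a post-order DFS over operand uses, so an
  // operation's input-producing operations come earlier in the vector.
  for (const auto &op_index : graph.topolSortOperations())
  {
    const auto &op = graph.operations().at(op_index);
    (void)op; // visit op here
  }
}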
- assert(std::all_of(visited.begin(), visited.end(), - [](const std::pair<const OperationIndex, bool> &v) { return v.second; })); -} - -template <bool is_const> -void PostDfsIterator<is_const>::iterateOpSeqs(LoweredGraphRef lowered_graph, - const OpSeqIterFn &fn) const -{ - std::unordered_map<OpSequenceIndex, bool> visited; - lowered_graph.op_seqs().iterate( - [&](const OpSequenceIndex &index, OpSequenceRef) { visited[index] = false; }); - - std::function<void(const OpSequenceIndex &, OpSequenceRef)> dfs_recursive = - [&](const OpSequenceIndex &index, OpSequenceRef op_seq) -> void { - if (visited[index]) - return; - visited[index] = true; - - for (const auto output : op_seq.getOutputs() | Remove::DUPLICATED) - { - const auto &operand = lowered_graph.graph().operands().at(output); - for (const auto &use : operand.getUses()) - { - const auto use_op_seq_index = lowered_graph.op_seqs().getOperation(use); - dfs_recursive(use_op_seq_index, lowered_graph.op_seqs().at(use_op_seq_index)); - } - } - - fn(index, op_seq); - }; - - lowered_graph.op_seqs().iterate(dfs_recursive); - - // All of the operations(nodes) must have been visited. - assert(std::all_of(visited.begin(), visited.end(), - [](const std::pair<const OpSequenceIndex, bool> &v) { return v.second; })); -} - -// Explicit instantiations to have implementation in the source file. -// NOTE If these instatiations were in the top of this file, `iterate` is compiled and saved in -// `GraphIterator.cc.o` but `iterateOpSeqs`. This happens only when cross-building for Android. -// (Maybe a bug of NDK toolchain(clang)?) - -template class DefaultIterator<true>; -template class DefaultIterator<false>; - -template class PostDfsIterator<true>; -template class PostDfsIterator<false>; - -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/GraphIterator.h b/runtime/onert/core/src/ir/GraphIterator.h deleted file mode 100644 index b54314e0e..000000000 --- a/runtime/onert/core/src/ir/GraphIterator.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_GRAPH_ITERATOR_H__ -#define __ONERT_IR_GRAPH_ITERATOR_H__ - -#include <type_traits> - -#include "ir/Index.h" - -namespace onert -{ -namespace compiler -{ -class LoweredGraph; -} // namespace compiler -} // namespace onert - -namespace onert -{ -namespace ir -{ - -class Graph; -class Operation; -class OpSequence; - -template <bool is_const> class Iterator -{ -public: - using GraphRef = typename std::conditional<is_const, const Graph &, Graph &>::type; - using IndexRef = const OperationIndex &; - using NodeRef = typename std::conditional<is_const, const Operation &, Operation &>::type; - using IterFn = std::function<void(IndexRef, NodeRef)>; - -public: - virtual ~Iterator() = default; - virtual void iterate(GraphRef graph, const IterFn &fn) const = 0; -}; - -template <bool is_const = false> class DefaultIterator final : public Iterator<is_const> -{ -public: - using GraphRef = typename Iterator<is_const>::GraphRef; - using IndexRef = typename Iterator<is_const>::IndexRef; - using NodeRef = typename Iterator<is_const>::NodeRef; - using IterFn = typename Iterator<is_const>::IterFn; - -public: - void iterate(GraphRef graph, const IterFn &fn) const; -}; -using DefaultConstIterator = DefaultIterator<true>; - -template <bool is_const = false> class PostDfsIterator final : public Iterator<is_const> -{ -public: - using GraphRef = typename Iterator<is_const>::GraphRef; - using IndexRef = typename Iterator<is_const>::IndexRef; - using NodeRef = typename Iterator<is_const>::NodeRef; - using IterFn = typename Iterator<is_const>::IterFn; - using LoweredGraphRef = - typename std::conditional<is_const, const typename compiler::LoweredGraph &, - typename compiler::LoweredGraph &>::type; - using OpSequenceRef = typename std::conditional<is_const, const OpSequence &, OpSequence &>::type; - using OpSeqIndexRef = const OpSequenceIndex &; - using OpSeqIterFn = std::function<void(OpSeqIndexRef, OpSequenceRef)>; - -public: - void iterate(GraphRef graph, const IterFn &fn) const; - void iterateOpSeqs(LoweredGraphRef lowered_graph, const OpSeqIterFn &f) const; -}; -using PostDfsConstIterator = PostDfsIterator<true>; - -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_GRAPH_ITERATOR_H__ diff --git a/runtime/onert/core/src/ir/LayoutSet.cc b/runtime/onert/core/src/ir/LayoutSet.cc index bd3f438ad..732460aa2 100644 --- a/runtime/onert/core/src/ir/LayoutSet.cc +++ b/runtime/onert/core/src/ir/LayoutSet.cc @@ -23,7 +23,7 @@ namespace ir LayoutSet::LayoutSet(std::initializer_list<Layout> layouts) { - for (auto layout : layouts) + for (auto &&layout : layouts) { _set.insert(layout); } @@ -32,7 +32,7 @@ LayoutSet::LayoutSet(std::initializer_list<Layout> layouts) LayoutSet LayoutSet::operator|(const LayoutSet &other) const { auto ret = *this; - for (auto layout : other) + for (auto &&layout : other) { ret.add(layout); } @@ -42,7 +42,7 @@ LayoutSet LayoutSet::operator|(const LayoutSet &other) const LayoutSet LayoutSet::operator&(const LayoutSet &other) const { LayoutSet ret; - for (auto layout : other) + for (auto &&layout : other) { if (contains(layout)) { @@ -55,7 +55,7 @@ LayoutSet LayoutSet::operator&(const LayoutSet &other) const LayoutSet LayoutSet::operator-(const LayoutSet &other) const { auto ret = *this; - for (auto layout : other) + for (auto &&layout : other) { ret.remove(layout); } diff --git a/runtime/onert/core/src/ir/LayoutSet.h b/runtime/onert/core/src/ir/LayoutSet.h index 6ce4e38c6..be077f2f0 100644 --- a/runtime/onert/core/src/ir/LayoutSet.h +++ 
b/runtime/onert/core/src/ir/LayoutSet.h @@ -17,6 +17,7 @@ #ifndef __ONERT_IR_LAYOUT_SET_H__ #define __ONERT_IR_LAYOUT_SET_H__ +#include <cstdint> #include <initializer_list> #include <unordered_set> diff --git a/runtime/onert/core/src/ir/LayoutSet.test.cc b/runtime/onert/core/src/ir/LayoutSet.test.cc new file mode 100644 index 000000000..fc956abe8 --- /dev/null +++ b/runtime/onert/core/src/ir/LayoutSet.test.cc @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "LayoutSet.h" + +#include <gtest/gtest.h> + +using onert::ir::Layout; +using onert::ir::LayoutSet; + +TEST(ir_LayoutSet, neg_add_remove) +{ + LayoutSet set{Layout::NCHW}; + set.remove(Layout::NHWC); + ASSERT_EQ(set.size(), 1); + set.add(Layout::NHWC); + ASSERT_EQ(set.size(), 2); + set.remove(Layout::NHWC); + ASSERT_EQ(set.size(), 1); + set.remove(Layout::NCHW); + ASSERT_EQ(set.size(), 0); + set.remove(Layout::NCHW); + ASSERT_EQ(set.size(), 0); +} + +TEST(ir_LayoutSet, neg_add_twice) +{ + LayoutSet set; + set.add(Layout::NHWC); + ASSERT_EQ(set.size(), 1); + set.add(Layout::NHWC); + ASSERT_EQ(set.size(), 1); +} + +TEST(ir_LayoutSet, set_operators) +{ + LayoutSet set1{Layout::NCHW}; + LayoutSet set2{Layout::NHWC}; + LayoutSet set3 = set1 | set2; + + ASSERT_EQ(set3.size(), 2); + + ASSERT_EQ((set3 - set1).size(), 1); + ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true); + ASSERT_EQ((set3 - set2).size(), 1); + ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true); + ASSERT_EQ((set3 - set3).size(), 0); + + ASSERT_EQ((set3 & set1).size(), 1); + ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true); + ASSERT_EQ((set3 & set2).size(), 1); + ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true); + ASSERT_EQ((set1 & set2).size(), 0); +} diff --git a/runtime/onert/core/src/ir/MockNode.h b/runtime/onert/core/src/ir/MockNode.h new file mode 100644 index 000000000..0e7ed977b --- /dev/null +++ b/runtime/onert/core/src/ir/MockNode.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_TEST_GRAPH_MOCK_NODE_H__ +#define __ONERT_TEST_GRAPH_MOCK_NODE_H__ + +#include "ir/Operation.h" +#include "ir/OperandIndexSequence.h" + +namespace onert_test +{ +namespace ir +{ + +class SimpleMock : public onert::ir::Operation +{ +public: + SimpleMock(const onert::ir::OperandIndexSequence &inputs, + const onert::ir::OperandIndexSequence &outputs) + : Operation{onert::ir::OperandConstraint::createAny()} + { + setInputs(inputs); + setOutputs(outputs); + } + +public: + void accept(onert::ir::OperationVisitor &) const override {} + onert::ir::OpCode opcode() const final { return onert::ir::OpCode::Invalid; } +}; + +} // namespace ir +} // namespace onert_test + +#endif // __ONERT_TEST_GRAPH_MOCK_NODE_H__ diff --git a/runtime/onert/core/src/ir/OpSequence.cc b/runtime/onert/core/src/ir/OpSequence.cc deleted file mode 100644 index e2b989d8c..000000000 --- a/runtime/onert/core/src/ir/OpSequence.cc +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/OpSequence.h" - -#include "ir/Operations.h" -#include "ir/OperationVisitor.h" -#include <sstream> - -namespace -{ - -std::string getStrFromIndice(const onert::ir::OperandIndexSequence &indice) -{ - std::string str; - for (const auto &ind : indice) - { - str += std::to_string(ind.value()); - str.push_back(','); - } - if (str.back() == ',') - str.pop_back(); - - return str; -} -} - -namespace onert -{ -namespace ir -{ - -OpSequence::OpSequence(Layout layout) : _layout{layout}, _has_dynamic_tensor{false} -{ - // DO NOTHING -} - -void OpSequence::accept(OperationVisitor &v) const { v.visit(*this); } - -// TODO: Impl Dumper instead of this method -std::string getStrFromOpSeq(const OpSequence &op_seq, const Operations &operations) -{ - // " OpSequence IN(0,1,2) -> { op0(0,1,2:3), op1(3:4), op2(4:5) } -> OUT(5)" - std::stringstream ss; - ss << " OpSequence IN(" << getStrFromIndice(op_seq.getInputs()) << ") -> {"; - for (const auto &op_idx : op_seq) - { - ss << " " << op_idx.value() << "(" << operations.at(op_idx).name() << ":" - << getStrFromIndice(operations.at(op_idx).getInputs()) << ":" - << getStrFromIndice(operations.at(op_idx).getOutputs()) << ")"; - } - ss << " } -> OUT(" << getStrFromIndice(op_seq.getOutputs()) << ")"; - return ss.str(); -} - -void OpSequence::remove(const OperationIndex &index) -{ - assert(exist(index)); - for (auto it = _operations.cbegin(); it != _operations.cend(); ++it) - { - if (*it == index) - { - _operations.erase(it); - break; - } - } -} - -bool OpSequence::exist(const OperationIndex &index) const -{ - for (const auto &inner_op_idx : _operations) - { - if (inner_op_idx == index) - { - return true; - } - } - return false; -} - -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/OpSequences.cc b/runtime/onert/core/src/ir/OpSequences.cc deleted file mode 100644 index 68884783e..000000000 --- a/runtime/onert/core/src/ir/OpSequences.cc +++ /dev/null 
@@ -1,124 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/OpSequences.h" -#include "util/logging.h" -#include <memory> - -#include <cassert> -#include <string> - -namespace onert -{ -namespace ir -{ - -OpSequenceIndex OpSequences::emplace(const OperationIndex &index, Layout layout) -{ - std::unique_ptr<OpSequence> op_seq = std::make_unique<OpSequence>(layout); - op_seq->appendOperation(index); - const OpSequenceIndex &seq_index = push(std::move(op_seq)); - cacheSequenceIndex(seq_index, index); - return seq_index; -} - -OpSequenceIndex OpSequences::emplace(std::unique_ptr<OpSequence> &&op_seq) -{ - auto &operations = op_seq->operations(); - const OpSequenceIndex &seq_index = push(std::move(op_seq)); - for (const auto &op_idx : operations) - { - cacheSequenceIndex(seq_index, op_idx); - } - return seq_index; -} - -void OpSequences::cacheSequenceIndex(const OpSequenceIndex &seq_index, - const OperationIndex &op_index) const -{ - _seq_indexes.emplace(op_index, seq_index); -} - -OpSequenceIndex *OpSequences::findSequenceIndex(const OperationIndex &operation_index) const -{ - // If opration_index is cached, return sequence_index from cache - if (_seq_indexes.count(operation_index)) - { - auto &op_seq_index = _seq_indexes.at(operation_index); - if (_objects.count(op_seq_index) && _objects.at(op_seq_index)->exist(operation_index)) - { - return &op_seq_index; - } - else - { - _seq_indexes.erase(operation_index); - return nullptr; - } - } - return nullptr; -} - -bool OpSequences::containsOperation(const OperationIndex &operation_index) const -{ - return findOperation(operation_index).valid(); -} - -OpSequenceIndex OpSequences::getOperation(const OperationIndex &operation_index) const -{ - OpSequenceIndex ret = findOperation(operation_index); - assert(ret.valid()); - return ret; -} - -void OpSequences::removeFromOpSequence(const OperationIndex &operation_index) -{ - const auto op_seq_index = findOperation(operation_index); - auto &op_seq = at(op_seq_index); - _seq_indexes.erase(operation_index); - op_seq.remove(operation_index); - if (op_seq.size() == 0) - { - remove(op_seq_index); - } -} - -OpSequenceIndex OpSequences::findOperation(const OperationIndex &operation_index) const -{ - if (OpSequenceIndex *op_seq_index = findSequenceIndex(operation_index)) - return *op_seq_index; - - for (auto &e : _objects) - { - OpSequence &object = *e.second; - auto it = find(object.operations().begin(), object.operations().end(), operation_index); - if (it != object.operations().end()) - { - cacheSequenceIndex(e.first, operation_index); - return e.first; - } - } - throw std::runtime_error("Operation not found"); -} - -void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations) -{ - op_seqs.iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) { - VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl; - }); -} - -} // namespace ir -} 
// namespace onert diff --git a/runtime/onert/core/src/ir/Operand.cc b/runtime/onert/core/src/ir/Operand.cc index e29c7a6ec..18981dbf1 100644 --- a/runtime/onert/core/src/ir/Operand.cc +++ b/runtime/onert/core/src/ir/Operand.cc @@ -46,5 +46,11 @@ void Operand::setDef(const OperationIndex &idx) { _def = idx; } void Operand::unsetDef() { _def = OperationIndex{}; } +void Operand::clearDefUse() +{ + unsetDef(); + _uses.clear(); +} + } // namespace ir } // namespace onert diff --git a/runtime/onert/core/src/ir/Operand.test.cc b/runtime/onert/core/src/ir/Operand.test.cc new file mode 100644 index 000000000..0b858792a --- /dev/null +++ b/runtime/onert/core/src/ir/Operand.test.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Graph.h" + +#include "MockNode.h" +#include "verifier/Verifier.h" + +#include <gtest/gtest.h> + +#include <memory> +#include <typeindex> + +namespace +{ + +using IndexSet = onert::ir::OperandIndexSequence; +using Mock = onert_test::ir::SimpleMock; + +} // namespace + +TEST(ir_Operand, neg_usedef) +{ + onert::ir::Graph graph; + onert::ir::verifier::DAGChecker verifier; + + onert::ir::Shape shape(3); + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + // Model Input/Output + auto input_operand = graph.addOperand(shape, type); + auto output_operand = graph.addOperand(shape, type); + + graph.addInput(input_operand); + graph.addOutput(output_operand); + + // MockNode1 + auto operand_index1 = graph.addOperand(shape, type); + auto mocknode_index1 = + graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1})); + + // MockNode2 + auto operand_index2 = graph.addOperand(shape, type); + auto mocknode_index2 = + graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2})); + + // MockNode3(two input) + auto multiinput_index = graph.addOperation( + std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand})); + + graph.verify(); + + ASSERT_TRUE(verifier.verify(graph)); + + // Check def + ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1); + ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2); + ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index); + + ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2); + ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index); + + // Check use + ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true); + ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true); + ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false); + ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true); + ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true); + + 
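A small illustrative sketch (not from the patch) of the Operand::clearDefUse() helper introduced above, assuming a non-const operand reference is available: it resets the defining operation and empties the use list in one call.

#include "ir/Operand.h"

#include <cassert>

void resetDefUse(onert::ir::Operand &operand)
{
  operand.clearDefUse();
  assert(!operand.getDef().valid());     // def is back to an invalid index
  assert(operand.getUses().size() == 0); // use list is emptied
}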
ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2); + ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1); + ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0); +} diff --git a/runtime/onert/core/src/ir/OperandIndexSequence.cc b/runtime/onert/core/src/ir/OperandIndexSequence.cc index 73f928280..a15b6d0d6 100644 --- a/runtime/onert/core/src/ir/OperandIndexSequence.cc +++ b/runtime/onert/core/src/ir/OperandIndexSequence.cc @@ -31,7 +31,7 @@ OperandIndexSequence::OperandIndexSequence(std::initializer_list<OperandIndex> l OperandIndexSequence::OperandIndexSequence(std::initializer_list<int32_t> list) { - for (auto val : list) + for (auto &&val : list) { _vec.emplace_back(static_cast<uint32_t>(val)); } @@ -39,7 +39,7 @@ OperandIndexSequence::OperandIndexSequence(std::initializer_list<int32_t> list) OperandIndexSequence::OperandIndexSequence(std::initializer_list<uint32_t> list) { - for (auto val : list) + for (auto &&val : list) { _vec.emplace_back(val); } @@ -55,6 +55,11 @@ void OperandIndexSequence::replace(const OperandIndex &from, const OperandIndex std::replace(_vec.begin(), _vec.end(), from, to); } +bool OperandIndexSequence::operator==(const OperandIndexSequence &other) const +{ + return _vec == other._vec; +} + OperandIndexSequence OperandIndexSequence::operator+(const OperandIndexSequence &other) const { OperandIndexSequence ret = *this; @@ -62,10 +67,10 @@ OperandIndexSequence OperandIndexSequence::operator+(const OperandIndexSequence return ret; } -std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &op_seq) +std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &operand_seq) { std::string delimeter; - for (const auto &ind : op_seq._vec) + for (const auto &ind : operand_seq._vec) { o << delimeter << ind; delimeter = ','; diff --git a/runtime/onert/core/src/ir/OperandIndexSequence.test.cc b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc new file mode 100644 index 000000000..588c4e419 --- /dev/null +++ b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/OperandIndexSequence.h" + +#include <gtest/gtest.h> + +using onert::ir::OperandIndex; +using onert::ir::OperandIndexSequence; + +TEST(ir_OperandIndexSequence, neg_append) +{ + OperandIndexSequence iset{0, 2, 4, 8}; + + ASSERT_EQ(iset.size(), 4); + + iset.append(OperandIndex{10}); + + ASSERT_EQ(iset.size(), 5); + + onert::ir::IOIndex index1{1}; + onert::ir::IOIndex index2{4}; + + ASSERT_EQ(iset.at(index1), 2); + ASSERT_EQ(iset.at(index2), 10); + + ASSERT_TRUE(iset.contains(OperandIndex{2})); + ASSERT_TRUE(iset.contains(OperandIndex{10})); + ASSERT_FALSE(iset.contains(OperandIndex{11})); +} + +TEST(graph_OperandIndexSequence, neg_replace) +{ + OperandIndexSequence iset{0, 1, 2, 3}; + + iset.replace(OperandIndex{1}, OperandIndex{9}); + ASSERT_FALSE(iset.contains(OperandIndex{1})); + ASSERT_TRUE(iset.contains(OperandIndex{9})); +} diff --git a/runtime/onert/core/src/ir/Operands.cc b/runtime/onert/core/src/ir/Operands.cc index ab32e478a..f8cfd16ef 100644 --- a/runtime/onert/core/src/ir/Operands.cc +++ b/runtime/onert/core/src/ir/Operands.cc @@ -29,7 +29,7 @@ Operands::Operands(const Operands &obj) obj.iterate([&](const OperandIndex &index, const Operand &operand) { _objects.emplace(index, std::make_unique<Operand>(operand)); }); - _index_count = obj._index_count; + _next_index = obj._next_index; } } // namespace ir diff --git a/runtime/onert/core/src/ir/Operands.test.cc b/runtime/onert/core/src/ir/Operands.test.cc new file mode 100644 index 000000000..aff228b10 --- /dev/null +++ b/runtime/onert/core/src/ir/Operands.test.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/Operands.h" + +#include <gtest/gtest.h> + +TEST(ir_Operands, neg_set_test) +{ + onert::ir::Operands set; + + onert::ir::Shape shape0{1, 2, 3}; + + onert::ir::Shape shape1(4); + shape1.dim(0) = 10; + shape1.dim(1) = 20; + shape1.dim(2) = 30; + shape1.dim(3) = 40; + + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + set.emplace(shape0, type); + set.emplace(shape1, type); + + ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true); + ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true); + ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false); + + ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1); + ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2); + ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3); +} diff --git a/runtime/onert/core/src/ir/Operation.cc b/runtime/onert/core/src/ir/Operation.cc index 04be8c0d9..64792525d 100644 --- a/runtime/onert/core/src/ir/Operation.cc +++ b/runtime/onert/core/src/ir/Operation.cc @@ -24,22 +24,33 @@ namespace ir { Operation::Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs, - const OperandIndexSequence &outputs) - : _input_constr{input_constr}, _inputs{inputs}, _outputs{outputs} + const OperandIndexSequence &outputs, OperandConstraint output_constr) + : _input_constr{input_constr}, _output_constr{output_constr} { + setInputs(inputs); + setOutputs(outputs); } -Operation::Operation(OperandConstraint input_constr) : _input_constr{input_constr} {} +Operation::Operation(OperandConstraint input_constr, OperandConstraint output_constr) + : _input_constr{input_constr}, _output_constr{output_constr} +{ +} Operation::~Operation() = default; void Operation::setInputs(const OperandIndexSequence &indexes) { - assert(_input_constr.check(indexes.size())); + if (!_input_constr.check(indexes.size())) + throw std::runtime_error{"Invalid number of input tensors for this operation."}; _inputs = indexes; } -void Operation::setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; } +void Operation::setOutputs(const OperandIndexSequence &indexes) +{ + if (!_output_constr.check(indexes.size())) + throw std::runtime_error{"Invalid number of output tensors for this operation."}; + _outputs = indexes; +} void Operation::replaceInputs(const OperandIndex &from, const OperandIndex &to) { diff --git a/runtime/onert/core/src/ir/Operation.test.cc b/runtime/onert/core/src/ir/Operation.test.cc new file mode 100644 index 000000000..b3c4e852d --- /dev/null +++ b/runtime/onert/core/src/ir/Operation.test.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/Graph.h" +#include "ir/Index.h" +#include "ir/OperandIndexSequence.h" +#include "ir/operation/Concat.h" +#include "ir/operation/Conv2D.h" + +#include <gtest/gtest.h> + +#include <memory> +#include <stdexcept> + +using Index = onert::ir::IOIndex; +using IndexSet = onert::ir::OperandIndexSequence; + +TEST(ir_Operation_setIO, operation_setIO_conv) +{ + onert::ir::Graph graph; + + onert::ir::Shape shape{3}; + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + // Add Conv + using Graph = onert::ir::operation::Conv2D; + + auto input_operand = graph.addOperand(shape, type); + auto kernel_operand = graph.addOperand(shape, type); + auto bias_operand = graph.addOperand(shape, type); + IndexSet inputs{input_operand, kernel_operand, bias_operand}; + + Graph::Param conv_params; + conv_params.padding.type = onert::ir::PaddingType::SAME; + conv_params.stride.horizontal = 1; + conv_params.stride.vertical = 1; + conv_params.activation = onert::ir::Activation::NONE; + + auto output_operand = graph.addOperand(shape, type).value(); + IndexSet outputs{output_operand}; + + auto conv = std::make_unique<Graph>(inputs, outputs, conv_params); + + ASSERT_NE(conv, nullptr); + ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value()); + conv->setInputs({8, 9, 10}); + ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value()); + ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8); +} + +TEST(ir_Operation_setIO, neg_operation_setIO_concat) +{ + onert::ir::Graph graph; + + onert::ir::Shape shape{3}; + + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + using Graph = onert::ir::operation::Concat; + + // Add Concat + IndexSet inputs; + for (int i = 0; i < 6; ++i) + { + inputs.append(graph.addOperand(shape, type)); + } + + Graph::Param concat_params{0}; + + auto output_operand = graph.addOperand(shape, type).value(); + IndexSet outputs{output_operand}; + + auto concat = std::make_unique<Graph>(inputs, outputs, concat_params); + + ASSERT_NE(concat, nullptr); + ASSERT_EQ(concat->getInputs().size(), 6); + ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value()); + + concat->setInputs({80, 6, 9, 11}); + ASSERT_EQ(concat->getInputs().size(), 4); + ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value()); + ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80); + ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9); + ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range); +} diff --git a/runtime/onert/core/src/ir/OperationCloner.cc b/runtime/onert/core/src/ir/OperationCloner.cc index b4e60f0bc..64e1cc807 100644 --- a/runtime/onert/core/src/ir/OperationCloner.cc +++ b/runtime/onert/core/src/ir/OperationCloner.cc @@ -23,6 +23,23 @@ namespace onert namespace ir { +namespace +{ + +class OperationCloner : public OperationVisitor +{ +public: +#define OP(Name) void visit(const operation::Name &o) override; +#include "ir/Operations.lst" +#undef OP + +public: + std::unique_ptr<Operation> releaseClone(); + +private: + std::unique_ptr<Operation> _return_op; +}; + #define OP(Name) \ void OperationCloner::visit(const operation::Name &o) \ { \ @@ -38,5 +55,14 @@ std::unique_ptr<Operation> OperationCloner::releaseClone() return std::move(_return_op); } +} // namespace + +std::unique_ptr<Operation> clone(const IOperation &operation) +{ + OperationCloner cloner; + operation.accept(cloner); + return cloner.releaseClone(); +} + } // namespace ir } // namespace onert diff --git a/runtime/onert/core/src/ir/OperationCloner.h 
b/runtime/onert/core/src/ir/OperationCloner.h index 0e8cda2a0..49297a05c 100644 --- a/runtime/onert/core/src/ir/OperationCloner.h +++ b/runtime/onert/core/src/ir/OperationCloner.h @@ -26,19 +26,7 @@ namespace onert namespace ir { -class OperationCloner : public OperationVisitor -{ -public: -#define OP(Name) void visit(const operation::Name &o) override; -#include "ir/Operations.lst" -#undef OP - -public: - std::unique_ptr<Operation> releaseClone(); - -private: - std::unique_ptr<Operation> _return_op; -}; +std::unique_ptr<Operation> clone(const IOperation &operation); } // namespace ir } // namespace onert diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc index 48361f464..5e6d700f3 100644 --- a/runtime/onert/core/src/ir/OperationDumper.cc +++ b/runtime/onert/core/src/ir/OperationDumper.cc @@ -29,19 +29,21 @@ using namespace operation; namespace { -void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "") + +// Dump all input and output. +// Use this function when there is no special input or(and) output. +void dumpOpGeneric(const Operation &node, const std::string &adding_input = "") { VERBOSE(LIR) << "* " << node.name() << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs() << ") " << adding_input << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs() << ")" << std::endl; } -void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "") +void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "") { VERBOSE(LIR) << "* " << node.name() << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(0) - << ") " << adding_input << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input + << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } @@ -53,18 +55,6 @@ void dumpConvOp(const Operation &node, const std::string &padding_type) << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl; VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } - -void dumpPackingOp(const Operation &node) -{ - VERBOSE(LIR) << "* " << node.name() << std::endl; - std::string inputs; - for (auto i : node.getInputs()) - { - inputs += std::to_string(i.value()) + ","; - } - VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} } // namespace OperationDumper::OperationDumper(const std::string &start_msg) @@ -72,41 +62,62 @@ OperationDumper::OperationDumper(const std::string &start_msg) VERBOSE(LIR) << start_msg << std::endl; } -void OperationDumper::visit(const ArgMax &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const ArgMinMax &node) +{ + std::string min_max = node.param().is_arg_max ? 
"(Max)" : "(Min)"; + VERBOSE(LIR) << "* " << node.name() << min_max << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMinMax::INPUT) << ") Axis(" + << node.getInputs().at(ArgMinMax::AXIS) << ") " << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; +} void OperationDumper::visit(const BatchToSpaceND &node) { std::string block_size = - "BlockSize(" + - std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")"; - dumpUnaryInputOp(node, block_size); + "BlockSize(" + std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + + ")"; + dumpOpGeneric(node, block_size); } -void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const BCQFullyConnected &node) +{ + VERBOSE(LIR) << "* " << node.name() << std::endl; + VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(BCQFullyConnected::Input::INPUT) + << ") WeightsBinary(" + << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_BINARY) + << ") WeightsScales(" + << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_SCALES) + << ") WeightsClusters(" + << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_CLUSTERS) << ") Bias(" + << node.getInputs().at(BCQFullyConnected::Input::BIAS) << ")" << std::endl; + VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; +} + +void OperationDumper::visit(const BinaryArithmetic &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const operation::BroadcastTo &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const Comparison &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); } +void OperationDumper::visit(const Concat &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Conv2D &node) { std::string padding_type = - node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; + node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; dumpConvOp(node, padding_type); } -void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const DepthToSpace &node) { dumpOpGeneric(node); } void OperationDumper::visit(const DepthwiseConv2D &node) { std::string padding_type = - node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; + node.param().padding.type == PaddingType::EXPLICIT ? 
"Explicit" : "Implicit"; dumpConvOp(node, padding_type); } @@ -122,12 +133,12 @@ void OperationDumper::visit(const ElementwiseActivation &node) { params = " alpha value(" + std::to_string(node.param().alpha) + ")"; } - dumpUnaryInputOp(node, params); + dumpOpGeneric(node, params); } -void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const ElementwiseBinary &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const ElementwiseUnary &node) { dumpOpGeneric(node); } void OperationDumper::visit(const EmbeddingLookup &node) { @@ -141,22 +152,30 @@ void OperationDumper::visit(const EmbeddingLookup &node) void OperationDumper::visit(const ExpandDims &node) { std::string axis = - "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")"; + "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")"; dumpUnaryInputOp(node, axis); } +void OperationDumper::visit(const Fill &node) +{ + VERBOSE(LIR) << "* " << node.name() << std::endl; + VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(Fill::Input::SHAPE) << ") Value(" + << node.getInputs().at(Fill::Input::VALUE) << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; +} + void OperationDumper::visit(const FullyConnected &node) { std::string inputs = - "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) + - ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")"; + "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) + + ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")"; dumpUnaryInputOp(node, inputs); } void OperationDumper::visit(const Gather &node) { std::string indices = - "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")"; + "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")"; dumpUnaryInputOp(node, indices); } @@ -174,50 +193,70 @@ void OperationDumper::visit(const HashtableLookup &node) void OperationDumper::visit(const InstanceNorm &node) { std::string inputs = - "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) + - ") Beta(" + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")"; + "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) + ") Beta(" + + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")"; dumpUnaryInputOp(node, inputs); } -void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const L2Normalization &node) { dumpOpGeneric(node); } + +void OperationDumper::visit(const LocalResponseNormalization &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const Loss &node) +{ + VERBOSE(LIR) << "* " << node.name() << std::endl; + VERBOSE(LIR) << " - Inputs : Prediction(" << node.getInputs().at(Loss::Input::Y_PRED) << ") True(" + << node.getInputs().at(Loss::Input::Y_TRUE) << ")" << std::endl; + VERBOSE(LIR) << " - Outputs : Output(" << node.getOutputs().at(0) << ")" << std::endl; +} void OperationDumper::visit(const LSTM &node) { + VERBOSE(LIR) << "* " << node.name() << std::endl; 
VERBOSE(LIR) - << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT) - << ") Input To Input Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS) - << ") Input To Forget Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS) - << ") Input To Cell Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS) - << ") Input To Output Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS) - << ") Recurrent To Input Weights(" - << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS) - << ") Recurrent To Forget Weights(" - << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS) - << ") Recurrent To Cell Weights(" - << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS) - << ") Recurrent To Output Weights(" - << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS) << ") Cell To Input Weights(" - << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS) << ") Cell To Forget Weights(" - << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS) << ") Cell To OUTPUT Weights(" - << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS) << ") Input Gate Bias(" - << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS) << ") Forget Gate Bias(" - << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS) << ") Cell Bias(" - << node.getInputs().at(LSTM::Input::CELL_BIAS) << ") Output Gate Bias(" - << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS) << ") Projection Weights(" - << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS) << ") Projection Bias(" - << node.getInputs().at(LSTM::Input::PROJECTION_BIAS) << ") Output State In(" - << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN) << ") Cell State In(" - << node.getInputs().at(LSTM::Input::CELL_STATE_IN) << ")" << std::endl; + << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT) + << ") Input To Input Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS) + << ") Input To Forget Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS) + << ") Input To Cell Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS) + << ") Input To Output Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS) + << ") Recurrent To Input Weights(" + << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS) + << ") Recurrent To Forget Weights(" + << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS) + << ") Recurrent To Cell Weights(" << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS) + << ") Recurrent To Output Weights(" + << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS) << ") Cell To Input Weights(" + << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS) << ") Cell To Forget Weights(" + << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS) << ") Cell To OUTPUT Weights(" + << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS) << ") Input Gate Bias(" + << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS) << ") Forget Gate Bias(" + << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS) << ") Cell Bias(" + << node.getInputs().at(LSTM::Input::CELL_BIAS) << ") Output Gate Bias(" + << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS) << ") Projection Weights(" + << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS) << ") Projection Bias(" + << node.getInputs().at(LSTM::Input::PROJECTION_BIAS) << ") Output State In(" + << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN) << ") Cell State In(" + << node.getInputs().at(LSTM::Input::CELL_STATE_IN); + 
if (node.getInputs().size() == 24) + { + VERBOSE(LIR) << ") Input Layer Normalization Weights(" + << node.getInputs().at(LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS) + << ") Forget Layer Normalization Weights(" + << node.getInputs().at(LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS) + << ") Cell Layer Normalization Weights(" + << node.getInputs().at(LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS) + << ") Ouput Layer Normalization Weights(" + << node.getInputs().at(LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS); + } + VERBOSE(LIR) << ")" << std::endl; VERBOSE(LIR) << " - Output : Scratch Buffer(" << node.getOutputs().at(LSTM::Output::SCRATCH_BUFFER) << ") Output State Out(" - << node.getInputs().at(LSTM::Output::OUTPUT_STATE_OUT) << ") Cell State Out(" - << node.getInputs().at(LSTM::Output::CELL_STATE_OUT) << ") Output(" - << node.getInputs().at(LSTM::Output::OUTPUT) << ")" << std::endl; + << node.getOutputs().at(LSTM::Output::OUTPUT_STATE_OUT) << ") Cell State Out(" + << node.getOutputs().at(LSTM::Output::CELL_STATE_OUT) << ") Output(" + << node.getOutputs().at(LSTM::Output::OUTPUT) << ")" << std::endl; } -void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); } +void OperationDumper::visit(const Pack &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Pad &node) { @@ -249,23 +288,23 @@ void OperationDumper::visit(const Permute &node) void OperationDumper::visit(const Pool2D &node) { std::string padding_type = - node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; + node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl; VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Pool2D::Input::INPUT) << ")" << std::endl; VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const Pow &node) { dumpOpGeneric(node); } void OperationDumper::visit(const PReLU &node) { std::string alpha = - "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")"; - dumpUnaryInputOp(node, alpha); + "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")"; + dumpOpGeneric(node, alpha); } -void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const Rank &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); } @@ -273,18 +312,20 @@ void OperationDumper::visit(const Reshape &node) { // optional param std::string shape = - node.getInputs().size() == 2 - ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")" - : "Shape(not provided)"; + node.getInputs().size() == 2 + ? 
"Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")" + : "Shape(not provided)"; dumpUnaryInputOp(node, shape); } -void OperationDumper::visit(const ResizeBilinear &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const ResizeBilinear &node) { dumpOpGeneric(node); } + +void OperationDumper::visit(const ResizeNearestNeighbor &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Reverse &node) { std::string axis = - "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")"; + "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")"; dumpUnaryInputOp(node, axis); } @@ -320,25 +361,24 @@ void OperationDumper::visit(const Select &node) VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const ir::operation::Shape &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const Softmax &node) { dumpOpGeneric(node); } void OperationDumper::visit(const SpaceToBatchND &node) { std::string inputs = - "BlockSize(" + - std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) + - ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) + - ")"; + "BlockSize(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) + + ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) + + ")"; dumpUnaryInputOp(node, inputs); } -void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const SpaceToDepth &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const Split &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const Split &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const SquaredDifference &node) { dumpOpGeneric(node); } void OperationDumper::visit(const StatelessRandomUniform &node) { @@ -349,7 +389,7 @@ void OperationDumper::visit(const StatelessRandomUniform &node) VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const Squeeze &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); } @@ -358,7 +398,7 @@ void OperationDumper::visit(const StridedSlice &node) { dumpUnaryInputOp(node); void OperationDumper::visit(const Tile &node) { std::string multiples = - "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")"; + "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")"; dumpUnaryInputOp(node, multiples); } @@ -375,7 +415,7 @@ void OperationDumper::visit(const TopKV2 &node) void OperationDumper::visit(const TransposeConv &node) { std::string padding_type = - node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; + node.param().padding.type == PaddingType::EXPLICIT ? 
"Explicit" : "Implicit"; VERBOSE(LIR) << "* TransposeConv(" << padding_type << ")" << std::endl; VERBOSE(LIR) << " - Inputs : Output Shape(" << node.getInputs().at(TransposeConv::Input::OUTPUT_SHAPE) << ") KERNEL(" @@ -384,22 +424,14 @@ void OperationDumper::visit(const TransposeConv &node) VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Transpose &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const Transpose &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Unpack &node) { VERBOSE(LIR) << "* " << node.name() << std::endl; VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")" << std::endl; - std::string outputs; - const auto &output_indices = node.getOutputs(); - for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it) - { - outputs += std::to_string(it->value()); - if (std::next(it) != std::end(output_indices)) - outputs += ", "; - } - VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl; + VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl; } void OperationDumper::visit(const OneHot &node) @@ -413,51 +445,21 @@ void OperationDumper::visit(const OneHot &node) void OperationDumper::visit(const If &node) { VERBOSE(LIR) << "* " << node.name() << std::endl; - std::string inputs; - const auto &input_indices = node.getInputs(); - for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it) - { - inputs += std::to_string(it->value()); - if (std::next(it) != std::end(input_indices)) - inputs += ", "; - } VERBOSE(LIR) << " - Inputs : " << "Then subgraph (" << node.param().then_subg_index << ") Else subgraph (" - << node.param().else_subg_index << ") Inputs(" << inputs << ")" << std::endl; - std::string outputs; - const auto &output_indices = node.getOutputs(); - for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it) - { - outputs += std::to_string(it->value()); - if (std::next(it) != std::end(output_indices)) - outputs += ", "; - } - VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl; + << node.param().else_subg_index << ") Inputs(" << node.getInputs() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl; } void OperationDumper::visit(const While &node) { VERBOSE(LIR) << "* " << node.name() << std::endl; - std::string inputs; - const auto &input_indices = node.getInputs(); - for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it) - { - inputs += std::to_string(it->value()); - if (std::next(it) != std::end(input_indices)) - inputs += ", "; - } VERBOSE(LIR) << " - Inputs : " << "Cond subgraph (" << node.param().cond_subg_index << ") Body subgraph (" - << node.param().cond_subg_index << ") Inputs(" << inputs << ")" << std::endl; - std::string outputs; - const auto &output_indices = node.getOutputs(); - for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it) - { - outputs += std::to_string(it->value()); - if (std::next(it) != std::end(output_indices)) - outputs += ", "; - } - VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl; + << node.param().body_subg_index << ") Inputs(" << node.getInputs() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl; } } // namespace ir diff --git a/runtime/onert/core/src/ir/OperationDumper.h 
b/runtime/onert/core/src/ir/OperationDumper.h index e8ab3b3cd..99bf869d5 100644 --- a/runtime/onert/core/src/ir/OperationDumper.h +++ b/runtime/onert/core/src/ir/OperationDumper.h @@ -31,8 +31,9 @@ public: OperationDumper(const std::string &start_msg); public: - void visit(const operation::ArgMax &) override; + void visit(const operation::ArgMinMax &) override; void visit(const operation::BatchToSpaceND &node) override; + void visit(const operation::BCQFullyConnected &node) override; void visit(const operation::BinaryArithmetic &node) override; void visit(const operation::BroadcastTo &) override; void visit(const operation::Comparison &) override; @@ -47,12 +48,14 @@ public: void visit(const operation::ElementwiseUnary &) override; void visit(const operation::EmbeddingLookup &) override; void visit(const operation::ExpandDims &) override; + void visit(const operation::Fill &) override; void visit(const operation::FullyConnected &node) override; void visit(const operation::Gather &) override; void visit(const operation::HashtableLookup &) override; void visit(const operation::InstanceNorm &) override; void visit(const operation::L2Normalization &) override; void visit(const operation::LocalResponseNormalization &) override; + void visit(const operation::Loss &node) override; void visit(const operation::LSTM &) override; void visit(const operation::Pack &) override; void visit(const operation::Pad &) override; @@ -65,6 +68,7 @@ public: void visit(const operation::Reduce &) override; void visit(const operation::Reshape &node) override; void visit(const operation::ResizeBilinear &) override; + void visit(const operation::ResizeNearestNeighbor &) override; void visit(const operation::Reverse &) override; void visit(const operation::RNN &) override; void visit(const operation::Select &node) override; diff --git a/runtime/onert/core/src/ir/OperationValidator.cc b/runtime/onert/core/src/ir/OperationValidator.cc new file mode 100644 index 000000000..09f773cf0 --- /dev/null +++ b/runtime/onert/core/src/ir/OperationValidator.cc @@ -0,0 +1,545 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "OperationValidator.h" + +#include "ir/Graph.h" +#include "util/logging.h" + +#define OP_REQUIRES(EXP) \ + do \ + { \ + if (!(EXP)) \ + throw std::runtime_error("OperationValidator failed at line " + std::to_string(__LINE__)); \ + } while (0) + +namespace onert +{ +namespace ir +{ + +OperationValidator::OperationValidator(const Graph &graph) + : _operations{graph.operations()}, _operands{graph.operands()} +{ +} + +void OperationValidator::operator()() +{ + _operations.iterate([&](const OperationIndex &, const IOperation &node) { node.accept(*this); }); +} + +DataType OperationValidator::operandType(const OperandIndex &idx) +{ + return _operands.at(idx).typeInfo().type(); +} + +bool OperationValidator::isConstant(const OperandIndex &idx) +{ + return _operands.at(idx).isConstant(); +} + +bool OperationValidator::isSameType(const OperandIndex &idx1, const OperandIndex &idx2) +{ + return operandType(idx1) == operandType(idx2); +} + +bool OperationValidator::isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2) +{ + if (_operands.at(idx1).typeInfo().scale() != _operands.at(idx2).typeInfo().scale()) + return false; + + if (_operands.at(idx1).typeInfo().zero_point() != _operands.at(idx2).typeInfo().zero_point()) + return false; + + return true; +} + +bool OperationValidator::isValidType(const OperandIndex &idx, const DataType &type) +{ + return operandType(idx) == type; +} + +bool OperationValidator::isValidType(const OperandIndex &idx, + std::initializer_list<DataType> valid_types) +{ + for (auto &&type_to_check : valid_types) + { + if (isValidType(idx, type_to_check)) + { + return true; + } + } + + return false; +} + +void OperationValidator::visit(const operation::AddN &node) +{ + const auto output_index(node.getOutputs().at(0)); + + int size = node.getInputs().size(); + for (int i = 0; i < size; i++) + { + const auto input_index(node.getInputs().at(i)); + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32})); + OP_REQUIRES(isSameType(input_index, output_index)); + } +} + +void OperationValidator::visit(const operation::ArgMinMax &node) +{ + const auto input_index(node.getInputs().at(operation::ArgMinMax::Input::INPUT)); + const auto axis_index(node.getInputs().at(operation::ArgMinMax::Input::AXIS)); + const auto output_index(node.getOutputs().at(0)); + const auto output_type = node.param().output_type; + + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::UINT8, + DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, output_type)); +} + +void OperationValidator::visit(const operation::BatchMatMul &node) +{ + const auto lhs_index(node.getInputs().at(operation::BatchMatMul::Input::LHS)); + const auto rhs_index(node.getInputs().at(operation::BatchMatMul::Input::RHS)); + const auto output_index(node.getOutputs().at(0)); + + // Constant lhs and rhs is not implemented yet + OP_REQUIRES(!isConstant(lhs_index) && !isConstant(rhs_index)); + + // Allow hybrid quantization (lhs: float / rhs: qint8 / out: float) + OP_REQUIRES(isValidType(lhs_index, {DataType::FLOAT32, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isSameType(lhs_index, rhs_index) || + ((operandType(lhs_index) == DataType::FLOAT32) && + (operandType(rhs_index) == DataType::QUANT_INT8_ASYMM))); + OP_REQUIRES(isSameType(lhs_index, output_index)); +} + +void 
OperationValidator::visit(const operation::BatchToSpaceND &node) +{ + const auto input_index{node.getInputs().at(operation::BatchToSpaceND::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + + OP_REQUIRES(isSameType(input_index, output_index)); +} + +void OperationValidator::visit(const operation::BinaryArithmetic &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(operation::BinaryArithmetic::Input::LHS)}; + const auto rhs_index{node.getInputs().at(operation::BinaryArithmetic::Input::RHS)}; + + OP_REQUIRES(isSameType(lhs_index, rhs_index)); + OP_REQUIRES(isSameType(lhs_index, output_index)); +} + +void OperationValidator::visit(const operation::Comparison &node) +{ + const auto output_index{node.getOutputs().at(0)}; + + const auto lhs_index{node.getInputs().at(operation::Comparison::Input::INPUT0)}; + const auto rhs_index{node.getInputs().at(operation::Comparison::Input::INPUT1)}; + + OP_REQUIRES(isSameType(lhs_index, rhs_index)); + OP_REQUIRES(isValidType(output_index, DataType::BOOL8)); +} + +void OperationValidator::visit(const operation::Concat &node) +{ + const auto output_index{node.getOutputs().at(0)}; + + for (auto &&input_index : node.getInputs()) + { + OP_REQUIRES(isSameType(input_index, output_index)); + + // Int8 quantization requires same scale and zero point + if (isValidType(output_index, DataType::QUANT_INT8_ASYMM)) + { + OP_REQUIRES(isSameQuantParam(input_index, output_index)); + } + } +} + +void OperationValidator::visit(const operation::Conv2D &node) +{ + const auto input_index{node.getInputs().at(operation::Conv2D::Input::INPUT)}; + const auto kernel_index{node.getInputs().at(operation::Conv2D::Input::KERNEL)}; + const auto output_index{node.getOutputs().at(0)}; + + uint32_t stride_horizontal = node.param().stride.horizontal; + uint32_t stride_vertical = node.param().stride.vertical; + uint32_t dilation_width = node.param().dilation.width_factor; + uint32_t dilation_height = node.param().dilation.height_factor; + + OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0)); + OP_REQUIRES((dilation_width > 0) && (dilation_height > 0)); + OP_REQUIRES(isSameType(input_index, output_index)); + + if (isConstant(kernel_index) && operandType(kernel_index) == DataType::QUANT_INT8_ASYMM) + { + for (const auto zeropoint : _operands.at(kernel_index).typeInfo().zero_points()) + OP_REQUIRES(zeropoint == 0); + } +} + +void OperationValidator::visit(const operation::DepthToSpace &node) +{ + const auto input_index{node.getInputs().at(operation::DepthToSpace::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + + int32_t block_size = node.param().block_size; + + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::INT64, + DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isSameType(input_index, output_index)); + + OP_REQUIRES(block_size > 0); +} + +void OperationValidator::visit(const operation::DetectionPostProcess &node) +{ + const auto ¶m = node.param(); + + // FIXME: number of classes should be 1 for now. 
+ OP_REQUIRES(param.num_classes == 1); +} + +void OperationValidator::visit(const operation::DepthwiseConv2D &node) +{ + const auto input_index{node.getInputs().at(operation::DepthwiseConv2D::Input::INPUT)}; + const auto kernel_index{node.getInputs().at(operation::DepthwiseConv2D::Input::KERNEL)}; + const auto output_index{node.getOutputs().at(0)}; + + uint32_t stride_horizontal = node.param().stride.horizontal; + uint32_t stride_vertical = node.param().stride.vertical; + uint32_t dilation_width = node.param().dilation.width_factor; + uint32_t dilation_height = node.param().dilation.height_factor; + + OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0)); + OP_REQUIRES((dilation_width > 0) && (dilation_height > 0)); + OP_REQUIRES(isSameType(input_index, output_index)); + + if (isConstant(kernel_index) && operandType(kernel_index) == DataType::QUANT_INT8_ASYMM) + { + for (const auto zeropoint : _operands.at(kernel_index).typeInfo().zero_points()) + OP_REQUIRES(zeropoint == 0); + } +} + +void OperationValidator::visit(const operation::ElementwiseActivation &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + // Check if I/O types match + OP_REQUIRES(isSameType(output_index, input_index)); + + switch (node.param().op_type) + { + case operation::ElementwiseActivation::Type::ELU: + OP_REQUIRES(isValidType(input_index, DataType::FLOAT32)); + break; + case operation::ElementwiseActivation::Type::LEAKY_RELU: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + case operation::ElementwiseActivation::Type::LOGISTIC: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + case operation::ElementwiseActivation::Type::RELU: + OP_REQUIRES(isValidType( + input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); + break; + case operation::ElementwiseActivation::Type::TANH: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + } +} + +void OperationValidator::visit(const operation::ElementwiseBinary &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(operation::ElementwiseBinary::Input::LHS)}; + const auto rhs_index{node.getInputs().at(operation::ElementwiseBinary::Input::RHS)}; + + OP_REQUIRES(isSameType(lhs_index, rhs_index)); + OP_REQUIRES(isSameType(lhs_index, output_index)); + + const auto op_type = node.param().op_type; + if (op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND || + op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR) + { + OP_REQUIRES(isValidType(lhs_index, DataType::BOOL8)); + } +} + +void OperationValidator::visit(const operation::ElementwiseUnary &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(operation::ElementwiseUnary::Input::INPUT)}; + + // Check if I/O types match + if (node.param().op_type == operation::ElementwiseUnary::Type::DEQUANTIZE) + { + // NNAPI allow QUANT_INT8_SYMM type input + OP_REQUIRES(isValidType(input_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_SYMM, + DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isValidType(output_index, DataType::FLOAT32)); + } + else if 
(node.param().op_type == operation::ElementwiseUnary::Type::QUANTIZE) + { + OP_REQUIRES(isValidType( + input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES( + isValidType(output_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); + } + else if (node.param().op_type == operation::ElementwiseUnary::Type::FLOOR) + { + OP_REQUIRES(isValidType(input_index, DataType::FLOAT32)); + OP_REQUIRES(isSameType(output_index, input_index)); + } + else if (node.param().op_type != operation::ElementwiseUnary::Type::CAST) + { + OP_REQUIRES(isSameType(output_index, input_index)); + } +} + +void OperationValidator::visit(const operation::EmbeddingLookup &node) +{ + const auto lookups_index{node.getInputs().at(operation::EmbeddingLookup::Input::LOOKUPS)}; + const auto values_index{node.getInputs().at(operation::EmbeddingLookup::Input::VALUES)}; + const auto output_index{node.getOutputs().at(0)}; + + OP_REQUIRES(isValidType(lookups_index, DataType::INT32)); + + // TFLite: Allow hybrid type - value table & output + // NNAPI: Require same value table and output type + OP_REQUIRES( + isSameType(values_index, output_index) || + (isValidType(output_index, DataType::FLOAT32) && + (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM})))); +} + +void OperationValidator::visit(const operation::ExpandDims &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(operation::ExpandDims::Input::INPUT)}; + const auto axis_index{node.getInputs().at(operation::ExpandDims::Input::AXIS)}; + + OP_REQUIRES(isSameType(output_index, input_index)); + OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64})); +} + +void OperationValidator::visit(const operation::Fill &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(operation::Fill::Input::SHAPE)}; + const auto value_index{node.getInputs().at(operation::Fill::Input::VALUE)}; + + OP_REQUIRES(isSameType(output_index, value_index)); + OP_REQUIRES(isValidType(input_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, + {DataType::FLOAT32, DataType::INT32, DataType::INT64, DataType::BOOL8})); +} + +void OperationValidator::visit(const operation::HashtableLookup &node) +{ + const auto hits_index{node.getOutputs().at(operation::HashtableLookup::Output::HITS)}; + const auto lookups_index{node.getInputs().at(operation::HashtableLookup::Input::LOOKUPS)}; + const auto keys_index{node.getInputs().at(operation::HashtableLookup::Input::KEYS)}; + + OP_REQUIRES(isValidType(lookups_index, DataType::INT32)); + OP_REQUIRES(isValidType(keys_index, DataType::INT32)); + OP_REQUIRES(isValidType(hits_index, DataType::QUANT_UINT8_ASYMM)); +} + +void OperationValidator::visit(const operation::Pack &node) +{ + const auto num{node.param().num}; + + OP_REQUIRES(num == static_cast<int32_t>(node.getInputs().size())); +} + +void OperationValidator::visit(const operation::Pad &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(operation::Pad::Input::INPUT)}; + const auto pad_index{node.getInputs().at(operation::Pad::Input::PAD)}; + bool isQuantType = + isValidType(output_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}); + bool isPadV2 = node.getInputs().size() == 3 ? 
true : false; + + OP_REQUIRES(isValidType(pad_index, DataType::INT32)); + OP_REQUIRES(isSameType(input_index, output_index)); + + if (isQuantType) + OP_REQUIRES(isSameQuantParam(input_index, output_index)); + + if (isPadV2) + { + const auto value_index{node.getInputs().at(operation::Pad::Input::VALUE)}; + const bool cond_same = isSameType(input_index, value_index); + const bool cond_same_quant = (!isQuantType || isSameQuantParam(input_index, value_index)); + const auto input_t = operandType(input_index); + const auto value_t = operandType(value_index); + // NNAPI accepts this case. scale and zeroPoint are assumed to be the same as in input0. + const bool cond_quant8 = + ((input_t == DataType::QUANT_UINT8_ASYMM || input_t == DataType::QUANT_INT8_ASYMM) && + value_t == DataType::INT32); + OP_REQUIRES((cond_same && cond_same_quant) || cond_quant8); + } +} + +void OperationValidator::visit(const operation::Rank &node) +{ + const auto output_index{node.getOutputs().at(0)}; + + OP_REQUIRES(isValidType(output_index, DataType::INT32)); +} + +void OperationValidator::visit(const operation::ResizeBilinear &node) +{ + auto align_corners = node.param().align_corners; + auto half_pixel_centers = node.param().half_pixel_centers; + + OP_REQUIRES(!align_corners || !half_pixel_centers); +} + +void OperationValidator::visit(const operation::Reverse &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(operation::Reverse::Input::INPUT)}; + const auto axis_index{node.getInputs().at(operation::Reverse::Input::AXIS)}; + + OP_REQUIRES(isValidType(axis_index, DataType::INT32)); + OP_REQUIRES(isSameType(output_index, input_index)); +} + +void OperationValidator::visit(const operation::Select &node) +{ + const auto condition_index{node.getInputs().at(operation::Select::Input::CONDITION)}; + const auto input_true_index{node.getInputs().at(operation::Select::Input::INPUT_TRUE)}; + const auto input_false_index{node.getInputs().at(operation::Select::Input::INPUT_FALSE)}; + + OP_REQUIRES(isValidType(condition_index, DataType::BOOL8)); + OP_REQUIRES(isSameType(input_true_index, input_false_index)); +} + +void OperationValidator::visit(const operation::Shape &node) +{ + const auto output_index{node.getOutputs().at(0)}; + + OP_REQUIRES(isValidType(output_index, {DataType::UINT32, DataType::INT32, DataType::INT64})); +} + +void OperationValidator::visit(const operation::Slice &node) +{ + const auto begins_index{node.getInputs().at(operation::Slice::BEGINS)}; + const auto sizes_index{node.getInputs().at(operation::Slice::SIZES)}; + + OP_REQUIRES(isValidType(begins_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isSameType(begins_index, sizes_index)); +} + +void OperationValidator::visit(const operation::Softmax &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(operation::Softmax::INPUT)}; + + OP_REQUIRES(isSameType(input_index, output_index)); + OP_REQUIRES(isValidType( + output_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); +} + +void OperationValidator::visit(const operation::SpaceToBatchND &node) +{ + const auto block_size_index{node.getInputs().at(operation::SpaceToBatchND::Input::BLOCK_SIZE)}; + const auto paddings_index{node.getInputs().at(operation::SpaceToBatchND::Input::PADDINGS)}; + + // Non-constant block_size and padding is not implemented yet + OP_REQUIRES(isConstant(block_size_index)); + OP_REQUIRES(isConstant(paddings_index)); +} + +void 
OperationValidator::visit(const operation::SpaceToDepth &node) +{ + const auto block_size = node.param().block_size; + OP_REQUIRES(block_size >= 1); +} + +void OperationValidator::visit(const operation::Split &node) +{ + const auto num_splits = node.param().num_splits; + + OP_REQUIRES(num_splits > 0 && num_splits <= 0xFFFF); + OP_REQUIRES(node.getOutputs().size() == static_cast<uint32_t>(num_splits)); +} + +void OperationValidator::visit(const operation::SquaredDifference &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(operation::SquaredDifference::Input::LHS)}; + const auto rhs_index{node.getInputs().at(operation::SquaredDifference::Input::RHS)}; + + OP_REQUIRES(isSameType(output_index, lhs_index)); + OP_REQUIRES(isSameType(lhs_index, rhs_index)); +} + +void OperationValidator::visit(const operation::StatelessRandomUniform &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto shape_index{node.getInputs().at(operation::StatelessRandomUniform::Input::SHAPE)}; + const auto seed_index{node.getInputs().at(operation::StatelessRandomUniform::Input::SEED)}; + + OP_REQUIRES(isValidType(output_index, DataType::FLOAT32)); + OP_REQUIRES(isValidType(shape_index, DataType::INT32)); + OP_REQUIRES(isValidType(seed_index, DataType::INT32)); +} + +void OperationValidator::visit(const operation::StridedSlice &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(operation::StridedSlice::Input::INPUT)}; + + OP_REQUIRES(isSameType(output_index, input_index)); +} + +void OperationValidator::visit(const operation::TransposeConv &node) +{ + OP_REQUIRES((node.param().padding.type == PaddingType::SAME) || + (node.param().padding.type == PaddingType::VALID)); +} + +void OperationValidator::visit(const operation::Unpack &node) +{ + const auto num{node.param().num}; + OP_REQUIRES(num == static_cast<int32_t>(node.getOutputs().size())); +} + +void OperationValidator::visit(const operation::While &node) +{ + OP_REQUIRES(node.getInputs().size() == node.getOutputs().size()); +} + +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/OperationValidator.h b/runtime/onert/core/src/ir/OperationValidator.h new file mode 100644 index 000000000..b9bcc4ee8 --- /dev/null +++ b/runtime/onert/core/src/ir/OperationValidator.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_IR_OPERATION_VALIDATOR_H__ +#define __ONERT_IR_OPERATION_VALIDATOR_H__ + +#include "ir/OperationVisitor.h" +#include "ir/Operations.h" +#include "ir/Operands.h" + +namespace onert +{ +namespace ir +{ +class Graph; +class Operands; +} // namespace ir +} // namespace onert + +namespace onert +{ +namespace ir +{ + +class OperationValidator : public OperationVisitor +{ +public: + OperationValidator(void) = delete; + OperationValidator(const Graph &graph); + +public: + void operator()(); + +public: + void visit(const operation::AddN &node) override; + void visit(const operation::ArgMinMax &node) override; + void visit(const operation::BatchMatMul &node) override; + void visit(const operation::BatchToSpaceND &node) override; + void visit(const operation::BinaryArithmetic &node) override; + void visit(const operation::Comparison &node) override; + void visit(const operation::Concat &node) override; + void visit(const operation::Conv2D &node) override; + void visit(const operation::DepthToSpace &node) override; + void visit(const operation::DepthwiseConv2D &node) override; + void visit(const operation::DetectionPostProcess &node) override; + void visit(const operation::ElementwiseActivation &node) override; + void visit(const operation::ElementwiseBinary &node) override; + void visit(const operation::ElementwiseUnary &node) override; + void visit(const operation::EmbeddingLookup &node) override; + void visit(const operation::ExpandDims &node) override; + void visit(const operation::Fill &node) override; + void visit(const operation::HashtableLookup &node) override; + void visit(const operation::Pack &node) override; + void visit(const operation::Pad &node) override; + void visit(const operation::Rank &node) override; + void visit(const operation::ResizeBilinear &node) override; + void visit(const operation::Reverse &node) override; + void visit(const operation::Select &node) override; + void visit(const operation::Shape &node) override; + void visit(const operation::Slice &node) override; + void visit(const operation::Softmax &node) override; + void visit(const operation::SpaceToBatchND &node) override; + void visit(const operation::SpaceToDepth &node) override; + void visit(const operation::Split &node) override; + void visit(const operation::SquaredDifference &node) override; + void visit(const operation::StatelessRandomUniform &node) override; + void visit(const operation::StridedSlice &node) override; + void visit(const operation::TransposeConv &node) override; + void visit(const operation::Unpack &node) override; + void visit(const operation::While &node) override; + +private: + DataType operandType(const OperandIndex &idx); + bool isConstant(const OperandIndex &idx); + bool isSameType(const OperandIndex &idx1, const OperandIndex &idx2); + bool isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2); + bool isValidType(const OperandIndex &idx, const DataType &type); + bool isValidType(const OperandIndex &idx, std::initializer_list<DataType> valid_types); + +private: + const Operations &_operations; + const Operands &_operands; +}; + +} // namespace ir +} // namespace onert + +#endif // __ONERT_IR_OPERATION_VALIDATOR_H__ diff --git a/runtime/onert/core/src/ir/Operations.cc b/runtime/onert/core/src/ir/Operations.cc index 64d0bd6f0..1b4691f58 100644 --- a/runtime/onert/core/src/ir/Operations.cc +++ b/runtime/onert/core/src/ir/Operations.cc @@ -25,12 +25,9 @@ namespace ir Operations::Operations(const Operations &obj) { - obj.iterate([&](const OperationIndex 
&index, const Operation &op) { - OperationCloner cloner; - op.accept(cloner); - _objects.emplace(index, cloner.releaseClone()); - }); - _index_count = obj._index_count; + obj.iterate( + [&](const OperationIndex &index, const IOperation &op) { _objects.emplace(index, clone(op)); }); + _next_index = obj._next_index; } } // namespace ir diff --git a/runtime/onert/core/src/ir/Operations.test.cc b/runtime/onert/core/src/ir/Operations.test.cc new file mode 100644 index 000000000..e57872689 --- /dev/null +++ b/runtime/onert/core/src/ir/Operations.test.cc @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Operations.h" + +#include "MockNode.h" + +#include <gtest/gtest.h> + +using onert::ir::Operation; +using onert::ir::OperationIndex; +using onert::ir::Operations; + +TEST(ir_Operations, basic) +{ + Operations ops; + ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7}))); + OperationIndex idx{0u}; + ASSERT_EQ(ops.at(idx).getInputs().size(), 4); + ASSERT_EQ(ops.at(idx).getOutputs().size(), 3); +} + +TEST(ir_Operations, neg_at) +{ + Operations ops; + ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7}))); + OperationIndex idx{99u}; + EXPECT_THROW(ops.at(idx), std::out_of_range); +} diff --git a/runtime/onert/core/src/ir/Padding.cc b/runtime/onert/core/src/ir/Padding.cc index d74f80217..b2b004e7a 100644 --- a/runtime/onert/core/src/ir/Padding.cc +++ b/runtime/onert/core/src/ir/Padding.cc @@ -66,14 +66,14 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical; const int32_t horizontal_expected_output = - (ifm_shape.W + stride.horizontal - 1) / stride.horizontal; + (ifm_shape.W + stride.horizontal - 1) / stride.horizontal; const int32_t vertical_needed_input = - (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size; + (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size; const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H); const int32_t horizontal_needed_input = - (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size; + (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size; const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W); padding.top = vertical_total_padding / 2; @@ -90,7 +90,7 @@ inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureS { const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical; const int32_t horizontal_expected_output = - (ifm_shape.W + stride.horizontal - 1) / stride.horizontal; + (ifm_shape.W + stride.horizontal - 1) / stride.horizontal; assert(vertical_expected_output == ofm_shape.H); assert(horizontal_expected_output == ofm_shape.W); @@ 
-129,7 +129,7 @@ Padding::Padding(PaddingType paddingType) : type{paddingType}, param{0, 0, 0, 0} } Padding::Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom) - : type{PaddingType::EXPLICIT}, param{left, right, top, bottom} + : type{PaddingType::EXPLICIT}, param{left, right, top, bottom} { // DO NOTHING } diff --git a/runtime/onert/core/src/ir/Shape.cc b/runtime/onert/core/src/ir/Shape.cc index 322df7b4c..e4e4c154b 100644 --- a/runtime/onert/core/src/ir/Shape.cc +++ b/runtime/onert/core/src/ir/Shape.cc @@ -26,10 +26,10 @@ namespace onert namespace ir { -int32_t const Shape::UNSPECIFIED_DIM = -1; +int32_t const Shape::kUnspecifiedDim = -1; // NNFW_MAX_RANK is 6 -int32_t const Shape::MAX_RANK = 6; +int32_t const Shape::kMaxRank = 6; FeatureShape Shape::asFeature(Layout layout) const { @@ -80,34 +80,37 @@ uint64_t Shape::num_elements() const { // if dimension is 0, it means unspecified and cannot calculate the total number of elements if (std::any_of(_dimensions.begin(), _dimensions.end(), - [](const int32_t &v) { return v == UNSPECIFIED_DIM; })) + [](const int32_t &v) { return v == kUnspecifiedDim; })) throw std::runtime_error("num_elements() cannot calculate when any dimension is unspecified"); return std::accumulate(_dimensions.cbegin(), _dimensions.cend(), UINT64_C(1), std::multiplies<uint64_t>()); } -Shape permuteShape(const Shape &shape, Layout frontend_layout, Layout backend_layout) +Shape permuteShape(const Shape &shape, Layout from, Layout to) { - assert(shape.rank() <= Shape::MAX_RANK); - Shape backend_shape{shape}; - if (shape.rank() >= 4 && frontend_layout == Layout::NHWC && backend_layout == Layout::NCHW) + assert(shape.rank() <= Shape::kMaxRank); + Shape ret{shape}; + if (from == to) + return ret; + if (shape.rank() < 4) + return ret; + // Permutation changing layout beyond 4-D is not supported yet + assert(shape.rank() <= 4); + if (from == Layout::NHWC && to == Layout::NCHW) { - // Permutation changing layout beyond 4-D is not supported yet - assert(shape.rank() <= 4); - backend_shape.dim(1) = shape.dim(3); - backend_shape.dim(2) = shape.dim(1); - backend_shape.dim(3) = shape.dim(2); + ret.dim(1) = shape.dim(3); + ret.dim(2) = shape.dim(1); + ret.dim(3) = shape.dim(2); } - else if (shape.rank() >= 4 && frontend_layout == Layout::NCHW && backend_layout == Layout::NHWC) + else if (from == Layout::NCHW && to == Layout::NHWC) { - // Permutation changing layout beyond 4-D is not supported yet - assert(shape.rank() <= 4); - backend_shape.dim(1) = shape.dim(2); - backend_shape.dim(2) = shape.dim(3); - backend_shape.dim(3) = shape.dim(1); + ret.dim(1) = shape.dim(2); + ret.dim(2) = shape.dim(3); + ret.dim(3) = shape.dim(1); } - return backend_shape; + // Other cases(either `from` or `to` is UNKNOWN), just return the original shape + return ret; } } // namespace ir diff --git a/runtime/onert/core/src/ir/Shape.test.cc b/runtime/onert/core/src/ir/Shape.test.cc new file mode 100644 index 000000000..4788522d3 --- /dev/null +++ b/runtime/onert/core/src/ir/Shape.test.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Shape.h" + +#include <gtest/gtest.h> + +TEST(ShapeTest, basic_test) +{ + { + onert::ir::Shape shape(3); + + shape.dim(0) = 1; + shape.dim(1) = 2; + shape.dim(2) = 3; + + ASSERT_EQ(shape.rank(), 3); + ASSERT_EQ(shape.num_elements(), 6); + ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false); + ASSERT_EQ(shape.hasUnspecifiedDims(), false); + } + { + onert::ir::Shape shape; // scalar or rank is unspecified + + ASSERT_EQ(shape.rank(), 0); + ASSERT_EQ(shape.num_elements(), 1); + ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true); + ASSERT_EQ(shape.hasUnspecifiedDims(), false); + } +} + +TEST(ShapeTest, neg_basic_test) +{ + { + onert::ir::Shape shape(2); + + shape.dim(0) = 1; + shape.dim(1) = onert::ir::Shape::kUnspecifiedDim; + + ASSERT_EQ(shape.rank(), 2); + ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false); + ASSERT_EQ(shape.hasUnspecifiedDims(), true); + EXPECT_ANY_THROW(shape.num_elements()); + } +} diff --git a/runtime/onert/core/src/ir/TypeInfo.cc b/runtime/onert/core/src/ir/TypeInfo.cc index ab8af287e..5d1c7ba8b 100644 --- a/runtime/onert/core/src/ir/TypeInfo.cc +++ b/runtime/onert/core/src/ir/TypeInfo.cc @@ -28,7 +28,7 @@ bool operator==(const TypeInfo &lhs, const TypeInfo &rhs) return false; } - if (lhs.offset() != rhs.offset()) + if (lhs.zero_point() != rhs.zero_point()) { return false; } diff --git a/runtime/onert/core/src/ir/operation/AddN.cc b/runtime/onert/core/src/ir/operation/AddN.cc new file mode 100644 index 000000000..a51e12dff --- /dev/null +++ b/runtime/onert/core/src/ir/operation/AddN.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/operation/AddN.h" +#include "ir/OperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace operation +{ + +void AddN::accept(OperationVisitor &v) const { v.visit(*this); } + +AddN::AddN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(inputs.size()), inputs, outputs} +{ +} + +} // namespace operation +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/ArgMax.cc b/runtime/onert/core/src/ir/operation/ArgMinMax.cc index 1275ae43a..2f18ff2e2 100644 --- a/runtime/onert/core/src/ir/operation/ArgMax.cc +++ b/runtime/onert/core/src/ir/operation/ArgMinMax.cc @@ -14,10 +14,7 @@ * limitations under the License. 
*/ -#include "ir/operation/ArgMax.h" - -#include <cassert> - +#include "ir/operation/ArgMinMax.h" #include "ir/OperationVisitor.h" namespace onert @@ -27,11 +24,11 @@ namespace ir namespace operation { -void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); } +void ArgMinMax::accept(OperationVisitor &v) const { v.visit(*this); } -ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +ArgMinMax::ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc b/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc index 9dc54e6e9..ccda674ad 100644 --- a/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc +++ b/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/BCQFullyConnected.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void BCQFullyConnected::accept(OperationVisitor &v) const { v.visit(*this); } BCQFullyConnected::BCQFullyConnected(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/BCQGather.cc b/runtime/onert/core/src/ir/operation/BCQGather.cc index 80efa6460..1ca5b0c9f 100644 --- a/runtime/onert/core/src/ir/operation/BCQGather.cc +++ b/runtime/onert/core/src/ir/operation/BCQGather.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/BCQGather.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void BCQGather::accept(OperationVisitor &v) const { v.visit(*this); } BCQGather::BCQGather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/BatchMatMul.cc b/runtime/onert/core/src/ir/operation/BatchMatMul.cc index b9616158d..20c5682f9 100644 --- a/runtime/onert/core/src/ir/operation/BatchMatMul.cc +++ b/runtime/onert/core/src/ir/operation/BatchMatMul.cc @@ -28,7 +28,7 @@ void BatchMatMul::accept(OperationVisitor &v) const { v.visit(*this); } BatchMatMul::BatchMatMul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc index 9ef2b125f..3c5578ac4 100644 --- a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc +++ b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/BatchToSpaceND.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void BatchToSpaceND::accept(OperationVisitor &v) const { v.visit(*this); } BatchToSpaceND::BatchToSpaceND(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : 
Operation{OperandConstraint::createExact(3u), inputs, outputs} + : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc index 2b1422c73..5eb3fc3d7 100644 --- a/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc +++ b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/BinaryArithmetic.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir @@ -32,7 +30,7 @@ void BinaryArithmetic::accept(OperationVisitor &v) const { v.visit(*this); } BinaryArithmetic::BinaryArithmetic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} { } @@ -40,10 +38,10 @@ std::string BinaryArithmetic::name() const { using ArithmeticType = onert::ir::operation::BinaryArithmetic::ArithmeticType; static const std::unordered_map<ArithmeticType, std::string> name_map{ - {ArithmeticType::ADD, std::string{"Add"}}, - {ArithmeticType::SUB, std::string{"Sub"}}, - {ArithmeticType::MUL, std::string{"Mul"}}, - {ArithmeticType::DIV, std::string{"Div"}}}; + {ArithmeticType::ADD, std::string{"Add"}}, + {ArithmeticType::SUB, std::string{"Sub"}}, + {ArithmeticType::MUL, std::string{"Mul"}}, + {ArithmeticType::DIV, std::string{"Div"}}}; return name_map.at(_param.arithmetic_type); } diff --git a/runtime/onert/core/src/ir/operation/BroadcastTo.cc b/runtime/onert/core/src/ir/operation/BroadcastTo.cc index a8f5e59cf..eab6c0611 100644 --- a/runtime/onert/core/src/ir/operation/BroadcastTo.cc +++ b/runtime/onert/core/src/ir/operation/BroadcastTo.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/BroadcastTo.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -29,7 +26,7 @@ namespace operation void BroadcastTo::accept(OperationVisitor &v) const { v.visit(*this); } BroadcastTo::BroadcastTo(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} + : Operation{OperandConstraint::createExact(2u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Bulk.cc b/runtime/onert/core/src/ir/operation/Bulk.cc new file mode 100644 index 000000000..4b96c9d94 --- /dev/null +++ b/runtime/onert/core/src/ir/operation/Bulk.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
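For reference, a standalone sketch of the lookup pattern BinaryArithmetic::name() above uses; the enum and function names here are made up for illustration and are not onert code.

#include <string>
#include <unordered_map>

enum class ArithmeticType
{
  ADD,
  SUB,
  MUL,
  DIV
};

std::string arithmetic_name(ArithmeticType t)
{
  // The enum-to-name table is built once and reused on every call.
  static const std::unordered_map<ArithmeticType, std::string> name_map{
    {ArithmeticType::ADD, "Add"},
    {ArithmeticType::SUB, "Sub"},
    {ArithmeticType::MUL, "Mul"},
    {ArithmeticType::DIV, "Div"}};
  return name_map.at(t); // throws std::out_of_range for an unmapped value
}
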
+ */ + +#include "ir/operation/Bulk.h" +#include "ir/OperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace operation +{ +void Bulk::accept(OperationVisitor &v) const { v.visit(*this); } + +Bulk::Bulk(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Bulk::Param ¶m) + : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Comparison.cc b/runtime/onert/core/src/ir/operation/Comparison.cc index 2f6775411..33365657c 100644 --- a/runtime/onert/core/src/ir/operation/Comparison.cc +++ b/runtime/onert/core/src/ir/operation/Comparison.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Comparison.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void Comparison::accept(OperationVisitor &v) const { v.visit(*this); } Comparison::Comparison(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/Concat.cc b/runtime/onert/core/src/ir/operation/Concat.cc index 608bc29a6..3a21e36f2 100644 --- a/runtime/onert/core/src/ir/operation/Concat.cc +++ b/runtime/onert/core/src/ir/operation/Concat.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Concat.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void Concat::accept(OperationVisitor &v) const { v.visit(*this); } Concat::Concat(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/Conv2D.cc b/runtime/onert/core/src/ir/operation/Conv2D.cc index 3a2e1d1fe..d615ae416 100644 --- a/runtime/onert/core/src/ir/operation/Conv2D.cc +++ b/runtime/onert/core/src/ir/operation/Conv2D.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Conv2D.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void Conv2D::accept(OperationVisitor &v) const { v.visit(*this); } Conv2D::Conv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc index 676e039fa..365745ea8 100644 --- a/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc +++ b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/ConvertFp16ToFp32.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void ConvertFp16ToFp32::accept(OperationVisitor &v) const { v.visit(*this); } ConvertFp16ToFp32::ConvertFp16ToFp32(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} + : Operation{OperandConstraint::createExact(1u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc 
b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc index bcfcbfc04..d4fc7031c 100644 --- a/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc +++ b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/ConvertFp32ToFp16.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void ConvertFp32ToFp16::accept(OperationVisitor &v) const { v.visit(*this); } ConvertFp32ToFp16::ConvertFp32ToFp16(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} + : Operation{OperandConstraint::createExact(1u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Custom.cc b/runtime/onert/core/src/ir/operation/Custom.cc index 25c53e1ba..06c84f81a 100644 --- a/runtime/onert/core/src/ir/operation/Custom.cc +++ b/runtime/onert/core/src/ir/operation/Custom.cc @@ -29,7 +29,7 @@ void Custom::accept(OperationVisitor &v) const { v.visit(*this); } Custom::Custom(OperandConstraint input_constr, const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, std::string id, const Userdata &userdata) - : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata) + : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata) { } diff --git a/runtime/onert/core/src/ir/operation/DepthToSpace.cc b/runtime/onert/core/src/ir/operation/DepthToSpace.cc index f2d6c7c1b..e3edea777 100644 --- a/runtime/onert/core/src/ir/operation/DepthToSpace.cc +++ b/runtime/onert/core/src/ir/operation/DepthToSpace.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/DepthToSpace.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void DepthToSpace::accept(OperationVisitor &v) const { v.visit(*this); } DepthToSpace::DepthToSpace(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc index d587a5591..0e7137306 100644 --- a/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc +++ b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/DepthwiseConv2D.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void DepthwiseConv2D::accept(OperationVisitor &v) const { v.visit(*this); } DepthwiseConv2D::DepthwiseConv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc b/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc new file mode 100644 index 000000000..cd708796d --- /dev/null +++ b/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/operation/DetectionPostProcess.h" +#include "ir/OperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace operation +{ + +DetectionPostProcess::DetectionPostProcess(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param ¶m) + : Operation(OperandConstraint::createExact(3u), inputs, outputs), _param(param) +{ +} + +void DetectionPostProcess::accept(OperationVisitor &v) const { v.visit(*this); } + +} // namespace operation +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Einsum.cc b/runtime/onert/core/src/ir/operation/Einsum.cc index 3c1473aaa..b50f070e7 100644 --- a/runtime/onert/core/src/ir/operation/Einsum.cc +++ b/runtime/onert/core/src/ir/operation/Einsum.cc @@ -28,7 +28,7 @@ void Einsum::accept(OperationVisitor &v) const { v.visit(*this); } Einsum::Einsum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc index f6718b656..e83c26e28 100644 --- a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc +++ b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/ElementwiseActivation.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir @@ -33,13 +31,14 @@ void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this); ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { if (param.op_type == Type::LOGISTIC) { - assert(param.alpha == 0.0f && param.beta == 0.0f && "Logistic will be supported only as " - "sigmoid function(L=1, k=1, x0=0). So, do " - "not use alpha and beta"); + assert(param.alpha == 0.0f && param.beta == 0.0f && + "Logistic will be supported only as " + "sigmoid function(L=1, k=1, x0=0). 
So, do " + "not use alpha and beta"); } else if (param.op_type == Type::RELU) { @@ -47,9 +46,10 @@ ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs, } else if (param.op_type == Type::TANH) { - assert(param.alpha == 1.0f && param.beta == 1.0f && "f(x) = alpha * tanh(beta * x), Tanh is " - "supported only the values of alpha and " - "beta are 1.f"); + assert(param.alpha == 1.0f && param.beta == 1.0f && + "f(x) = alpha * tanh(beta * x), Tanh is " + "supported only the values of alpha and " + "beta are 1.f"); } } @@ -57,11 +57,11 @@ std::string ElementwiseActivation::name() const { using ElementwiseActivationType = onert::ir::operation::ElementwiseActivation::Type; static const std::unordered_map<Type, std::string> name_map{ - {ElementwiseActivationType::ELU, "ELU"}, - {ElementwiseActivationType::LOGISTIC, "Logistic"}, - {ElementwiseActivationType::RELU, "ReLU"}, - {ElementwiseActivationType::TANH, "Tanh"}, - {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}}; + {ElementwiseActivationType::ELU, "ELU"}, + {ElementwiseActivationType::LOGISTIC, "Logistic"}, + {ElementwiseActivationType::RELU, "ReLU"}, + {ElementwiseActivationType::TANH, "Tanh"}, + {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}}; return name_map.at(_param.op_type); } diff --git a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc index 3287fc0a3..b22bed7bc 100644 --- a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc +++ b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/ElementwiseBinary.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir @@ -32,7 +30,7 @@ void ElementwiseBinary::accept(OperationVisitor &v) const { v.visit(*this); } ElementwiseBinary::ElementwiseBinary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} { } @@ -40,10 +38,11 @@ std::string ElementwiseBinary::name() const { using ElementwiseBinaryType = onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType; static const std::unordered_map<ElementwiseBinaryType, std::string> name_map{ - {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}}, - {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}}, - {ElementwiseBinaryType::MAX, std::string{"Max"}}, - {ElementwiseBinaryType::MIN, std::string{"Min"}}}; + {ElementwiseBinaryType::FLOOR_DIV, std::string{"FloorDiv"}}, + {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}}, + {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}}, + {ElementwiseBinaryType::MAX, std::string{"Max"}}, + {ElementwiseBinaryType::MIN, std::string{"Min"}}}; return name_map.at(_param.op_type); } diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc index 7dfcd4a98..fd463e0fe 100644 --- a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc +++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/ElementwiseUnary.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir @@ -32,7 +30,9 @@ void ElementwiseUnary::accept(OperationVisitor 
&v) const { v.visit(*this); } ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(1u), inputs, outputs, + OperandConstraint::createExact(1u)}, + _param{param} { } @@ -40,23 +40,23 @@ std::string ElementwiseUnary::name() const { using ElementwiseUnaryType = onert::ir::operation::ElementwiseUnary::Type; static const std::unordered_map<ElementwiseUnaryType, std::string> name_map{ - {ElementwiseUnaryType::ABS, std::string{"Abs"}}, - {ElementwiseUnaryType::CAST, std::string{"Cast"}}, - {ElementwiseUnaryType::COS, std::string{"Cos"}}, - {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}}, - {ElementwiseUnaryType::ERF, std::string{"Erf"}}, - {ElementwiseUnaryType::EXP, std::string{"Exp"}}, - {ElementwiseUnaryType::FLOOR, std::string{"Floor"}}, - {ElementwiseUnaryType::LOG, std::string{"Log"}}, - {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}}, - {ElementwiseUnaryType::NEG, std::string{"Neg"}}, - {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}}, - {ElementwiseUnaryType::ROUND, std::string{"Round"}}, - {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}}, - {ElementwiseUnaryType::SIN, std::string{"Sin"}}, - {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}}, - {ElementwiseUnaryType::SQURE, std::string{"Squre"}}, - {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}}; + {ElementwiseUnaryType::ABS, std::string{"Abs"}}, + {ElementwiseUnaryType::CAST, std::string{"Cast"}}, + {ElementwiseUnaryType::COS, std::string{"Cos"}}, + {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}}, + {ElementwiseUnaryType::ERF, std::string{"Erf"}}, + {ElementwiseUnaryType::EXP, std::string{"Exp"}}, + {ElementwiseUnaryType::FLOOR, std::string{"Floor"}}, + {ElementwiseUnaryType::LOG, std::string{"Log"}}, + {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}}, + {ElementwiseUnaryType::NEG, std::string{"Neg"}}, + {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}}, + {ElementwiseUnaryType::ROUND, std::string{"Round"}}, + {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}}, + {ElementwiseUnaryType::SIN, std::string{"Sin"}}, + {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}}, + {ElementwiseUnaryType::SQUARE, std::string{"Square"}}, + {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}}; return name_map.at(_param.op_type); } diff --git a/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc index b300b004e..66b80b2c5 100644 --- a/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc +++ b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/EmbeddingLookup.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void EmbeddingLookup::accept(OperationVisitor &v) const { v.visit(*this); } EmbeddingLookup::EmbeddingLookup(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} + : Operation{OperandConstraint::createExact(2u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/ExpandDims.cc b/runtime/onert/core/src/ir/operation/ExpandDims.cc index 3f555bd23..e421bc383 100644 --- a/runtime/onert/core/src/ir/operation/ExpandDims.cc +++ b/runtime/onert/core/src/ir/operation/ExpandDims.cc @@ -15,9 +15,6 @@ */ #include 
"ir/operation/ExpandDims.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -30,7 +27,7 @@ namespace operation void ExpandDims::accept(OperationVisitor &v) const { v.visit(*this); } ExpandDims::ExpandDims(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} + : Operation{OperandConstraint::createExact(2u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Fill.cc b/runtime/onert/core/src/ir/operation/Fill.cc index c44f45aab..60355c609 100644 --- a/runtime/onert/core/src/ir/operation/Fill.cc +++ b/runtime/onert/core/src/ir/operation/Fill.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Fill.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -30,7 +27,7 @@ namespace operation void Fill::accept(OperationVisitor &v) const { v.visit(*this); } Fill::Fill(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} + : Operation{OperandConstraint::createExact(2u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/FullyConnected.cc b/runtime/onert/core/src/ir/operation/FullyConnected.cc index 118ae554a..3533df097 100644 --- a/runtime/onert/core/src/ir/operation/FullyConnected.cc +++ b/runtime/onert/core/src/ir/operation/FullyConnected.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/FullyConnected.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void FullyConnected::accept(OperationVisitor &v) const { v.visit(*this); } FullyConnected::FullyConnected(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc b/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc index 7b9301ea6..b5679f308 100644 --- a/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc +++ b/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc @@ -28,7 +28,7 @@ void FusedBatchNorm::accept(OperationVisitor &v) const { v.visit(*this); } FusedBatchNorm::FusedBatchNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createAtLeast(5u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createAtLeast(5u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/Gather.cc b/runtime/onert/core/src/ir/operation/Gather.cc index 11d46e75b..e0c4630a0 100644 --- a/runtime/onert/core/src/ir/operation/Gather.cc +++ b/runtime/onert/core/src/ir/operation/Gather.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Gather.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void Gather::accept(OperationVisitor &v) const { v.visit(*this); } Gather::Gather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/HashtableLookup.cc b/runtime/onert/core/src/ir/operation/HashtableLookup.cc index e9a7a82ff..5d1589cd1 100644 --- a/runtime/onert/core/src/ir/operation/HashtableLookup.cc +++ 
b/runtime/onert/core/src/ir/operation/HashtableLookup.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/HashtableLookup.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void HashtableLookup::accept(OperationVisitor &v) const { v.visit(*this); } HashtableLookup::HashtableLookup(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(3u), inputs, outputs} + : Operation{OperandConstraint::createExact(3u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/If.cc b/runtime/onert/core/src/ir/operation/If.cc index 599751dfd..380c87dbe 100644 --- a/runtime/onert/core/src/ir/operation/If.cc +++ b/runtime/onert/core/src/ir/operation/If.cc @@ -24,7 +24,7 @@ namespace operation { void If::accept(OperationVisitor &v) const { v.visit(*this); } If::If(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param} { } } // namespace operation diff --git a/runtime/onert/core/src/ir/operation/InstanceNorm.cc b/runtime/onert/core/src/ir/operation/InstanceNorm.cc index 2334560ef..9fb55383e 100644 --- a/runtime/onert/core/src/ir/operation/InstanceNorm.cc +++ b/runtime/onert/core/src/ir/operation/InstanceNorm.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/InstanceNorm.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void InstanceNorm::accept(OperationVisitor &v) const { v.visit(*this); } InstanceNorm::InstanceNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/L2Normalization.cc b/runtime/onert/core/src/ir/operation/L2Normalization.cc index 9a7d3eb61..6725df596 100644 --- a/runtime/onert/core/src/ir/operation/L2Normalization.cc +++ b/runtime/onert/core/src/ir/operation/L2Normalization.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/L2Normalization.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void L2Normalization::accept(OperationVisitor &v) const { v.visit(*this); } L2Normalization::L2Normalization(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} + : Operation{OperandConstraint::createExact(1u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/LSTM.cc b/runtime/onert/core/src/ir/operation/LSTM.cc index 30a865326..06e66158b 100644 --- a/runtime/onert/core/src/ir/operation/LSTM.cc +++ b/runtime/onert/core/src/ir/operation/LSTM.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/LSTM.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,8 +28,16 @@ void LSTM::accept(OperationVisitor &v) const { v.visit(*this); } LSTM::LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(23u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createInRange(20u, 24u), inputs, outputs}, _param{param} +{ +} + +std::string LSTM::name() const { + if (getOutputs().at(Output::SCRATCH_BUFFER).undefined()) + return std::string{"UnidirectionalSequenceLSTM"}; + else 
+ return Operation::name(); } } // namespace operation diff --git a/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc index 1ae97c142..73fca9938 100644 --- a/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc +++ b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/LocalResponseNormalization.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -32,7 +29,7 @@ void LocalResponseNormalization::accept(OperationVisitor &v) const { v.visit(*th LocalResponseNormalization::LocalResponseNormalization(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/LogSoftmax.cc b/runtime/onert/core/src/ir/operation/LogSoftmax.cc index 73c6580ec..d580e63e1 100644 --- a/runtime/onert/core/src/ir/operation/LogSoftmax.cc +++ b/runtime/onert/core/src/ir/operation/LogSoftmax.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/LogSoftmax.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void LogSoftmax::accept(OperationVisitor &v) const { v.visit(*this); } LogSoftmax::LogSoftmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/Loss.cc b/runtime/onert/core/src/ir/operation/Loss.cc new file mode 100644 index 000000000..fa3520b2c --- /dev/null +++ b/runtime/onert/core/src/ir/operation/Loss.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
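A small sketch of the dispatch idea behind the new LSTM::name() above; the helper below is illustrative only and not onert code. When the optional scratch-buffer output is left undefined, the node reports itself as the unidirectional-sequence variant instead of the generic opcode name.

#include <string>

std::string lstm_display_name(bool scratch_buffer_defined, const std::string &opcode_name)
{
  // Mirrors the branch in LSTM::name(): undefined scratch buffer => sequence LSTM.
  return scratch_buffer_defined ? opcode_name : std::string{"UnidirectionalSequenceLSTM"};
}
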
+ */ + +#include "ir/operation/Loss.h" +#include "ir/OperationVisitor.h" + +#include <unordered_map> + +namespace onert +{ +namespace ir +{ +namespace operation +{ + +void Loss::accept(OperationVisitor &v) const { v.visit(*this); } + +Loss::Loss(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createAtLeast(2u), inputs, outputs}, _param{param} +{ + if (param.op_type == Type::CATEGORICAL_CROSSENTROPY) + { + assert(inputs.size() == 2 && "CategoricalCrossentropy Loss has 2 inputs"); + } +} + +std::string Loss::name() const +{ + using LossType = onert::ir::operation::Loss::Type; + static const std::unordered_map<Type, std::string> name_map{ + {LossType::MEAN_SQUARED_ERROR, "MeanSquaredError Loss"}, + {LossType::CATEGORICAL_CROSSENTROPY, "CategoricalCrossentropy Loss"}}; + return name_map.at(_param.op_type); +} + +} // namespace operation +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/MatrixBandPart.cc b/runtime/onert/core/src/ir/operation/MatrixBandPart.cc index bac31f13e..e52bddc1f 100644 --- a/runtime/onert/core/src/ir/operation/MatrixBandPart.cc +++ b/runtime/onert/core/src/ir/operation/MatrixBandPart.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/MatrixBandPart.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void MatrixBandPart::accept(OperationVisitor &v) const { v.visit(*this); } MatrixBandPart::MatrixBandPart(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(3u), inputs, outputs} + : Operation{OperandConstraint::createExact(3u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/OneHot.cc b/runtime/onert/core/src/ir/operation/OneHot.cc index 22935e7d6..90898f1ed 100644 --- a/runtime/onert/core/src/ir/operation/OneHot.cc +++ b/runtime/onert/core/src/ir/operation/OneHot.cc @@ -28,7 +28,7 @@ void OneHot::accept(OperationVisitor &v) const { v.visit(*this); } OneHot::OneHot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/PReLU.cc b/runtime/onert/core/src/ir/operation/PReLU.cc index a2e37e0ad..87bd12e60 100644 --- a/runtime/onert/core/src/ir/operation/PReLU.cc +++ b/runtime/onert/core/src/ir/operation/PReLU.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/PReLU.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -30,7 +27,7 @@ namespace operation void PReLU::accept(OperationVisitor &v) const { v.visit(*this); } PReLU::PReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} + : Operation{OperandConstraint::createExact(2u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Pack.cc b/runtime/onert/core/src/ir/operation/Pack.cc index f0908a2c6..00feadfb0 100644 --- a/runtime/onert/core/src/ir/operation/Pack.cc +++ b/runtime/onert/core/src/ir/operation/Pack.cc @@ -25,7 +25,7 @@ namespace operation void Pack::accept(OperationVisitor &v) const { v.visit(*this); } Pack::Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createAtLeast(3u), inputs, outputs}, _param{param} + : 
Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param} { } } // namespace operation diff --git a/runtime/onert/core/src/ir/operation/Pad.cc b/runtime/onert/core/src/ir/operation/Pad.cc index 0c56e92e3..a3f2d9752 100644 --- a/runtime/onert/core/src/ir/operation/Pad.cc +++ b/runtime/onert/core/src/ir/operation/Pad.cc @@ -30,7 +30,7 @@ void Pad::accept(OperationVisitor &v) const { v.visit(*this); } // PAD: 2 inputs // PADV2: 3 inputs Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs} + : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Permute.cc b/runtime/onert/core/src/ir/operation/Permute.cc index eefb6c542..813fbaf30 100644 --- a/runtime/onert/core/src/ir/operation/Permute.cc +++ b/runtime/onert/core/src/ir/operation/Permute.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Permute.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -30,7 +27,7 @@ namespace operation void Permute::accept(OperationVisitor &v) const { v.visit(*this); } Permute::Permute(const OperandIndex &input, const OperandIndex &output, Type type) - : Operation{OperandConstraint::createExact(1u)}, _type{type} + : Operation{OperandConstraint::createExact(1u)}, _type{type} { setInputs({input}); setOutputs({output}); diff --git a/runtime/onert/core/src/ir/operation/Pool2D.cc b/runtime/onert/core/src/ir/operation/Pool2D.cc index 761d14c3d..e32b876e6 100644 --- a/runtime/onert/core/src/ir/operation/Pool2D.cc +++ b/runtime/onert/core/src/ir/operation/Pool2D.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/Pool2D.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir @@ -32,7 +30,7 @@ void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); } Pool2D::Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } @@ -40,9 +38,9 @@ std::string Pool2D::name() const { using PoolType = onert::ir::operation::Pool2D::PoolType; static const std::unordered_map<PoolType, std::string> name_map{ - {PoolType::AVG, "Avg" + std::string{toString(opcode())}}, - {PoolType::L2, "L2" + std::string{toString(opcode())}}, - {PoolType::MAX, "Max" + std::string{toString(opcode())}}}; + {PoolType::AVG, "Avg" + std::string{toString(opcode())}}, + {PoolType::L2, "L2" + std::string{toString(opcode())}}, + {PoolType::MAX, "Max" + std::string{toString(opcode())}}}; return name_map.at(_param.op_type); } diff --git a/runtime/onert/core/src/ir/operation/Pow.cc b/runtime/onert/core/src/ir/operation/Pow.cc index 940b1391a..f7c159a12 100644 --- a/runtime/onert/core/src/ir/operation/Pow.cc +++ b/runtime/onert/core/src/ir/operation/Pow.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Pow.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -30,7 +27,7 @@ namespace operation void Pow::accept(OperationVisitor &v) const { v.visit(*this); } Pow::Pow(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} + : Operation{OperandConstraint::createExact(2u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/RNN.cc 
b/runtime/onert/core/src/ir/operation/RNN.cc index 298c5e745..988a50669 100644 --- a/runtime/onert/core/src/ir/operation/RNN.cc +++ b/runtime/onert/core/src/ir/operation/RNN.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/RNN.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void RNN::accept(OperationVisitor &v) const { v.visit(*this); } RNN::RNN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/Range.cc b/runtime/onert/core/src/ir/operation/Range.cc index 96ab04c1b..8ced92a0b 100644 --- a/runtime/onert/core/src/ir/operation/Range.cc +++ b/runtime/onert/core/src/ir/operation/Range.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Range.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -30,7 +27,7 @@ namespace operation void Range::accept(OperationVisitor &v) const { v.visit(*this); } Range::Range(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(3u), inputs, outputs} + : Operation{OperandConstraint::createExact(3u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Rank.cc b/runtime/onert/core/src/ir/operation/Rank.cc index c357e9018..40797bf29 100644 --- a/runtime/onert/core/src/ir/operation/Rank.cc +++ b/runtime/onert/core/src/ir/operation/Rank.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Rank.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -30,7 +27,7 @@ namespace operation void Rank::accept(OperationVisitor &v) const { v.visit(*this); } Rank::Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} + : Operation{OperandConstraint::createExact(1u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Reduce.cc b/runtime/onert/core/src/ir/operation/Reduce.cc index d6a1d953c..8da1940fa 100644 --- a/runtime/onert/core/src/ir/operation/Reduce.cc +++ b/runtime/onert/core/src/ir/operation/Reduce.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/Reduce.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir @@ -32,7 +30,7 @@ void Reduce::accept(OperationVisitor &v) const { v.visit(*this); } Reduce::Reduce(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} { } @@ -40,13 +38,13 @@ std::string Reduce::name() const { using ReduceType = onert::ir::operation::Reduce::ReduceType; static const std::unordered_map<ReduceType, std::string> name_map{ - {ReduceType::ALL, std::string{toString(opcode())} + "All"}, - {ReduceType::ANY, std::string{toString(opcode())} + "Any"}, - {ReduceType::MAX, std::string{toString(opcode())} + "Max"}, - {ReduceType::MEAN, std::string{toString(opcode())} + "Mean"}, - {ReduceType::MIN, std::string{toString(opcode())} + "Min"}, - {ReduceType::PROD, std::string{toString(opcode())} + "Prod"}, - {ReduceType::SUM, std::string{toString(opcode())} + "SUM"}}; + {ReduceType::ALL, std::string{toString(opcode())} + "All"}, + {ReduceType::ANY, 
std::string{toString(opcode())} + "Any"}, + {ReduceType::MAX, std::string{toString(opcode())} + "Max"}, + {ReduceType::MEAN, std::string{toString(opcode())} + "Mean"}, + {ReduceType::MIN, std::string{toString(opcode())} + "Min"}, + {ReduceType::PROD, std::string{toString(opcode())} + "Prod"}, + {ReduceType::SUM, std::string{toString(opcode())} + "SUM"}}; return name_map.at(_param.reduce_type); // return std::string(toString(opcode())) + reduce_type_str_map.at(_param.reduce_type); } diff --git a/runtime/onert/core/src/ir/operation/Reshape.cc b/runtime/onert/core/src/ir/operation/Reshape.cc index 92aa89ac6..0ed4affa1 100644 --- a/runtime/onert/core/src/ir/operation/Reshape.cc +++ b/runtime/onert/core/src/ir/operation/Reshape.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Reshape.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void Reshape::accept(OperationVisitor &v) const { v.visit(*this); } Reshape::Reshape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param(param) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param(param) { } diff --git a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc index d0d89f45f..7d256f447 100644 --- a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc +++ b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/ResizeBilinear.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void ResizeBilinear::accept(OperationVisitor &v) const { v.visit(*this); } ResizeBilinear::ResizeBilinear(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc index 9f17af97c..58be87b95 100644 --- a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc +++ b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/ResizeNearestNeighbor.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -32,7 +29,7 @@ void ResizeNearestNeighbor::accept(OperationVisitor &v) const { v.visit(*this); ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/Reverse.cc b/runtime/onert/core/src/ir/operation/Reverse.cc index 4b3c1e1af..6c3746426 100644 --- a/runtime/onert/core/src/ir/operation/Reverse.cc +++ b/runtime/onert/core/src/ir/operation/Reverse.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Reverse.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -30,7 +27,7 @@ namespace operation void Reverse::accept(OperationVisitor &v) const { v.visit(*this); } Reverse::Reverse(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} + : 
Operation{OperandConstraint::createExact(2u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Select.cc b/runtime/onert/core/src/ir/operation/Select.cc index 1f22b5234..59684190c 100644 --- a/runtime/onert/core/src/ir/operation/Select.cc +++ b/runtime/onert/core/src/ir/operation/Select.cc @@ -28,7 +28,7 @@ namespace operation void Select::accept(OperationVisitor &v) const { v.visit(*this); } Select::Select(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(3u), inputs, outputs} + : Operation{OperandConstraint::createExact(3u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Shape.cc b/runtime/onert/core/src/ir/operation/Shape.cc index 2a63d6dcf..f90924488 100644 --- a/runtime/onert/core/src/ir/operation/Shape.cc +++ b/runtime/onert/core/src/ir/operation/Shape.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Shape.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -30,7 +27,7 @@ namespace operation void Shape::accept(OperationVisitor &v) const { v.visit(*this); } Shape::Shape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} + : Operation{OperandConstraint::createExact(1u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Slice.cc b/runtime/onert/core/src/ir/operation/Slice.cc index 888b563fb..1362c0f91 100644 --- a/runtime/onert/core/src/ir/operation/Slice.cc +++ b/runtime/onert/core/src/ir/operation/Slice.cc @@ -27,7 +27,7 @@ namespace operation void Slice::accept(OperationVisitor &v) const { v.visit(*this); } Slice::Slice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(3u), inputs, outputs} + : Operation{OperandConstraint::createExact(3u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Softmax.cc b/runtime/onert/core/src/ir/operation/Softmax.cc index 3f1aa0af1..c06c85309 100644 --- a/runtime/onert/core/src/ir/operation/Softmax.cc +++ b/runtime/onert/core/src/ir/operation/Softmax.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Softmax.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void Softmax::accept(OperationVisitor &v) const { v.visit(*this); } Softmax::Softmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc index 53fab4fa9..94acccb0c 100644 --- a/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc +++ b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/SpaceToBatchND.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void SpaceToBatchND::accept(OperationVisitor &v) const { v.visit(*this); } SpaceToBatchND::SpaceToBatchND(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(3u), inputs, outputs} + : Operation{OperandConstraint::createExact(3u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/SpaceToDepth.cc b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc index d8a45aee5..08e7e5190 100644 --- 
a/runtime/onert/core/src/ir/operation/SpaceToDepth.cc +++ b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/SpaceToDepth.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void SpaceToDepth::accept(OperationVisitor &v) const { v.visit(*this); } SpaceToDepth::SpaceToDepth(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/Split.cc b/runtime/onert/core/src/ir/operation/Split.cc index 244884e41..3e371188d 100644 --- a/runtime/onert/core/src/ir/operation/Split.cc +++ b/runtime/onert/core/src/ir/operation/Split.cc @@ -13,9 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "ir/operation/Split.h" -#include <cassert> #include "ir/OperationVisitor.h" + namespace onert { namespace ir @@ -25,7 +26,7 @@ namespace operation void Split::accept(OperationVisitor &v) const { v.visit(*this); } Split::Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} { } } // namespace operation diff --git a/runtime/onert/core/src/ir/operation/SplitV.cc b/runtime/onert/core/src/ir/operation/SplitV.cc index e638c9ac9..be13f167e 100644 --- a/runtime/onert/core/src/ir/operation/SplitV.cc +++ b/runtime/onert/core/src/ir/operation/SplitV.cc @@ -13,9 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "ir/operation/SplitV.h" -#include <cassert> #include "ir/OperationVisitor.h" + namespace onert { namespace ir @@ -25,7 +26,7 @@ namespace operation void SplitV::accept(OperationVisitor &v) const { v.visit(*this); } SplitV::SplitV(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} { } } // namespace operation diff --git a/runtime/onert/core/src/ir/operation/SquaredDifference.cc b/runtime/onert/core/src/ir/operation/SquaredDifference.cc index 49e58aaf2..db93903c7 100644 --- a/runtime/onert/core/src/ir/operation/SquaredDifference.cc +++ b/runtime/onert/core/src/ir/operation/SquaredDifference.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/SquaredDifference.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void SquaredDifference::accept(OperationVisitor &v) const { v.visit(*this); } SquaredDifference::SquaredDifference(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} + : Operation{OperandConstraint::createExact(2u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Squeeze.cc b/runtime/onert/core/src/ir/operation/Squeeze.cc index 8cf928fb4..e059c4bee 100644 --- a/runtime/onert/core/src/ir/operation/Squeeze.cc +++ b/runtime/onert/core/src/ir/operation/Squeeze.cc @@ -28,7 +28,7 @@ void Squeeze::accept(OperationVisitor &v) const { v.visit(*this); } Squeeze::Squeeze(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param) { } diff --git a/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc b/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc index cbb0ff251..94be0be86 100644 --- a/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc +++ b/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/StatelessRandomUniform.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -30,7 +27,7 @@ void StatelessRandomUniform::accept(OperationVisitor &v) const { v.visit(*this); StatelessRandomUniform::StatelessRandomUniform(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} + : Operation{OperandConstraint::createExact(2u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/StridedSlice.cc b/runtime/onert/core/src/ir/operation/StridedSlice.cc index 2a7905995..a38282c93 100644 --- a/runtime/onert/core/src/ir/operation/StridedSlice.cc +++ b/runtime/onert/core/src/ir/operation/StridedSlice.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/StridedSlice.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void StridedSlice::accept(OperationVisitor &v) const { v.visit(*this); } StridedSlice::StridedSlice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/Tile.cc 
b/runtime/onert/core/src/ir/operation/Tile.cc index 5ba3df2ad..51c1ff1dc 100644 --- a/runtime/onert/core/src/ir/operation/Tile.cc +++ b/runtime/onert/core/src/ir/operation/Tile.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Tile.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -30,7 +27,7 @@ namespace operation void Tile::accept(OperationVisitor &v) const { v.visit(*this); } Tile::Tile(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} + : Operation{OperandConstraint::createExact(2u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/TopKV2.cc b/runtime/onert/core/src/ir/operation/TopKV2.cc index a5e6c6a85..e1723d180 100644 --- a/runtime/onert/core/src/ir/operation/TopKV2.cc +++ b/runtime/onert/core/src/ir/operation/TopKV2.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/TopKV2.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void TopKV2::accept(OperationVisitor &v) const { v.visit(*this); } TopKV2::TopKV2(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/Transpose.cc b/runtime/onert/core/src/ir/operation/Transpose.cc index 3a663fbce..dbc5ef2aa 100644 --- a/runtime/onert/core/src/ir/operation/Transpose.cc +++ b/runtime/onert/core/src/ir/operation/Transpose.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Transpose.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -29,9 +26,8 @@ namespace operation void Transpose::accept(OperationVisitor &v) const { v.visit(*this); } -Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} +Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) + : Operation{OperandConstraint::createExact(2u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/TransposeConv.cc b/runtime/onert/core/src/ir/operation/TransposeConv.cc index 7f29ca44e..944cc365d 100644 --- a/runtime/onert/core/src/ir/operation/TransposeConv.cc +++ b/runtime/onert/core/src/ir/operation/TransposeConv.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/TransposeConv.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert @@ -31,7 +28,7 @@ void TransposeConv::accept(OperationVisitor &v) const { v.visit(*this); } TransposeConv::TransposeConv(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/Unpack.cc b/runtime/onert/core/src/ir/operation/Unpack.cc index 67aa54ab5..185eddce3 100644 --- a/runtime/onert/core/src/ir/operation/Unpack.cc +++ b/runtime/onert/core/src/ir/operation/Unpack.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "ir/operation/Unpack.h" #include "ir/OperationVisitor.h" @@ -25,7 +26,7 @@ namespace operation void Unpack::accept(OperationVisitor &v) const { v.visit(*this); } Unpack::Unpack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } } // namespace operation diff --git a/runtime/onert/core/src/ir/operation/While.cc b/runtime/onert/core/src/ir/operation/While.cc index 2505c60e3..f35996b07 100644 --- a/runtime/onert/core/src/ir/operation/While.cc +++ b/runtime/onert/core/src/ir/operation/While.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "ir/operation/While.h" #include "ir/OperationVisitor.h" @@ -25,7 +26,7 @@ namespace operation void While::accept(OperationVisitor &v) const { v.visit(*this); } While::While(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m) - : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param} + : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param} { } } // namespace operation diff --git a/runtime/onert/core/src/ir/train/TrainableGraph.cc b/runtime/onert/core/src/ir/train/TrainableGraph.cc new file mode 100644 index 000000000..781f04956 --- /dev/null +++ b/runtime/onert/core/src/ir/train/TrainableGraph.cc @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/train/TrainableGraph.h" +#include "util/Utils.h" + +#include <algorithm> +#include <misc/polymorphic_downcast.h> + +namespace onert +{ +namespace ir +{ +namespace train +{ + +TrainableGraph::TrainableGraph() : _graph{} {} + +TrainableGraph::TrainableGraph(const TrainableGraph &tgraph) + : _graph{tgraph._graph}, _derivatives{tgraph._derivatives}, _losses{tgraph._losses} +{ + tgraph.operations().iterate( + [&](const onert::ir::OperationIndex &index, const onert::ir::IOperation &op) { + replaceOperation(index, dynamic_cast<const ITrainableOperation &>(op).clone()); + }); +} + +TrainableGraph::TrainableGraph(const Graph &graph) : _graph{graph} {} + +OperandIndex TrainableGraph::addOperand(const Shape &shape, const TypeInfo &type) +{ + return _graph.addOperand(shape, type); +} + +OperandIndex TrainableGraph::addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand) +{ + return _graph.addOperand(index, std::move(operand)); +} + +OperationIndex TrainableGraph::addOperation(std::unique_ptr<ITrainableOperation> &&operation) +{ + return _graph.addOperation(std::move(operation)); +} + +OperationIndex TrainableGraph::replaceOperation(OperationIndex index, + std::unique_ptr<ITrainableOperation> &&operation) +{ + return _graph.replaceOperation(index, std::move(operation)); +} + +OperandIndex TrainableGraph::addDerivative(OperandIndex index, + std::unique_ptr<Operand> &&derivative) +{ + return _derivatives.push(std::move(derivative), index); +} + +IOIndex TrainableGraph::getInputIndex(const std::string &name) const +{ + return _graph.getInputIndex(name); +} + +IOIndex TrainableGraph::getOutputIndex(const std::string &name) const +{ + return _graph.getOutputIndex(name); +} + +void TrainableGraph::changeShape(const OperandIndex &index, const ir::Shape &new_shape) +{ + _graph.changeShape(index, new_shape); +} + +void TrainableGraph::changeDerivativeShape(const OperandIndex &index, const ir::Shape &new_shape) +{ + assert(_derivatives.exist(index)); + _derivatives.at(index).info().shape(new_shape); +} + +void TrainableGraph::addInput(const OperandIndex &ind, const std::string &name) +{ + _graph.addInput(ind, name); +} + +void TrainableGraph::addOutput(const OperandIndex &ind, const std::string &name) +{ + _graph.addOutput(ind, name); +} + +void TrainableGraph::verify(void) const +{ + _graph.verify(); + + operations().iterate([](const onert::ir::OperationIndex &, const onert::ir::IOperation &op) { + try + { + UNUSED_RELEASE(dynamic_cast<const onert::ir::train::ITrainableOperation &>(op)); + } + catch (const std::bad_cast &) + { + std::runtime_error("TrainableGraph: " + op.name() + " is not a trainable operation"); + } + }); +} + +void TrainableGraph::removeOperand(const OperandIndex &ind) { _graph.removeOperand(ind); } + +void TrainableGraph::setLayout(Layout layout) { _graph.setLayout(layout); } + +const ITrainableOperation &TrainableGraph::operation(OperationIndex index) const +{ + // NOTE Virtual inherited objects cannot be static_casted. + return dynamic_cast<const ITrainableOperation &>(_graph.operations().at(index)); +} + +std::vector<ir::OperationIndex> TrainableGraph::topolSortOperations() const +{ + return _graph.topolSortOperations(); +} + +void TrainableGraph::addLoss(const OperandIndex &loss_ind, const IOIndex &pred_ioind) +{ + _losses.emplace(pred_ioind, loss_ind); +} + +OperandIndex TrainableGraph::getLossIndex(const IOIndex &pred_ioind) const +{ + auto itr = _losses.find(pred_ioind); + return (itr == _losses.end()) ? 
OperandIndex{} : itr->second; +} + +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Conv2D.cc b/runtime/onert/core/src/ir/train/operation/Conv2D.cc new file mode 100644 index 000000000..923861ae3 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Conv2D.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/Conv2D.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Conv2D::clone() const +{ + return std::make_unique<Conv2D>(*this); +} + +void Conv2D::accept(OperationVisitor &v) const { v.visit(*this); } + +void Conv2D::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Conv2D::Conv2D(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc new file mode 100644 index 000000000..1dae3f674 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/train/operation/ElementwiseActivation.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> ElementwiseActivation::clone() const +{ + return std::make_unique<ElementwiseActivation>(*this); +} + +void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this); } + +void ElementwiseActivation::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +ElementwiseActivation::ElementwiseActivation(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/FullyConnected.cc b/runtime/onert/core/src/ir/train/operation/FullyConnected.cc new file mode 100644 index 000000000..a26f7c489 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/FullyConnected.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/FullyConnected.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> FullyConnected::clone() const +{ + return std::make_unique<FullyConnected>(*this); +} + +void FullyConnected::accept(OperationVisitor &v) const { v.visit(*this); } + +void FullyConnected::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +FullyConnected::FullyConnected(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Loss.cc b/runtime/onert/core/src/ir/train/operation/Loss.cc new file mode 100644 index 000000000..abd79929b --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Loss.cc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/train/operation/Loss.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +#include <misc/polymorphic_downcast.h> + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Loss::clone() const { return std::make_unique<Loss>(*this); } + +void Loss::accept(OperationVisitor &v) const { v.visit(*this); } + +void Loss::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Loss::Loss(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Permute.cc b/runtime/onert/core/src/ir/train/operation/Permute.cc new file mode 100644 index 000000000..adc23aa49 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Permute.cc @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/Permute.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Permute::clone() const +{ + return std::make_unique<Permute>(*this); +} + +void Permute::accept(OperationVisitor &v) const { v.visit(*this); } + +void Permute::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Permute::Permute(const OperationType &operation) + : OperationType{operation.getInputs().at(0), operation.getOutputs().at(0), + operation.getPermuteType()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Pool2D.cc b/runtime/onert/core/src/ir/train/operation/Pool2D.cc new file mode 100644 index 000000000..021574f19 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Pool2D.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/train/operation/Pool2D.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Pool2D::clone() const +{ + return std::make_unique<Pool2D>(*this); +} + +void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); } + +void Pool2D::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Pool2D::Pool2D(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Reshape.cc b/runtime/onert/core/src/ir/train/operation/Reshape.cc new file mode 100644 index 000000000..c76158607 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Reshape.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/train/operation/Reshape.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Reshape::clone() const +{ + return std::make_unique<Reshape>(*this); +} + +void Reshape::accept(OperationVisitor &v) const { v.visit(*this); } + +void Reshape::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Reshape::Reshape(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/train/operation/Softmax.cc b/runtime/onert/core/src/ir/train/operation/Softmax.cc new file mode 100644 index 000000000..dbd403879 --- /dev/null +++ b/runtime/onert/core/src/ir/train/operation/Softmax.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/train/operation/Softmax.h" + +#include "ir/OperationVisitor.h" +#include "ir/train/TrainableOperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace train +{ +namespace operation +{ + +std::unique_ptr<ITrainableOperation> Softmax::clone() const +{ + return std::make_unique<Softmax>(*this); +} + +void Softmax::accept(OperationVisitor &v) const { v.visit(*this); } + +void Softmax::accept(TrainableOperationVisitor &v) const { v.visit(*this); } + +Softmax::Softmax(const OperationType &operation) + : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()} +{ + // DO NOTHING +} + +} // namespace operation +} // namespace train +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/verifier/Verifier.cc b/runtime/onert/core/src/ir/verifier/Verifier.cc index 09cbdcf2f..6260d29ff 100644 --- a/runtime/onert/core/src/ir/verifier/Verifier.cc +++ b/runtime/onert/core/src/ir/verifier/Verifier.cc @@ -39,11 +39,11 @@ bool DAGChecker::verify(const Graph &graph) const noexcept OperationIndexMap<bool> visited; operations.iterate( - [&](const OperationIndex &index, const Operation &) { visited[index] = false; }); + [&](const OperationIndex &index, const IOperation &) { visited[index] = false; }); OperationIndexMap<bool> on_stack = visited; // Copy from visited - std::function<void(const OperationIndex &index, const Operation &)> dfs_recursive = - [&](const OperationIndex &index, const Operation &node) -> void { + std::function<void(const OperationIndex &index, const IOperation &)> dfs_recursive = + [&](const OperationIndex &index, const IOperation &node) -> void { if (on_stack[index]) cyclic = true; if (visited[index]) @@ -51,7 +51,7 @@ bool DAGChecker::verify(const Graph &graph) const noexcept visited[index] = true; on_stack[index] = true; - for (auto output : node.getOutputs() | Remove::DUPLICATED) + for (auto &&output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED) { const auto &operand = graph.operands().at(output); for (const auto &use : operand.getUses()) @@ -72,12 +72,12 @@ bool DAGChecker::verify(const Graph &graph) const noexcept // EdgeConsistencyVerifier // -bool EdgeConsistencyChecker::verify(const Graph &graph) const noexcept +bool EdgeChecker::verify(const Graph &graph) const noexcept { auto &operations = graph.operations(); uint32_t errors = 0; - operations.iterate([&](const OperationIndex &index, const Operation &node) { - for (auto operand_index : node.getInputs() | ir::Remove::UNDEFINED) + operations.iterate([&](const OperationIndex &index, const IOperation &node) { + for (auto &&operand_index : node.getInputs() | ir::Remove::UNDEFINED) { try { @@ -85,48 +85,60 @@ bool EdgeConsistencyChecker::verify(const Graph &graph) const noexcept bool operand_has_use = operand.getUses().contains(index); if (!operand_has_use) { - VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand " - << operand_index << " to Operation " << index - << std::endl; + VERBOSE(EdgeChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand " + << operand_index << " to Operation " << index << std::endl; errors += 1; } } catch (const std::out_of_range &e) { - VERBOSE(EdgeConsistencyChecker) - << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand " - << operand_index << ", but the operand object is not present in the graph" << std::endl; + VERBOSE(EdgeChecker) << "[ERROR] OPEARAND NOT FOUND : Operation " << index + << " has Operand " << operand_index + << ", but the operand object 
is not present in the graph" << std::endl; errors += 1; } } - for (auto operand_index : node.getOutputs()) + for (auto &&operand_index : node.getOutputs() | ir::Remove::UNDEFINED) { try { auto &operand = graph.operands().at(operand_index); if (operand.getDef() != index) { - VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand" - << operand_index << " to Operation " << index - << std::endl; + VERBOSE(EdgeChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand" + << operand_index << " to Operation " << index << std::endl; errors += 1; } } catch (const std::out_of_range &e) { - VERBOSE(EdgeConsistencyChecker) - << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand " - << operand_index << ", but the operand object is not present in the graph" << std::endl; + VERBOSE(EdgeChecker) << "[ERROR] OPEARAND NOT FOUND : Operation " << index + << " has Operand " << operand_index + << ", but the operand object is not present in the graph" << std::endl; errors += 1; } } }); - VERBOSE(EdgeConsistencyChecker) << "Total Number of errors : " << errors << std::endl; + VERBOSE(EdgeChecker) << "Total Number of errors : " << errors << std::endl; return errors == 0; } +bool InputOutputChecker::verify(const Graph &graph) const noexcept +{ + for (auto &&operand_ind : + (graph.getInputs() + graph.getOutputs()) | Remove::DUPLICATED | Remove::UNDEFINED) + { + if (!graph.operands().exist(operand_ind)) + { + VERBOSE(InputOutputChecker) << "Input or Output tensor " << operand_ind << " does not exist."; + return false; + } + } + return true; +} + } // namespace verifier } // namespace ir } // namespace onert diff --git a/runtime/onert/core/src/ir/verifier/Verifier.h b/runtime/onert/core/src/ir/verifier/Verifier.h index 0c7b57b04..fa1311983 100644 --- a/runtime/onert/core/src/ir/verifier/Verifier.h +++ b/runtime/onert/core/src/ir/verifier/Verifier.h @@ -55,7 +55,16 @@ public: bool verify(const Graph &graph) const noexcept override; }; -class EdgeConsistencyChecker : public IVerifier +class EdgeChecker : public IVerifier +{ +public: + bool verify(const Graph &graph) const noexcept override; +}; + +/** + * @brief Check model input and output operands are really exist in the graph + */ +class InputOutputChecker : public IVerifier { public: bool verify(const Graph &graph) const noexcept override; diff --git a/runtime/onert/core/src/ir/verifier/Verifier.test.cc b/runtime/onert/core/src/ir/verifier/Verifier.test.cc new file mode 100644 index 000000000..1ec71cd55 --- /dev/null +++ b/runtime/onert/core/src/ir/verifier/Verifier.test.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Verifier.h" + +#include "../MockNode.h" + +#include "ir/Graph.h" + +#include <gtest/gtest.h> + +#include <memory> + +using IndexSet = onert::ir::OperandIndexSequence; +using Mock = onert_test::ir::SimpleMock; + +TEST(Verifier, dag_checker) +{ + onert::ir::Graph graph; + + onert::ir::Shape shape{3}; + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + auto operand1 = graph.addOperand(shape, type); + auto operand2 = graph.addOperand(shape, type); + + graph.addInput(operand1); + graph.addOutput(operand2); + + graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2})); + + onert::ir::verifier::DAGChecker verifier; + + ASSERT_TRUE(verifier.verify(graph)); +} + +TEST(Verifier, neg_edge_consistency_checker_1) +{ + onert::ir::Graph graph; + + onert::ir::Shape shape{3}; + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + auto operand1 = graph.addOperand(shape, type); + auto operand2 = graph.addOperand(shape, type); + + graph.addInput(operand1); + graph.addOutput(operand2); + + auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}); + auto op_ind = graph.addOperation(std::move(mock_op)); + + graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone + + onert::ir::verifier::EdgeChecker verifier; + ASSERT_FALSE(verifier.verify(graph)); +} + +TEST(Verifier, neg_edge_consistency_checker_2) +{ + onert::ir::Graph graph; + + onert::ir::Shape shape{3}; + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + auto operand1 = graph.addOperand(shape, type); + auto operand2 = graph.addOperand(shape, type); + + graph.addInput(operand1); + graph.addOutput(operand2); + + auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}); + auto mock_op_ptr = mock_op.get(); + auto op_ind = graph.addOperation(std::move(mock_op)); + + mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone + + onert::ir::verifier::EdgeChecker verifier; + ASSERT_FALSE(verifier.verify(graph)); +} diff --git a/runtime/onert/core/src/odc/QuantizeManager.cc b/runtime/onert/core/src/odc/QuantizeManager.cc new file mode 100644 index 000000000..71572a7e0 --- /dev/null +++ b/runtime/onert/core/src/odc/QuantizeManager.cc @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "QuantizerLoader.h" +#include "odc/QuantizeManager.h" + +#include <iostream> +#include <mutex> + +namespace onert +{ +namespace odc +{ + +bool QuantizeManager::quantize() +{ + // Compile function is thread-unsafe + static std::mutex lock; + std::lock_guard<std::mutex> guard(lock); + + if (_export_model_path.empty()) + throw std::runtime_error("Export model path is not set"); + + auto &quantize_loader = QuantizerLoader::instance(); + if (quantize_loader.loadLibrary() != 0) + return false; + + auto quantizer = quantize_loader.get(); + auto result = quantizer->quantize(_model_path.c_str(), _export_model_path.c_str(), _is_q16); + + // TODO Unload quantize library to reduce memory usage + + return (result == 0); +} + +} // namespace odc +} // namespace onert diff --git a/runtime/onert/core/src/util/GeneralConfigSource.cc b/runtime/onert/core/src/odc/QuantizeManager.test.cc index 7d2757e58..4e155a6ef 100644 --- a/runtime/onert/core/src/util/GeneralConfigSource.cc +++ b/runtime/onert/core/src/odc/QuantizeManager.test.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,32 +14,23 @@ * limitations under the License. */ -#include "util/GeneralConfigSource.h" -#include "util/logging.h" +#include "odc/QuantizeManager.h" -namespace onert -{ -namespace util -{ +#include <gtest/gtest.h> -std::string GeneralConfigSource::get(const std::string &key) const +using namespace onert::odc; + +// Test export model path is not set +TEST(odc_QuantizeManager, neg_export_model_path) { - auto itr = _map.find(key); - if (itr == _map.end()) - { - return ""; - } - else - { - return itr->second; - } + QuantizeManager manager("model_path"); + ASSERT_THROW(manager.quantize(), std::runtime_error); } -void GeneralConfigSource::set(const std::string &key, const std::string &val) +// Test invalid model path +TEST(odc_QuantizeManager, neg_invalid_model_path) { - VERBOSE(GeneralConfigSource) << key << " : " << val << std::endl; - _map[key] = val; + QuantizeManager manager("invalid_model_path.circle"); + manager.exportModelPath("export_model_path.circle"); + ASSERT_EQ(manager.quantize(), false); } - -} // namespace util -} // namespace onert diff --git a/runtime/onert/core/src/odc/QuantizerLoader.cc b/runtime/onert/core/src/odc/QuantizerLoader.cc new file mode 100644 index 000000000..8a972e97e --- /dev/null +++ b/runtime/onert/core/src/odc/QuantizerLoader.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "QuantizerLoader.h" + +#include <dlfcn.h> +#include <iostream> +#include <string> + +static const char *SHARED_LIB_EXT = +#if defined(__APPLE__) && defined(__MACH__) + ".dylib"; +#else + ".so"; +#endif + +namespace onert +{ +namespace odc +{ + +QuantizerLoader &QuantizerLoader::instance() +{ + static QuantizerLoader singleton; + return singleton; +} + +int32_t QuantizerLoader::loadLibrary() +{ + if (get() != nullptr) + return 0; + + const std::string quantize_so = std::string("libonert_odc") + SHARED_LIB_EXT; + void *handle = dlopen(quantize_so.c_str(), RTLD_LAZY | RTLD_LOCAL); + auto dlerror_msg = dlerror(); + + if (handle == nullptr) + { + std::cerr << "Failed to load " << quantize_so << std::endl; + std::cerr << dlerror_msg << std::endl; + return 1; + } + + { + const char *factory_name = "create_quantizer"; + auto factory = (factory_t)dlsym(handle, factory_name); + dlerror_msg = dlerror(); + + if (factory == nullptr) + { + std::cerr << "QuantizerLoader: unable to find function " << factory_name << dlerror_msg + << std::endl; + dlclose(handle); + return 1; + } + + auto destroyer = (quantizer_destory_t)dlsym(handle, "destroy_quantizer"); + _quantizer = std::unique_ptr<IQuantizer, quantizer_destory_t>(factory(), destroyer); + + if (_quantizer == nullptr) + { + std::cerr << "QuantizerLoader: unable to create quantizer" << std::endl; + dlclose(handle); + return 1; + } + } + + // Save quantize library handle (avoid warning by handle lost without dlclose()) + // clang-format off + _dlhandle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [filename = quantize_so](void *h) { + if (dlclose(h) != 0) + std::cerr << "Failed to unload backend " << filename << std::endl; + }}; + // clang-format on + + return 0; +} + +int32_t QuantizerLoader::unloadLibrary() +{ + if (get() == nullptr) + return 0; + + _quantizer.reset(nullptr); + _dlhandle.reset(nullptr); + + return 0; +} + +} // namespace odc +} // namespace onert diff --git a/runtime/onert/core/src/odc/QuantizerLoader.h b/runtime/onert/core/src/odc/QuantizerLoader.h new file mode 100644 index 000000000..36a9f2996 --- /dev/null +++ b/runtime/onert/core/src/odc/QuantizerLoader.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_ODC_QUANTIZER_LOADER_H__ +#define __ONERT_ODC_QUANTIZER_LOADER_H__ + +#include "odc/IQuantizer.h" + +#include <functional> +#include <memory> + +namespace onert +{ +namespace odc +{ + +/** + * @brief Class to manage loading and unloading of dynamic library containing + * implementation of IQuantizer interface + */ +class QuantizerLoader +{ +public: + /** + * @brief Typedef for function pointer to destroy loaded library handle + */ + using dlhandle_destroy_t = std::function<void(void *)>; + /** + * @brief Typedef for function pointer to create instance of IQuantizer + */ + using factory_t = IQuantizer *(*)(); + /** + * @brief Typedef for function pointer to destroy instance of IQuantizer + */ + using quantizer_destory_t = void (*)(IQuantizer *); + + /** + * @brief Get singleton instance of QuantizerLoader + * @return Reference to singleton instance of QuantizerLoader + */ + static QuantizerLoader &instance(); + +private: + // Cannot create instance of QuantizerLoader outside of this class + QuantizerLoader() = default; + QuantizerLoader(QuantizerLoader const &) = delete; + QuantizerLoader &operator=(QuantizerLoader const &) = delete; + ~QuantizerLoader() = default; + +public: + /** + * @brief Load dynamic library containing implementation of IQuantizer + * @return 0 if success, otherwise errno value + */ + int32_t loadLibrary(); + /** + * @brief Unload dynamic library containing implementation of IQuantizer + * @return 0 if success, otherwise errno value + */ + int32_t unloadLibrary(); + /** + * @brief Get instance of IQuantizer created through factory method + * @return Pointer to instance of IQuantizer + */ + IQuantizer *get() const { return _quantizer.get(); } + +private: + // Note: Keep handle to avoid svace warning of "handle lost without dlclose()" + std::unique_ptr<void, dlhandle_destroy_t> _dlhandle; + std::unique_ptr<IQuantizer, quantizer_destory_t> _quantizer{nullptr, nullptr}; +}; + +} // namespace odc +} // namespace onert + +#endif // __ONERT_ODC_QUANTIZER_LOADER_H__ diff --git a/runtime/onert/core/src/odc/QuantizerLoader.test.cc b/runtime/onert/core/src/odc/QuantizerLoader.test.cc new file mode 100644 index 000000000..112e65b27 --- /dev/null +++ b/runtime/onert/core/src/odc/QuantizerLoader.test.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "QuantizerLoader.h" + +#include <gtest/gtest.h> + +using namespace onert::odc; + +// Test QuantizerLoader singleton +TEST(odc_QuantizerLoader, singleton) +{ + QuantizerLoader &loader1 = QuantizerLoader::instance(); + QuantizerLoader &loader2 = QuantizerLoader::instance(); + ASSERT_EQ(&loader1, &loader2); +} + +// Test load quantizer library +TEST(odc_QuantizerLoader, load) +{ + QuantizerLoader &loader = QuantizerLoader::instance(); + // Unload because it may be loaded on previous tests + ASSERT_EQ(loader.unloadLibrary(), 0); + + if (loader.loadLibrary() == 0) + { + // Load twice to check if it is thread-safe + ASSERT_EQ(loader.loadLibrary(), 0); + } +} + +// Get quantizer function without loading quantizer library +TEST(odc_QuantizerLoader, neg_get) +{ + QuantizerLoader &loader = QuantizerLoader::instance(); + // Unload because it may be loaded on previous tests + ASSERT_EQ(loader.unloadLibrary(), 0); + ASSERT_EQ(loader.get(), nullptr); +} + +// Check quantizer function pointer when QuantizerLoader is unloaded +TEST(odc_QuantizerLoader, neg_unload) +{ + QuantizerLoader &loader = QuantizerLoader::instance(); + if (loader.loadLibrary() == 0) + ASSERT_NE(loader.get(), nullptr); + + ASSERT_EQ(loader.unloadLibrary(), 0); + ASSERT_EQ(loader.get(), nullptr); +} diff --git a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc new file mode 100644 index 000000000..c3f5179df --- /dev/null +++ b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "EventWriter.h" + +#include <cassert> +#include <sstream> +#include <utility> +#include <vector> + +// json type for ChromeTracingWriter +namespace +{ + +std::string quote(const std::string &value) +{ + std::stringstream ss; + ss << '"' << value << '"'; + return ss.str(); +} + +std::string field(const std::string &k, const std::string &v) +{ + std::stringstream ss; + ss << quote(k) << " : " << quote(v); + return ss.str(); +} + +struct Content // One Entry in Chrome Event Trace +{ + std::vector<std::pair<std::string, std::string>> flds; + std::vector<std::pair<std::string, std::string>> args; +}; + +std::string object(const Content &content) +{ + std::stringstream ss; + + ss << "{ "; + + ss << field(content.flds[0].first, content.flds[0].second); + + for (uint32_t n = 1; n < content.flds.size(); ++n) + { + ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second); + } + + if (content.args.size() > 0) + { + ss << ", " << quote("args") << " : { "; + ss << field(content.args.at(0).first, content.args.at(0).second); + + for (uint32_t n = 1; n < content.args.size(); ++n) + { + ss << ", " << field(content.args.at(n).first, content.args.at(n).second); + } + + ss << "}"; + } + + ss << " }"; + + return ss.str(); +} + +void fill(Content &content, const DurationEvent &evt, const std::string &name, + const std::string &tid) +{ + content.flds.emplace_back("name", name); + content.flds.emplace_back("pid", "0"); + content.flds.emplace_back("tid", tid); + content.flds.emplace_back("ph", evt.ph); + content.flds.emplace_back("ts", evt.ts); + content.args = evt.args; +} + +void fill(Content &content, const CounterEvent &evt) +{ + assert(evt.name != ""); + + content.flds.emplace_back("name", evt.name); + content.flds.emplace_back("pid", "0"); + content.flds.emplace_back("tid", evt.tid); + content.flds.emplace_back("ph", evt.ph); + content.flds.emplace_back("ts", evt.ts); + content.args = evt.args; +} + +std::string object(const DurationEvent &evt, const std::string &name, const std::string &tid) +{ + Content content; + + fill(content, evt, name, tid); + + return ::object(content); +} + +std::string object(const CounterEvent &evt) +{ + Content content; + + fill(content, evt); + + for (auto it = evt.values.begin(); it != evt.values.end(); ++it) + { + content.args.emplace_back(it->first, it->second); + } + + return ::object(content); +} + +std::string getSessionLabel(const DurationEvent &evt) +{ + return "$" + std::to_string(evt.session_index) + " sess"; +} + +std::string getSubgLabel(const DurationEvent &evt) +{ + return "$" + std::to_string(evt.subg_index) + " subg"; +} + +std::string getOpLabel(const OpSeqDurationEvent &evt) +{ + return "@" + std::to_string(evt.op_index) + " " + evt.op_name; +} + +std::string getLabel(const DurationEvent &evt) +{ + if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt)) + { + return getOpLabel(*evt_ptr); + } + else // SubgDurationEvent + { + return getSubgLabel(evt); + } +} + +std::string getTid(const DurationEvent &evt) +{ + if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt)) + { + return getSessionLabel(*evt_ptr) + ", " + getSubgLabel(*evt_ptr) + ", " + evt_ptr->backend; + } + else // SubgDurationEvent + { + return getSessionLabel(evt) + ", " + getSubgLabel(evt); + } +} + +} // namespace + +void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders) +{ + _os << "{\n"; + _os << " " << quote("traceEvents") << ": [\n"; + + for (const auto &recorder : recorders) + { + 
flushOneRecord(*recorder); + } + + _os << " { }\n"; + _os << " ]\n"; + _os << "}\n"; +} + +void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder) +{ + for (const auto &evt : recorder.duration_events()) + { + const std::string name = getLabel(*evt); + const std::string tid = getTid(*evt); + + _os << " " << object(*evt, name, tid) << ",\n"; + } + + for (const auto &evt : recorder.counter_events()) + { + _os << " " << object(evt) << ",\n"; + } +} diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc index 45cce662e..b7fcefc7a 100644 --- a/runtime/onert/core/src/util/ConfigSource.cc +++ b/runtime/onert/core/src/util/ConfigSource.cc @@ -15,13 +15,15 @@ */ #include "util/ConfigSource.h" -#include "util/GeneralConfigSource.h" -#include "util/EnvConfigSource.h" +#include "util/logging.h" + +#include <misc/EnvConfigSource.h> +#include <misc/GeneralConfigSource.h> +#include <misc/IConfigSource.h> -#include <array> #include <algorithm> +#include <array> #include <cassert> - #include <memory> namespace onert @@ -29,9 +31,26 @@ namespace onert namespace util { +using namespace nnfw::misc; + static std::unique_ptr<IConfigSource> _source; +static std::unique_ptr<IConfigSource> _source_ext; void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); } +void config_source_ext(std::unique_ptr<IConfigSource> &&source) { _source_ext = std::move(source); } + +void setConfigKeyValues(const CfgKeyValues &keyValues) +{ + auto configsrc = std::make_unique<GeneralConfigSource>(); + + for (auto it = keyValues.begin(); it != keyValues.end(); ++it) + { + VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl; + configsrc->set(it->first, it->second); + } + + onert::util::config_source_ext(std::move(configsrc)); +} static IConfigSource *config_source() { @@ -67,6 +86,15 @@ static std::string getConfigOrDefault(const std::string &key) auto ret = config_source()->get(key); if (ret.empty()) { + // if env is not set, search from external + if (_source_ext.get()) + { + ret = _source_ext.get()->get(key); + } + } + // if not found search from defaults + if (ret.empty()) + { auto itr = defaults.find(key); if (itr != defaults.end()) { diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc index de37276bf..c1b9c4315 100644 --- a/runtime/onert/core/src/util/EventCollector.cc +++ b/runtime/onert/core/src/util/EventCollector.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "util/EventCollector.h" +#include "EventCollector.h" // C++ standard libraries #include <chrono> @@ -30,24 +30,62 @@ std::string timestamp(void) { auto now = std::chrono::steady_clock::now(); return std::to_string( - std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count()); + std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count()); } -class DurationEventBuilder +class DurationEventBuilder : public EventCollector::EventVisitor { public: DurationEventBuilder(const std::string &ts) : _ts{ts} {} - DurationEvent build(const std::string &tid, const std::string &name, const std::string &ph) const + std::unique_ptr<SubgDurationEvent> build(const EventCollector::SubgEvent &evt_collected, + const std::string &ph) const { - DurationEvent evt; + auto dur_evt = std::make_unique<SubgDurationEvent>(); - evt.name = name; - evt.tid = tid; - evt.ph = ph; - evt.ts = _ts; + // The following will be set by a child of EventsWriter: + // dur_evt.name, dur_evt.tid + dur_evt->ph = ph; + dur_evt->ts = _ts; + dur_evt->tracing_ctx = evt_collected.tracing_ctx; - return evt; + dur_evt->session_index = evt_collected.session_index; + dur_evt->subg_index = evt_collected.subg_index; + + dur_evt->args = evt_collected.userData; + { + dur_evt->args.emplace_back("session", std::to_string(evt_collected.session_index)); + dur_evt->args.emplace_back("subgraph", std::to_string(evt_collected.subg_index)); + } + + return dur_evt; + } + + std::unique_ptr<OpSeqDurationEvent> build(const EventCollector::OpSeqEvent &evt_collected, + const std::string &ph) const + { + auto dur_evt = std::make_unique<OpSeqDurationEvent>(); + + // The following will be set by a child of EventsWriter: + // dur_evt.name, dur_evt.tid + dur_evt->ph = ph; + dur_evt->ts = _ts; + dur_evt->tracing_ctx = evt_collected.tracing_ctx; + + dur_evt->session_index = evt_collected.session_index; + dur_evt->subg_index = evt_collected.subg_index; + + dur_evt->backend = evt_collected.backend; + dur_evt->op_index = evt_collected.op_index; + dur_evt->op_name = evt_collected.op_name; + + dur_evt->args = evt_collected.userData; + { + dur_evt->args.emplace_back("session", std::to_string(evt_collected.session_index)); + dur_evt->args.emplace_back("subgraph", std::to_string(evt_collected.subg_index)); + } + + return dur_evt; } private: @@ -86,19 +124,26 @@ inline void emit_rusage(EventRecorder *rec, const std::string &ts) } // namespace -void EventCollector::onEvent(const Event &event) +template <typename EventT> void EventCollector::onEvent(const EventT &event) { auto ts = timestamp(); + DurationEventBuilder builder(ts); + switch (event.edge) { case Edge::BEGIN: - _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "B")); + { + auto duration_evt = builder.build(event, "B"); + _rec->emit(std::move(duration_evt)); break; - + } case Edge::END: - _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "E")); + { + auto duration_evt = builder.build(event, "E"); + _rec->emit(std::move(duration_evt)); break; + } } // TODO: Add resurece measurement(e.g. 
RSS) @@ -107,3 +152,7 @@ void EventCollector::onEvent(const Event &event) emit_rusage(_rec, ts); #endif } + +// template instantiation +template void EventCollector::onEvent<EventCollector::SubgEvent>(const SubgEvent &event); +template void EventCollector::onEvent<EventCollector::OpSeqEvent>(const OpSeqEvent &event); diff --git a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h index 8154be592..effb72373 100644 --- a/runtime/onert/core/src/util/EventCollector.h +++ b/runtime/onert/core/src/util/EventCollector.h @@ -17,7 +17,13 @@ #ifndef __ONERT_UTIL_EVENT_COLLECTOR_H__ #define __ONERT_UTIL_EVENT_COLLECTOR_H__ -#include "util/EventRecorder.h" +#include "EventRecorder.h" + +#include "util/TracingCtx.h" + +#include <string> +#include <utility> +#include <vector> class EventCollector { @@ -28,11 +34,69 @@ public: END }; + struct SubgEvent; + struct OpEvent; + + class EventVisitor + { + public: + virtual ~EventVisitor() = default; + + virtual std::unique_ptr<DurationEvent> visit(const SubgEvent &, const std::string &) const + { + throw std::runtime_error("Please implement"); + } + virtual std::unique_ptr<DurationEvent> visit(const OpEvent &, const std::string &) const + { + throw std::runtime_error("Please implement"); + } + }; + struct Event { + const onert::util::TracingCtx *tracing_ctx; + Edge edge; + uint32_t session_index; + uint32_t subg_index; + + // user-defined data: pairs of (key, value) + std::vector<std::pair<std::string, std::string>> userData; + + protected: + Event(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index) + : tracing_ctx(a_tracing_ctx), edge(a_edge), session_index(tracing_ctx->getSessionId()), + subg_index(a_subg_index) + { /* empty */ + } + + virtual ~Event() = default; + }; + + struct SubgEvent : public Event + { + // constructor for subgraph start and end event + SubgEvent(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index) + : Event(a_tracing_ctx, a_edge, a_subg_index) + { /* empty */ + } + }; + + // TODO Rename this to OperationEvent + struct OpSeqEvent : public Event + { std::string backend; - std::string label; + uint32_t op_index; + std::string op_name; + + OpSeqEvent(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index, + const std::string a_backend, uint32_t a_op_index, const std::string a_op_name) + : Event(a_tracing_ctx, a_edge, a_subg_index) + { + backend.assign(a_backend); + op_index = a_op_index; + op_name.assign(a_op_name); + } }; public: @@ -42,7 +106,7 @@ public: } public: - void onEvent(const Event &event); + template <typename EventT> void onEvent(const EventT &event); protected: EventRecorder *_rec; diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.cc b/runtime/onert/core/src/util/EventCollectorGlobal.cc deleted file mode 100644 index d09b95210..000000000 --- a/runtime/onert/core/src/util/EventCollectorGlobal.cc +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/EventCollectorGlobal.h" - -#include <cassert> -#include <fstream> -#include <iostream> - -#include "util/ConfigSource.h" - -namespace onert -{ -namespace util -{ - -EventCollectorGlobal::EventCollectorGlobal() : _recorder{}, _collector{&_recorder} -{ - // DO NOTHING -} - -EventCollectorGlobal::~EventCollectorGlobal() -{ - if (!_recorder.empty()) - { - try - { - // TODO Need better way for saved file path than the hardcoded path - std::ofstream ofs{"trace.global.json"}; - _recorder.writeToFile(ofs); - } - catch (const std::exception &e) - { - std::cerr << "E: Fail to record event in EventCollectorGlobal: " << e.what() << std::endl; - } - } -} - -EventCollectorGlobal &EventCollectorGlobal::get() -{ - static EventCollectorGlobal instance; - return instance; -} - -EventDurationBlock::EventDurationBlock(const std::string &tag) : _tag{tag} -{ - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag}); -} -EventDurationBlock::~EventDurationBlock() -{ - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag}); -} - -EventDurationManual::EventDurationManual(const std::string &tag) : _tag{tag}, _pair{true} {} - -EventDurationManual::~EventDurationManual() -{ - // Check if it has called begin-end pair - assert(_pair); -} - -void EventDurationManual::begin() -{ - _pair = false; - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag}); -} - -void EventDurationManual::end() -{ - assert(!_pair); - _pair = true; - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag}); -} - -} // namespace util -} // namespace onert diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.h b/runtime/onert/core/src/util/EventCollectorGlobal.h deleted file mode 100644 index 1027ec84d..000000000 --- a/runtime/onert/core/src/util/EventCollectorGlobal.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ -#define __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ - -#include "util/EventRecorder.h" -#include "util/EventCollector.h" - -namespace onert -{ -namespace util -{ - -/** - * @brief Singleton class for event collection from anywhere in code - * - */ -class EventCollectorGlobal -{ -public: - /** - * @brief Get the singleton object of this class - * - * @return EventCollectorGlobal& Singleton object - */ - static EventCollectorGlobal &get(); - -public: - /** - * @brief Getter for event collector object - * - * @return EventCollector& Collector object - */ - EventCollector &collector() { return _collector; } - -private: - EventCollectorGlobal(); - ~EventCollectorGlobal(); - -private: - EventRecorder _recorder; - EventCollector _collector; -}; - -/** - * @brief Helper class for emitting duration event which is handled automatically with ctor/dtor - * - */ -class EventDurationBlock -{ -public: - /** - * @brief Raise a duration event with type of BEGIN - * - * @param tag A label for the duration event - */ - EventDurationBlock(const std::string &tag); - /** - * @brief Raise a duration event with type of END - * - */ - ~EventDurationBlock(); - -private: - std::string _tag; -}; - -/** - * @brief Helper class for emitting duration event which is handled manually - * - * Usage: - * { - * ... - * EventDurationManual duration("some tag"); - * duration.begin(); - * ... - * ... // Code for duration - * ... - * duration.end(); - * } - * - */ -class EventDurationManual -{ -public: - /** - * @brief Construct a new Event Duration Manual object - * - * @param tag A label for the duration object - */ - EventDurationManual(const std::string &tag); - /** - * @brief Destroy the Event Duration Manual object - * - */ - ~EventDurationManual(); - - /** - * @brief Raise a duration event with type of BEGIN - * - */ - void begin(); - /** - * @brief Raise a duration event with type of END - * - */ - void end(); - -private: - std::string _tag; - bool _pair; -}; - -} // namespace util -} // namespace onert - -/** - * Helper Macro Definitions - * - * HOW TO USE - * - * void f(args) - * { - * EVENT_DURATION_FUNCTION(); - * ... - * if(cond) - * { - * EVENT_DURATION_REGION("if branch"); - * ... - * } - * ... - * } - */ - -#define EVENT_DURATION_FUNCTION() \ - ::onert::util::EventDurationBlock __event_duration__##__LINE__ { __FUNCTION__ } - -#define EVENT_DURATION_REGION(tag) \ - ::onert::util::EventDurationBlock __event_duration__##__LINE__ { tag } - -#endif // __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc index 13a599bed..85a588d38 100644 --- a/runtime/onert/core/src/util/EventRecorder.cc +++ b/runtime/onert/core/src/util/EventRecorder.cc @@ -14,396 +14,13 @@ * limitations under the License. 
*/ -#include "util/EventRecorder.h" +#include "EventRecorder.h" -#include <sstream> -#include <vector> -#include <unordered_map> -#include <json/json.h> -#include <assert.h> -#include <utility> -#include <map> -#include <set> -#include <stdint.h> - -// json type for Chrome Event Trace -namespace -{ - -std::string quote(const std::string &value) -{ - std::stringstream ss; - ss << '"' << value << '"'; - return ss.str(); -} - -std::string field(const std::string &k, const std::string &v) -{ - std::stringstream ss; - ss << quote(k) << " : " << quote(v); - return ss.str(); -} - -struct Content // One Entry in Chrome Event Trace -{ - std::vector<std::pair<std::string, std::string>> flds; - std::vector<std::pair<std::string, std::string>> args; -}; - -std::string object(const Content &content) -{ - std::stringstream ss; - - ss << "{ "; - - ss << field(content.flds[0].first, content.flds[0].second); - - for (uint32_t n = 1; n < content.flds.size(); ++n) - { - ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second); - } - - if (content.args.size() > 0) - { - ss << ", " << quote("args") << " : { "; - ss << field(content.args.at(0).first, content.args.at(0).second); - - for (uint32_t n = 1; n < content.args.size(); ++n) - { - ss << ", " << field(content.args.at(n).first, content.args.at(n).second); - } - - ss << "}"; - } - - ss << " }"; - - return ss.str(); -} - -void fill(Content &content, const Event &evt) -{ - content.flds.emplace_back("name", evt.name); - content.flds.emplace_back("pid", "0"); - content.flds.emplace_back("tid", evt.tid); - content.flds.emplace_back("ph", evt.ph); - content.flds.emplace_back("ts", evt.ts); -} - -std::string object(const DurationEvent &evt) -{ - Content content; - - fill(content, evt); - - return ::object(content); -} - -std::string object(const CounterEvent &evt) -{ - Content content; - - fill(content, evt); - - for (auto it = evt.values.begin(); it != evt.values.end(); ++it) - { - content.args.emplace_back(it->first, it->second); - } - - return ::object(content); -} - -} // namespace - -// md table type -namespace -{ - -void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list) -{ - os << "| "; - for (auto &key : list) - { - os << key << " | "; - } - os << "\n"; -} - -struct MDContent -{ - std::string name; - uint64_t begin_ts; - uint64_t end_ts; - uint32_t min_rss; - uint32_t max_rss; - uint32_t min_page_reclaims; - uint32_t max_page_reclaims; - - MDContent() - : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX), - max_page_reclaims(0) - { - // DO NOTHING - } - - virtual ~MDContent() = default; - - void updateRss(uint32_t rss) - { - if (min_rss == UINT32_MAX) - min_rss = rss; - if (max_rss == 0) - max_rss = rss; - - if (min_rss > rss) - min_rss = rss; - else if (max_rss < rss) - max_rss = rss; - } - - void updateMinflt(uint32_t minflt) - { - if (min_page_reclaims == UINT32_MAX) - min_page_reclaims = minflt; - if (max_page_reclaims == 0) - max_page_reclaims = minflt; - - if (min_page_reclaims > minflt) - min_page_reclaims = minflt; - else if (max_page_reclaims < minflt) - max_page_reclaims = minflt; - } - - virtual void write(std::ostream &os) const = 0; -}; - -struct OpSeq : public MDContent -{ - std::string backend; - uint64_t graph_latency; - - struct OpSeqCmp - { - bool operator()(const OpSeq &lhs, const OpSeq &rhs) const - { - return lhs.begin_ts < rhs.begin_ts; - } - bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; } - bool operator()(OpSeq &lhs, 
OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; } - }; - - void write(std::ostream &os) const override - { - uint64_t opseq_latency = end_ts - begin_ts; - double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0; - writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per), - std::to_string(min_rss), std::to_string(max_rss), - std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)}); - } -}; - -struct Graph : public MDContent -{ - std::set<OpSeq, OpSeq::OpSeqCmp> opseqs; - - void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq) - { - uint64_t graph_latency = end_ts - begin_ts; - for (auto it : name_to_opseq) - { - auto opseq = it.second; - opseq.graph_latency = graph_latency; - - opseqs.insert(opseq); - - updateRss(opseq.min_rss); - updateRss(opseq.max_rss); - updateMinflt(opseq.min_page_reclaims); - updateMinflt(opseq.max_page_reclaims); - } - } - - void write(std::ostream &os) const override - { - static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)", - "page_reclaims_min", "page_reclaims_max"}; - - static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------", - "-----------------", "-----------------"}; - - // Graph's Header - writeMDTableRow(os, graph_headers); - writeMDTableRow(os, graph_headers_line); - - // Graph's contents - writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss), - std::to_string(max_rss), std::to_string(min_page_reclaims), - std::to_string(max_page_reclaims)}); - - os << "\n"; - - static std::vector<std::string> opseq_headers{ - "OpSeq name", "backend", "latency(us)", "latency(%)", - "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"}; - - static std::vector<std::string> opseq_headers_line{ - "----------", "-------", "-----------", "-----------", - "-------", "-------", "-----------------", "-----------------"}; - - os << "## OpSequences \n"; - - // OpSeq's Header - writeMDTableRow(os, opseq_headers); - writeMDTableRow(os, opseq_headers_line); - - // OpSeq's contents - for (auto opseq : opseqs) - { - opseq.write(os); - } - - os << "\n"; - } -}; - -struct MDTableBuilder -{ - MDTableBuilder(const std::vector<DurationEvent> &duration_events, - const std::vector<CounterEvent> &counter_events) - : _duration_events(duration_events), _counter_events(counter_events) - { - for (const auto &evt : _counter_events) - { - uint64_t ts = std::stoull(evt.ts); - auto &name = evt.name; - assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0); - assert(evt.values.size() == 1); - auto &val = evt.values.begin()->second; - if (_ts_to_values.find(ts) == _ts_to_values.end()) - { - std::pair<uint32_t, uint32_t> values; - if (name.compare("maxrss") == 0) - values.first = std::stoul(val); - else - values.second = std::stoul(val); - _ts_to_values.insert({ts, values}); - } - else - { - auto &values = _ts_to_values.at(ts); - if (name.compare("maxrss") == 0) - values.first = std::stoul(val); - else - values.second = std::stoul(val); - } - } - } - - MDTableBuilder &build() - { - for (auto &it : divideGraph()) - { - size_t begin_idx = it.first; - size_t end_idx = it.second; - std::map<std::string, OpSeq> name_to_opseq; - for (size_t i = begin_idx + 1; i < end_idx; ++i) - { - const auto &evt = _duration_events[i]; - assert(evt.name.compare("Graph") != 0); - assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0); - if (evt.ph.compare("B") == 0) - { - assert(name_to_opseq.find(evt.name) == 
name_to_opseq.end()); - name_to_opseq.insert({evt.name, makeOpSeq(evt)}); - } - else - { - assert(name_to_opseq.find(evt.name) != name_to_opseq.end()); - auto &opseq = name_to_opseq.at(evt.name); - updateOpSeq(opseq, evt); - } - } - - _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq)); - } - - return *this; - } - - std::vector<std::pair<size_t, size_t>> divideGraph() - { - std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx> - for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i) - { - const auto &evt = _duration_events.at(i); - if (evt.name.compare("Graph") == 0) - { - if (evt.ph.compare("B") == 0) - begin_idx = i; - else - graph_idx_list.emplace_back(begin_idx, i); - } - } - return graph_idx_list; - } - - OpSeq makeOpSeq(const DurationEvent &evt) - { - OpSeq opseq; - opseq.name = evt.name; - opseq.begin_ts = std::stoull(evt.ts); - opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first); - opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second); - opseq.backend = evt.tid; - return opseq; - } - - void updateOpSeq(OpSeq &opseq, const DurationEvent &evt) - { - opseq.end_ts = std::stoull(evt.ts); - opseq.updateRss(_ts_to_values.at(opseq.end_ts).first); - opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second); - } - - Graph makeGraph(size_t begin_idx, size_t end_idx, - const std::map<std::string, OpSeq> &name_to_opseq) - { - Graph graph; - graph.name = "Graph"; - graph.begin_ts = std::stoull(_duration_events[begin_idx].ts); - graph.updateRss(_ts_to_values.at(graph.begin_ts).first); - graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second); - graph.end_ts = std::stoull(_duration_events[end_idx].ts); - graph.updateRss(_ts_to_values.at(graph.end_ts).first); - graph.updateMinflt(_ts_to_values.at(graph.end_ts).second); - graph.setOpSeqs(name_to_opseq); - return graph; - } - - void write(std::ostream &os) - { - // Write contents - for (size_t i = 0; i < _graphs.size(); ++i) - { - os << "# Graph " << i << "\n"; - _graphs.at(i).write(os); - } - } - - const std::vector<DurationEvent> &_duration_events; - const std::vector<CounterEvent> &_counter_events; - // timestamp to std::pair<maxrss, minflt> - std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values; - std::vector<Graph> _graphs; -}; - -} // namespace - -void EventRecorder::emit(const DurationEvent &evt) +void EventRecorder::emit(std::unique_ptr<DurationEvent> &&evt) { std::lock_guard<std::mutex> lock{_mu}; - _duration_events.push_back(evt); + _duration_events.push_back(std::move(evt)); } void EventRecorder::emit(const CounterEvent &evt) @@ -412,146 +29,3 @@ void EventRecorder::emit(const CounterEvent &evt) _counter_events.push_back(evt); } - -void EventRecorder::writeToFile(std::ostream &os) -{ - std::lock_guard<std::mutex> lock{_mu}; - - switch (_write_format) - { - case WriteFormat::CHROME_TRACING: - writeChromeTrace(os); - break; - case WriteFormat::SNPE_BENCHMARK: - writeSNPEBenchmark(os); - break; - case WriteFormat::MD_TABLE: - writeMDTable(os); - break; - default: - assert(!"Invalid value"); - break; - } -} - -void EventRecorder::writeSNPEBenchmark(std::ostream &os) -{ - Json::Value root; - auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue}; - - struct Stat - { - uint64_t sum = 0; - uint64_t count = 0; - uint64_t max = 0; - uint64_t min = std::numeric_limits<uint64_t>::max(); - - void accumulate(uint64_t val) - { - sum += val; - count++; - max = std::max(max, val); - min = std::min(min, val); - } - }; - - // Memory - { - 
std::unordered_map<std::string, Stat> mem_stats; - for (auto &evt : _counter_events) - { - auto &mem_stat = mem_stats[evt.name]; - uint64_t val = std::stoull(evt.values["value"]); - mem_stat.accumulate(val); - } - - auto &mem = exec_data["memory"] = Json::Value{Json::objectValue}; - for (auto &kv : mem_stats) - { - auto &key = kv.first; - auto &val = kv.second; - mem[key]["Avg_Size"] = val.sum / val.count; - mem[key]["Max_Size"] = val.max; - mem[key]["Min_Size"] = val.min; - mem[key]["Runtime"] = "NA"; - } - } - - // Operation Execution Time - { - // NOTE This assumes _duration_events is sorted by "ts" ascending - - // 2D keys : stats[tid][name] - std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats; - std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps; - for (auto &evt : _duration_events) - { - auto &stat = stats[evt.tid][evt.name]; - auto &begin_ts = begin_timestamps[evt.tid][evt.name]; - uint64_t timestamp = std::stoull(evt.ts); - if (evt.ph == "B") - { - if (begin_ts != 0) - throw std::runtime_error{"Invalid Data"}; - begin_ts = timestamp; - } - else if (evt.ph == "E") - { - if (begin_ts == 0 || timestamp < begin_ts) - throw std::runtime_error{"Invalid Data"}; - stat.accumulate(timestamp - begin_ts); - begin_ts = 0; - } - else - throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""}; - } - - for (auto &kv : begin_timestamps) - for (auto &kv2 : kv.second) - if (kv2.second != 0) - throw std::runtime_error{"Invalid Data - B and E pair does not match."}; - - for (auto &kv : stats) - { - auto &tid = kv.first; - auto &map = kv.second; - auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue}; - for (auto &kv : map) - { - auto &name = kv.first; - auto &val = kv.second; - json_tid[name]["Avg_Time"] = val.sum / val.count; - json_tid[name]["Max_Time"] = val.max; - json_tid[name]["Min_Time"] = val.min; - json_tid[name]["Runtime"] = tid; - } - } - } - - os << root; -} - -void EventRecorder::writeChromeTrace(std::ostream &os) -{ - os << "{\n"; - os << " " << quote("traceEvents") << ": [\n"; - - for (auto &evt : _duration_events) - { - os << " " << object(evt) << ",\n"; - } - - for (auto &evt : _counter_events) - { - os << " " << object(evt) << ",\n"; - } - - os << " { }\n"; - os << " ]\n"; - os << "}\n"; -} - -void EventRecorder::writeMDTable(std::ostream &os) -{ - MDTableBuilder(_duration_events, _counter_events).build().write(os); -} diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h index 37ec1a0f1..5cf03d8ac 100644 --- a/runtime/onert/core/src/util/EventRecorder.h +++ b/runtime/onert/core/src/util/EventRecorder.h @@ -17,28 +17,52 @@ #ifndef __ONERT_UTIL_EVENT_RECORDER_H__ #define __ONERT_UTIL_EVENT_RECORDER_H__ +#include "util/TracingCtx.h" + #include <map> #include <memory> #include <mutex> -#include <ostream> #include <vector> +// refer to https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/edit# struct Event { - std::string name; - std::string tid; - std::string ph; /* REQUIRED */ - std::string ts; /* REQUIRED */ + const onert::util::TracingCtx *tracing_ctx; + + std::string ph; // Event type. 
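+                  //   e.g. "B" (duration begin) / "E" (duration end), as in the Chrome Event
+                  //   Trace format; SNPEWriter::flush() rejects other values for duration events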
+ std::string ts; // tracing clock of timestamp of this event + std::vector<std::pair<std::string, std::string>> args; // user-defined data: pairs of (key, value) + + virtual ~Event() = default; }; struct DurationEvent : public Event { - // TO BE FILLED + uint32_t session_index = 0; + uint32_t subg_index = 0; + +protected: + DurationEvent() = default; +}; + +struct SubgDurationEvent : public DurationEvent +{ /* same with DurationEvent */ +}; + +// TODO Rename it to OperationDurationEvent +struct OpSeqDurationEvent : public DurationEvent +{ + // Note: DurationEvent's name and tid will be set by EventWriter + std::string backend; + uint32_t op_index; + std::string op_name; }; struct CounterEvent : public Event { + std::string name; // name of event + std::string tid; // thread ID std::map<std::string, std::string> values; }; @@ -50,35 +74,22 @@ struct CounterEvent : public Event class EventRecorder { public: - enum class WriteFormat - { - CHROME_TRACING, - SNPE_BENCHMARK, - MD_TABLE, - }; - -public: EventRecorder() = default; public: - void emit(const DurationEvent &evt); + void emit(std::unique_ptr<DurationEvent> &&evt); void emit(const CounterEvent &evt); public: - bool empty() { return _duration_events.empty() && _counter_events.empty(); } - void writeToFile(std::ostream &os); - void setWriteFormat(WriteFormat write_format) { _write_format = write_format; } - -private: - void writeSNPEBenchmark(std::ostream &os); - void writeChromeTrace(std::ostream &os); - void writeMDTable(std::ostream &os); + const std::vector<std::unique_ptr<DurationEvent>> &duration_events() const + { + return _duration_events; + } + const std::vector<CounterEvent> &counter_events() const { return _counter_events; } private: std::mutex _mu; - // TODO: Allow user to control write_format - WriteFormat _write_format{WriteFormat::SNPE_BENCHMARK}; - std::vector<DurationEvent> _duration_events; + std::vector<std::unique_ptr<DurationEvent>> _duration_events; std::vector<CounterEvent> _counter_events; }; diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc new file mode 100644 index 000000000..ca4bd302e --- /dev/null +++ b/runtime/onert/core/src/util/EventWriter.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "EventWriter.h" + +#include <cassert> + +// initialization +std::mutex EventWriter::_mutex; + +void EventWriter::readyToFlush(std::unique_ptr<EventRecorder> &&recorder) +{ + { + std::unique_lock<std::mutex> lock{_mutex}; + + _recorders.emplace_back(std::move(recorder)); + + if (--_ref_count > 0) + return; + } + // The caller of this method is the last instance that uses EventWriter. + // Let's write log files. + + // Note. 
According to an internal issue, let snpe json as just file name not '.snpe.json' + flush(WriteFormat::SNPE_BENCHMARK); + flush(WriteFormat::CHROME_TRACING); + flush(WriteFormat::MD_TABLE); +} + +void EventWriter::flush(WriteFormat write_format) +{ + auto *writer = _actual_writers[write_format].get(); + assert(writer); + + writer->flush(_recorders); +} diff --git a/runtime/onert/core/src/util/EventWriter.h b/runtime/onert/core/src/util/EventWriter.h new file mode 100644 index 000000000..0a35a8508 --- /dev/null +++ b/runtime/onert/core/src/util/EventWriter.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_UTIL_EVENT_WRITER_H__ +#define __ONERT_UTIL_EVENT_WRITER_H__ + +#include "EventRecorder.h" + +#include <string> +#include <vector> +#include <unordered_map> +#include <mutex> +#include <fstream> + +class EventFormatWriter +{ +public: + EventFormatWriter(const std::string &filepath) : _os{filepath, std::ofstream::out} {} + virtual ~EventFormatWriter() + { /* empty */ + } + + virtual void flush(const std::vector<std::unique_ptr<EventRecorder>> &) = 0; + +protected: + std::ofstream _os; +}; + +class SNPEWriter : public EventFormatWriter +{ +public: + SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath) + { /* empty */ + } + ~SNPEWriter() {} + + void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override; +}; + +class ChromeTracingWriter : public EventFormatWriter +{ +public: + ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath) + { /* empty */ + } + ~ChromeTracingWriter() {} + + void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override; + +private: + void flushOneRecord(const EventRecorder &); +}; + +class MDTableWriter : public EventFormatWriter +{ +public: + MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath) + { /* empty */ + } + ~MDTableWriter() {} + + void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override; +}; + +#include <mutex> + +class EventWriter +{ +public: + enum class WriteFormat + { + CHROME_TRACING, + SNPE_BENCHMARK, + MD_TABLE, + }; + + /** + * @brief Retuens a singleton object + */ + static EventWriter *get(const std::string &filename) + { + std::unique_lock<std::mutex> lock{_mutex}; + + static EventWriter singleton(filename); + return &singleton; + } + + /** + * @brief Call this when observer which use EventWriter starts + */ + void startToUse() + { + std::unique_lock<std::mutex> lock{_mutex}; + _ref_count++; + } + + /** + * @brief Call this when observer which use EventWriter finishes. + * After multiple observers calls this method, the reference count will eventually be 0. + * Then, EventWriter will write profiling result file. 
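+ *
+ *        Rough usage sketch (illustrative only; "trace" is a hypothetical base file path):
+ *          auto writer = EventWriter::get("trace");    // shared singleton, creates per-format writers
+ *          writer->startToUse();                       // one call per observer -> _ref_count++
+ *          // ... the observer fills its own EventRecorder while the model runs ...
+ *          writer->readyToFlush(std::move(recorder));  // last observer's call writes all formats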
+ */ + void readyToFlush(std::unique_ptr<EventRecorder> &&recorder); + +private: + EventWriter(const std::string &filepath) : _ref_count(0) + { + std::string snpe_log_name(filepath); + std::string chrome_tracing_log_name(filepath + ".chrome.json"); + std::string md_table_log_name(filepath + ".table.md"); + + _actual_writers[WriteFormat::SNPE_BENCHMARK] = std::make_unique<SNPEWriter>(snpe_log_name); + _actual_writers[WriteFormat::CHROME_TRACING] = + std::make_unique<ChromeTracingWriter>(chrome_tracing_log_name); + _actual_writers[WriteFormat::MD_TABLE] = std::make_unique<MDTableWriter>(md_table_log_name); + }; + + void flush(WriteFormat write_format); + +private: + static std::mutex _mutex; + + // number of observer of an executor that want to write profiling data + int32_t _ref_count; + + // one recorder object per executor + std::vector<std::unique_ptr<EventRecorder>> _recorders; + + std::unordered_map<WriteFormat, std::unique_ptr<EventFormatWriter>> _actual_writers; +}; + +#endif // __ONERT_UTIL_EVENT_WRITER_H__ diff --git a/runtime/onert/core/src/util/EnvConfigSource.cc b/runtime/onert/core/src/util/Index.test.cc index 0d25b7353..ff73e5e59 100644 --- a/runtime/onert/core/src/util/EnvConfigSource.cc +++ b/runtime/onert/core/src/util/Index.test.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,27 +14,21 @@ * limitations under the License. */ -#include "util/EnvConfigSource.h" +#include "util/Index.h" -#include <cstdlib> +#include <gtest/gtest.h> -namespace onert -{ -namespace util -{ +using Index = ::onert::util::Index<uint32_t, struct TestTag>; -std::string EnvConfigSource::get(const std::string &key) const +TEST(Index, neg_index_test) { - const char *value = std::getenv(key.c_str()); - if (value != nullptr) - { - return value; - } - else - { - return GeneralConfigSource::get(key); - } -} + Index idx1{1u}; + Index idx2{2u}; + Index idx3{idx1}; -} // namespace util -} // namespace onert + ASSERT_EQ(idx1, 1); + ASSERT_EQ(idx1, 1u); + ASSERT_EQ(idx1.value(), 1u); + ASSERT_NE(idx1, idx2); + ASSERT_EQ(idx1, idx3); +} diff --git a/runtime/onert/core/src/util/MDTableEventWriter.cc b/runtime/onert/core/src/util/MDTableEventWriter.cc new file mode 100644 index 000000000..e7d90eec4 --- /dev/null +++ b/runtime/onert/core/src/util/MDTableEventWriter.cc @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "EventWriter.h" + +#include <cassert> +#include <map> +#include <set> +#include <sstream> +#include <stdint.h> +#include <unordered_map> +#include <utility> +#include <vector> + +// md table type +namespace +{ + +void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list) +{ + os << "| "; + for (const auto &key : list) + { + os << key << " | "; + } + os << "\n"; +} + +struct MDContent +{ + std::string name; + uint64_t begin_ts; + uint64_t end_ts; + uint32_t min_rss; + uint32_t max_rss; + uint32_t min_page_reclaims; + uint32_t max_page_reclaims; + + MDContent() + : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX), + max_page_reclaims(0) + { + // DO NOTHING + } + + virtual ~MDContent() = default; + + void updateRss(uint32_t rss) + { + if (min_rss == UINT32_MAX) + min_rss = rss; + if (max_rss == 0) + max_rss = rss; + + if (min_rss > rss) + min_rss = rss; + else if (max_rss < rss) + max_rss = rss; + } + + void updateMinflt(uint32_t minflt) + { + if (min_page_reclaims == UINT32_MAX) + min_page_reclaims = minflt; + if (max_page_reclaims == 0) + max_page_reclaims = minflt; + + if (min_page_reclaims > minflt) + min_page_reclaims = minflt; + else if (max_page_reclaims < minflt) + max_page_reclaims = minflt; + } + + virtual void write(std::ostream &os) const = 0; +}; + +struct Operation : public MDContent +{ + std::string backend; + uint64_t graph_latency; + + struct OperationCmp + { + bool operator()(const Operation &lhs, const Operation &rhs) const + { + return lhs.begin_ts < rhs.begin_ts; + } + bool operator()(const Operation &lhs, const Operation &rhs) + { + return lhs.begin_ts < rhs.begin_ts; + } + bool operator()(Operation &lhs, Operation &rhs) { return lhs.begin_ts < rhs.begin_ts; } + }; + + void write(std::ostream &os) const override + { + uint64_t op_latency = end_ts - begin_ts; + double op_per = static_cast<double>(op_latency) / graph_latency * 100.0; + writeMDTableRow(os, {name, backend, std::to_string(op_latency), std::to_string(op_per), + std::to_string(min_rss), std::to_string(max_rss), + std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)}); + } +}; + +struct Graph : public MDContent +{ + std::set<Operation, Operation::OperationCmp> ops; + std::string session_index; + std::string subgraph_index; + + void setOperations(const std::map<std::string, Operation> &name_to_op) + { + uint64_t graph_latency = end_ts - begin_ts; + for (auto &&it : name_to_op) + { + auto op = it.second; + op.graph_latency = graph_latency; + + ops.insert(op); + + updateRss(op.min_rss); + updateRss(op.max_rss); + updateMinflt(op.min_page_reclaims); + updateMinflt(op.max_page_reclaims); + } + } + + void write(std::ostream &os) const override + { + static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)", + "page_reclaims_min", "page_reclaims_max"}; + + static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------", + "-----------------", "-----------------"}; + + // Graph's Header + writeMDTableRow(os, graph_headers); + writeMDTableRow(os, graph_headers_line); + + // Graph's contents + writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss), + std::to_string(max_rss), std::to_string(min_page_reclaims), + std::to_string(max_page_reclaims)}); + + os << "\n"; + + static std::vector<std::string> op_headers{ + "Op name", "backend", "latency(us)", "latency(%)", + "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"}; + + static 
std::vector<std::string> op_headers_line{ + "-------", "-------", "-----------", "-----------", + "-------", "-------", "-----------------", "-----------------"}; + + os << "## Op \n"; + + // Operation's Header + writeMDTableRow(os, op_headers); + writeMDTableRow(os, op_headers_line); + + // Operation's contents + for (auto &&op : ops) + { + op.write(os); + } + + os << "\n"; + } +}; + +std::string getLabel(const OpSeqDurationEvent &evt) +{ + std::string subg_label("$" + std::to_string(evt.subg_index) + " subgraph"); + std::string op_label("@" + std::to_string(evt.op_index) + " " + evt.op_name); + + return subg_label + " " + op_label; +} + +struct MDTableBuilder +{ + MDTableBuilder(const std::vector<std::unique_ptr<DurationEvent>> &duration_events, + const std::vector<CounterEvent> &counter_events) + : _duration_events(duration_events), _counter_events(counter_events) + { +// when ready with low overhead in release build +#ifdef DEBUG + for (const auto &evt : _counter_events) + { + uint64_t ts = std::stoull(evt.ts); + auto &name = evt.name; + assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0); + assert(evt.values.size() == 1); + auto &val = evt.values.begin()->second; + if (_ts_to_values.find(ts) == _ts_to_values.end()) + { + std::pair<uint32_t, uint32_t> values; + if (name.compare("maxrss") == 0) + values.first = std::stoul(val); + else + values.second = std::stoul(val); + _ts_to_values.insert({ts, values}); + } + else + { + auto &values = _ts_to_values.at(ts); + if (name.compare("maxrss") == 0) + values.first = std::stoul(val); + else + values.second = std::stoul(val); + } + } +#endif + } + + MDTableBuilder &build() + { + for (const auto &it : divideGraph()) + { + size_t begin_idx = it.first; + size_t end_idx = it.second; + std::map<std::string, Operation> name_to_op; + for (size_t i = begin_idx + 1; i < end_idx; ++i) + { + const auto *evt = dynamic_cast<const OpSeqDurationEvent *>(_duration_events[i].get()); + if (evt == nullptr) + continue; + + const std::string evt_name = getLabel(*evt); + assert(evt->ph.compare("B") == 0 || evt->ph.compare("E") == 0); + if (evt->ph.compare("B") == 0) + { + assert(name_to_op.find(evt_name) == name_to_op.end()); + name_to_op.insert({evt_name, makeOperation(*evt)}); + } + else + { + assert(name_to_op.find(evt_name) != name_to_op.end()); + auto &op = name_to_op.at(evt_name); + updateOperation(op, *evt); + } + } + + _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_op)); + } + + return *this; + } + + std::vector<std::pair<size_t, size_t>> divideGraph() + { + std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx> + for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i) + { + const auto subg_evt = dynamic_cast<const SubgDurationEvent *>(_duration_events.at(i).get()); + if (subg_evt == nullptr) + continue; + + if (subg_evt->ph.compare("B") == 0) + begin_idx = i; + else + graph_idx_list.emplace_back(begin_idx, i); + } + return graph_idx_list; + } + + Operation makeOperation(const OpSeqDurationEvent &evt) + { + Operation op; + const std::string &evt_name = getLabel(evt); + op.name = evt_name; + op.begin_ts = std::stoull(evt.ts); + op.backend = evt.backend; +#ifdef DEBUG + op.updateRss(_ts_to_values.at(op.begin_ts).first); + op.updateMinflt(_ts_to_values.at(op.begin_ts).second); +#else + op.updateRss(0); + op.updateMinflt(0); +#endif + return op; + } + + void updateOperation(Operation &op, const DurationEvent &evt) + { + op.end_ts = std::stoull(evt.ts); +#ifdef DEBUG + 
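+    // _ts_to_values is only populated under DEBUG (see the constructor above), so the
+    // rss/minflt lookups are compiled in only for debug builds as well.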
op.updateRss(_ts_to_values.at(op.end_ts).first); + op.updateMinflt(_ts_to_values.at(op.end_ts).second); +#else + op.updateRss(0); + op.updateMinflt(0); +#endif + } + + Graph makeGraph(size_t begin_idx, size_t end_idx, + const std::map<std::string, Operation> &name_to_op) + { + Graph graph; + graph.name = "Subgraph"; + graph.begin_ts = std::stoull(_duration_events[begin_idx]->ts); + graph.end_ts = std::stoull(_duration_events[end_idx]->ts); + graph.setOperations(name_to_op); + + for (const auto &arg : _duration_events[end_idx]->args) + { + if (arg.first == "session") + graph.session_index = arg.second; + if (arg.first == "subgraph") + graph.subgraph_index = arg.second; + } + +#ifdef DEBUG + graph.updateRss(_ts_to_values.at(graph.begin_ts).first); + graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second); + graph.updateRss(_ts_to_values.at(graph.end_ts).first); + graph.updateMinflt(_ts_to_values.at(graph.end_ts).second); +#else + graph.updateRss(0); + graph.updateMinflt(0); +#endif + return graph; + } + + void write(std::ostream &os) + { + // Write contents + for (size_t i = 0; i < _graphs.size(); ++i) + { + auto &graph = _graphs.at(i); + os << "# Session: " << graph.session_index << ", Subgraph: " << graph.subgraph_index + << ", Running count: " << i << "\n"; + _graphs.at(i).write(os); + } + } + + const std::vector<std::unique_ptr<DurationEvent>> &_duration_events; + const std::vector<CounterEvent> &_counter_events; + + // timestamp to std::pair<maxrss, minflt> + std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values; + std::vector<Graph> _graphs; +}; + +} // namespace + +void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records) +{ + for (const auto &recorder : records) + { + MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os); + } +} diff --git a/runtime/onert/core/src/util/ObjectManager.test.cc b/runtime/onert/core/src/util/ObjectManager.test.cc new file mode 100644 index 000000000..3fe735732 --- /dev/null +++ b/runtime/onert/core/src/util/ObjectManager.test.cc @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "util/Index.h" +#include "util/ObjectManager.h" + +#include <gtest/gtest.h> + +using namespace onert; + +struct TestTag; +using Index = typename util::Index<uint32_t, TestTag>; + +TEST(ObjectManager, emplace) +{ + util::ObjectManager<Index, int> man; + + auto index = man.emplace(100); + ASSERT_EQ(man.at(index), 100); +} + +TEST(ObjectManager, neg_remove_1) +{ + util::ObjectManager<Index, int> man; + + Index index = man.emplace(100); + ASSERT_TRUE(man.exist(index)); + ASSERT_EQ(man.at(index), 100); + + man.remove(index); + ASSERT_FALSE(man.exist(index)); +} + +TEST(ObjectManager, neg_remove_2) +{ + util::ObjectManager<Index, int> man; + + auto index0 = man.emplace(100); + auto index1 = man.emplace(200); + ASSERT_TRUE(man.exist(index0)); + ASSERT_EQ(man.at(index0), 100); + ASSERT_TRUE(man.exist(index1)); + ASSERT_EQ(man.at(index1), 200); + + man.remove(index0); + ASSERT_FALSE(man.exist(index0)); + ASSERT_TRUE(man.exist(index1)); + ASSERT_EQ(man.at(index1), 200); +} + +TEST(ObjectManager, push) +{ + util::ObjectManager<Index, int> man; + + // Not specify index + auto index = man.push(std::make_unique<int>(100)); + ASSERT_EQ(man.at(index), 100); + + // Specify index + auto index2 = man.push(std::make_unique<int>(200), Index{33}); + ASSERT_EQ(index2.value(), 33); + ASSERT_EQ(man.at(index2), 200); + + auto index3 = man.push(std::make_unique<int>(300)); + // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1) + ASSERT_EQ(index3.value(), 34); + ASSERT_EQ(man.at(index3), 300); + + auto index4 = man.push(std::make_unique<int>(400), Index{22}); + ASSERT_EQ(index4.value(), 22); + ASSERT_EQ(man.at(index4), 400); + + auto index5 = man.push(std::make_unique<int>(500)); + // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1) + ASSERT_EQ(index5.value(), 35); + ASSERT_EQ(man.at(index5), 500); +} + +TEST(ObjectManager, neg_push) +{ + util::ObjectManager<Index, int> man; + + // Specify index + auto index = man.push(std::make_unique<int>(100), Index{55}); + ASSERT_EQ(index.value(), 55); + ASSERT_EQ(man.at(index), 100); + + // Specify the same index + auto index2 = man.push(std::make_unique<int>(200), Index{55}); + ASSERT_FALSE(index2.valid()); +} + +static const uint32_t kMaxUInt32 = std::numeric_limits<uint32_t>::max(); + +TEST(ObjectManager, neg_push_undefined_index) +{ + util::ObjectManager<Index, int> man; + + // Try inserting invalid(undefined) index + auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32}); + ASSERT_FALSE(index.valid()); + ASSERT_EQ(man.size(), 0); +} + +TEST(ObjectManager, neg_push_max_index) +{ + util::ObjectManager<Index, int> man; + + // Insert an object with maximum valid index + auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1}); + ASSERT_EQ(index.value(), kMaxUInt32 - 1); + ASSERT_EQ(man.at(index), 100); + ASSERT_EQ(man.size(), 1); + + // Reached to the final index so next push/emplace must fail + auto index2 = man.push(std::make_unique<int>(200)); + ASSERT_EQ(man.size(), 1); + ASSERT_FALSE(index2.valid()); +} + +TEST(ObjectManager, neg_emplace_max_index) +{ + util::ObjectManager<Index, int> man; + + // Insert an object with maximum valid index + auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1}); + ASSERT_EQ(index.value(), kMaxUInt32 - 1); + ASSERT_EQ(man.at(index), 100); + ASSERT_EQ(man.size(), 1); + + // Reached to the final index so next push/emplace must fail + auto index3 = man.emplace(200); + ASSERT_EQ(man.size(), 1); + 
ASSERT_FALSE(index3.valid()); +} + +TEST(ObjectManager, const_iterate) +{ + util::ObjectManager<Index, int> man; + + auto index0 = man.emplace(100); + auto index1 = man.emplace(200); + auto index2 = man.emplace(300); + + int sum = 0; + man.iterate([&](const Index &index, const int &val) { sum += val; }); + ASSERT_EQ(sum, 600); +} + +TEST(ObjectManager, non_const_iterate) +{ + util::ObjectManager<Index, int> man; + + auto index0 = man.emplace(100); + auto index1 = man.emplace(200); + auto index2 = man.emplace(300); + + man.iterate([&](const Index &index, int &val) { val += 1; }); + ASSERT_EQ(man.at(index0), 101); + ASSERT_EQ(man.at(index1), 201); + ASSERT_EQ(man.at(index2), 301); +} + +TEST(ObjectManager, set) +{ + util::ObjectManager<Index, int> man; + auto index = man.set(Index{1}, std::make_unique<int>(100)); // Insert + ASSERT_EQ(index, Index{1}); + auto index2 = man.set(index, std::make_unique<int>(200)); // Overwrite + ASSERT_EQ(index2, index); + ASSERT_EQ(man.at(index2), 200); +} + +TEST(ObjectManager, neg_set) +{ + auto v = std::make_unique<int>(100); + util::ObjectManager<Index, int> man; + auto index = man.set(Index{}, std::move(v)); // Try set with an invalid index + ASSERT_EQ(index, Index{}); + ASSERT_FALSE(index.valid()); + ASSERT_NE(v, nullptr); // v must be kept when failure +} + +TEST(ObjectManager, getRawPtr) +{ + auto v = std::make_unique<int>(100); + auto v_ptr = v.get(); + util::ObjectManager<Index, int> man; + auto index = man.push(std::move(v)); + ASSERT_EQ(v_ptr, man.getRawPtr(index)); +} + +TEST(ObjectManager, neg_getRawPtr) +{ + util::ObjectManager<Index, int> man; + auto ptr = man.getRawPtr(Index{1}); + ASSERT_EQ(ptr, nullptr); +} diff --git a/runtime/onert/core/src/util/SNPEEventWriter.cc b/runtime/onert/core/src/util/SNPEEventWriter.cc new file mode 100644 index 000000000..87bbfc662 --- /dev/null +++ b/runtime/onert/core/src/util/SNPEEventWriter.cc @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "EventWriter.h" + +#include <json/json.h> + +#include <cassert> +#include <unordered_map> +#include <utility> + +/** + * @brief Version of SNPE format + * In version 1 + * - There is no "version" field in Json + * - Only one subgraph is supported + * - Operation name is a form of "$3 ADD" + * + * In version 2, + * - "version" : "2" was added in Json + * - Multiple session and multiple subgraphs are supported + * - When there is only one session, operation name is a form of "$2 subgraph $3 ADD", + * meaning ADD op whose operation index 3 in a subgraph whose index is 2 + * - When there are two or more sessions, operation name is a form of + * "$1 session $2 subgraph $3 ADD", meaning ADD op whose operation index 3 + * in a subgraph whose index is 2, which was run in 1st session. 
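+ *
+ * For example, getLabel() below yields "$0 subgraph $7 CONV_2D" for a single-session
+ * trace and "$1 session $0 subgraph $7 CONV_2D" when several sessions are traced
+ * (the indices and op name here are purely illustrative).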
+ */ +#define SNPE_JSON_SCHEMA_VERSION "2" + +namespace +{ + +std::string getLabel(const DurationEvent &evt) +{ + if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt)) + { + std::string subg_label("$" + std::to_string(evt_ptr->subg_index) + " subgraph"); + std::string op_label("$" + std::to_string(evt_ptr->op_index) + " " + evt_ptr->op_name); + + // Note : At this moment, there is only one thread running for EventWriter + if (evt_ptr->tracing_ctx->hasMultipleSessions()) + { + std::string session_label("$" + std::to_string(evt_ptr->session_index) + " session"); + return session_label + " " + subg_label + " " + op_label; + } + else + { + // When there is only one session, do not include session info + // Refer to https://github.sec.samsung.net/STAR/nnfw/issues/11436#issuecomment-930332 + return subg_label + " " + op_label; + } + } + else // SubgEvent + return "Graph"; +} + +std::string getBackend(const DurationEvent &evt) +{ + if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt)) + return evt_ptr->backend; + else // SubbEvent + return "runtime"; +} + +} // namespace + +void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders) +{ + struct Stat + { + uint64_t sum = 0; + uint64_t count = 0; + uint64_t max = 0; + uint64_t min = std::numeric_limits<uint64_t>::max(); + + void accumulate(uint64_t val) + { + sum += val; + count++; + max = std::max(max, val); + min = std::min(min, val); + } + }; + + Json::Value root; + root["version"] = SNPE_JSON_SCHEMA_VERSION; + + auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue}; + + // Memory + { + std::unordered_map<std::string, Stat> mem_stats; + for (const auto &recorder : recorders) + { + for (const auto &evt : recorder->counter_events()) + { + auto &mem_stat = mem_stats[evt.name]; + uint64_t val = std::stoull(evt.values.at("value")); + mem_stat.accumulate(val); + } + } + + auto &mem = exec_data["memory"] = Json::Value{Json::objectValue}; + for (const auto &kv : mem_stats) + { + auto &key = kv.first; + auto &val = kv.second; + mem[key]["Avg_Size"] = val.sum / val.count; + mem[key]["Max_Size"] = val.max; + mem[key]["Min_Size"] = val.min; + mem[key]["Runtime"] = "NA"; + } + } + + // Operation Execution Time + { + // NOTE This assumes _duration_events is sorted by "ts" ascending + + // 2D keys : stats[tid][name] + std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats; + std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps; + for (const auto &recorder : recorders) + { + for (const auto &evt : recorder->duration_events()) + { + std::string evt_name = getLabel(*evt); + std::string evt_tid = getBackend(*evt); + + auto &stat = stats[evt_tid][evt_name]; + auto &begin_ts = begin_timestamps[evt_tid][evt_name]; + uint64_t timestamp = std::stoull(evt->ts); + if (evt->ph == "B") + { + if (begin_ts != 0) + throw std::runtime_error{"Invalid Data"}; + begin_ts = timestamp; + } + else if (evt->ph == "E") + { + if (begin_ts == 0 || timestamp < begin_ts) + throw std::runtime_error{"Invalid Data"}; + stat.accumulate(timestamp - begin_ts); + begin_ts = 0; + } + else + throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt->ph + "\""}; + } + } + + for (const auto &kv : begin_timestamps) + for (const auto &kv2 : kv.second) + if (kv2.second != 0) + throw std::runtime_error{"Invalid Data - B and E pair does not match."}; + + for (const auto &kv : stats) + { + const auto &tid = kv.first; + const auto &map = kv.second; + auto 
&json_tid = exec_data[tid] = Json::Value{Json::objectValue}; + for (const auto &kv : map) + { + auto &name = kv.first; + auto &val = kv.second; + json_tid[name]["Avg_Time"] = val.sum / val.count; + json_tid[name]["Max_Time"] = val.max; + json_tid[name]["Min_Time"] = val.min; + json_tid[name]["Runtime"] = tid; + } + } + } + + _os << root; +} diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc index 95c15049d..862d6f725 100644 --- a/runtime/onert/core/src/util/ShapeInference.cc +++ b/runtime/onert/core/src/util/ShapeInference.cc @@ -22,6 +22,7 @@ #include "util/logging.h" #include <cassert> +#include <numeric> #include <sstream> #include <cmath> @@ -72,6 +73,19 @@ ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape } // namespace +namespace bcq +{ +inline int getOutputSize(const ir::Shape &cluster_shape, const int32_t *cluster_buf) +{ + int size = 0; + for (int idx = 0; idx < cluster_shape.dim(0); idx++) + { + size += cluster_buf[idx * 2 + 1]; + } + return size; +} +} // namespace bcq + // // Shape inference // @@ -97,10 +111,9 @@ std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, c break; case ir::PaddingType::EXPLICIT: out_h = - (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1; + (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1; out_w = - (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal + - 1; + (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal + 1; break; default: assert(false); @@ -114,8 +127,13 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha return broadcastShapes(lhs_shape, rhs_shape); } -ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank) +ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank) { + if (axis < 0 || axis >= rank) + { + throw std::runtime_error("ArgMinMax shape inference: Wrong axis value " + std::to_string(axis)); + } + ir::Shape out_shape; for (int idx = 0; idx < rank; ++idx) { @@ -171,11 +189,12 @@ ir::Shape inferReduceShape(const ir::Shape &input_shape, const std::vector<int> for (int i = 0; i < num_axis; ++i) { int current = axes[i]; + if (!(-input_num_dims <= current && current < input_num_dims)) + throw std::runtime_error{"Invalid dim value " + std::to_string(current)}; if (current < 0) { current += input_num_dims; } - assert(0 <= current && current < input_num_dims); for (int j = 0; j < i; ++j) { int previous = axes[j]; @@ -259,19 +278,24 @@ ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs return output_shape; } -ir::Shape inferBroadcastToShape(const ir::Shape wshape, const int32_t *shape_buffer) +/* + * shp_shape : SHAPE input tensor's shape + * shp_buf : SHAPE input tensor's buffer + */ +ir::Shape inferBroadcastToShape(const ir::Shape shp_shape, const int32_t *shp_buf) { - const int num_elements = wshape.num_elements(); + + const int num_elements = shp_shape.num_elements(); assert(num_elements != 0); - assert(shape_buffer); + assert(shp_buf); ir::Shape new_shape(num_elements); for (int i = 0; i < num_elements; ++i) { - assert(shape_buffer[i] != 0); // It shouldn't be 0. - new_shape.dim(i) = shape_buffer[i]; + assert(shp_buf[i] != 0); // It shouldn't be 0. 
+ new_shape.dim(i) = shp_buf[i]; } return new_shape; @@ -305,6 +329,9 @@ ir::Shape inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat: ir::Shape inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape, const ir::operation::Conv2D::Param ¶m, ir::Layout layout) { + if (param.stride.horizontal == 0 || param.stride.vertical == 0) + throw std::runtime_error{"Conv2D: stride values must be positive"}; + auto ifm_shape = in_shape.asFeature(layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in] @@ -321,6 +348,9 @@ ir::Shape inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape & const ir::operation::DepthwiseConv2D::Param ¶m, ir::Layout layout) { + if (param.stride.horizontal == 0 || param.stride.vertical == 0) + throw std::runtime_error{"DepthwiseConv2D: stride values must be positive"}; + assert(layout == ir::Layout::NHWC); auto ifm_shape = in_shape.asFeature(layout); @@ -330,7 +360,7 @@ ir::Shape inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape & assert(kf_shape.N == 1); const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W, - param.padding, param.stride); + param.padding, param.stride, param.dilation); return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.C}; } @@ -354,18 +384,22 @@ ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis) return out_shape; } -ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *buffer) +template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf) { - ir::Shape out_shape(in_shape.dim(0)); + ir::Shape out_shape(fill_shape.dim(0)); for (int out_x = 0; out_x < out_shape.rank(); ++out_x) { - out_shape.dim(out_x) = buffer[out_x]; + out_shape.dim(out_x) = static_cast<int32_t>(shape_buf[out_x]); } return out_shape; } +// template instantiation +template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int32_t *shape_buf); +template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int64_t *shape_buf); + ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape) { assert(in_shape.rank() >= 2); @@ -380,11 +414,60 @@ ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &k return {ir::Shape({static_cast<int32_t>(batch_size), num_units})}; } +ir::Shape inferBCQFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &cluster_shape, + const int32_t *cluster_buf) +{ + assert(cluster_shape.rank() == 2); + assert(cluster_shape.dim(1) == 2); + + const auto input_size = in_shape.dim(1); + const auto output_size = bcq::getOutputSize(cluster_shape, cluster_buf); + + return {ir::Shape({output_size, input_size})}; +} + +ir::Shape inferBCQGatherShape(const ir::Shape &indices_shape, const ir::Shape &cluster_shape, + const int32_t *cluster_buf, int rank, + const ir::operation::BCQGather::Param ¶m) +{ + ir::Shape out_shape; + ir::Shape in_original_shape; + + assert(cluster_shape.rank() == 2); + assert(cluster_shape.dim(1) == 2); + + auto hidden_size = param.input_hidden_size; + auto axis = param.axis; + + in_original_shape.append(bcq::getOutputSize(cluster_shape, cluster_buf)); + in_original_shape.append(hidden_size); + + const int indices_rank = indices_shape.rank(); + for (int idx = 0; idx < rank; ++idx) + { + if (idx == (int)axis) + { + for (int indices_idx = 0; indices_idx < indices_rank; indices_idx++) + { + out_shape.append(indices_shape.dim(indices_idx)); + } + } + else + { + 
out_shape.append(in_original_shape.dim(idx)); + } + } + + return out_shape; +} + ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indices_shape, int axis, int rank) { ir::Shape out_shape; + const int indices_rank = indices_shape.rank(); + for (int idx = 0; idx < rank; ++idx) { if (idx == axis) @@ -470,6 +553,9 @@ ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param ¶m, const ir::Layout layout) { + if (param.stride.horizontal == 0 || param.stride.vertical == 0) + throw std::runtime_error{"Pool2D: stride values must be positive"}; + assert(layout == ir::Layout::NHWC); auto ifm_shape = in_shape.asFeature(layout); const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw, @@ -482,6 +568,17 @@ ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t outp const int32_t output_width) { assert(in_shape.rank() == 4); + if (output_height < 0) + { + throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_height = " + + std::to_string(output_height)}; + } + if (output_width < 0) + { + throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_width = " + + std::to_string(output_width)}; + } + ir::Shape ret(in_shape.rank()); ret.dim(0) = in_shape.dim(0); @@ -497,9 +594,9 @@ template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delt ir::Shape out_shape(static_cast<int>(1)); out_shape.dim(0) = - (std::is_integral<T>::value - ? ((std::abs(start_val - limit_val) + std::abs(delta_val) - 1) / std::abs(delta_val)) - : std::ceil(std::abs((start_val - limit_val) / delta_val))); + (std::is_integral<T>::value + ? ((std::abs(start_val - limit_val) + std::abs(delta_val) - 1) / std::abs(delta_val)) + : std::ceil(std::abs((start_val - limit_val) / delta_val))); return out_shape; } @@ -511,12 +608,12 @@ ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_el const size_t total_num_elements) { ir::Shape ret(shape_num_elements); - int32_t flatten_dim = ir::Shape::UNSPECIFIED_DIM; + int32_t flatten_dim = ir::Shape::kUnspecifiedDim; for (int32_t i = 0; i < shape_num_elements; ++i) { if (shape_buf[i] < 0) { - if (flatten_dim != ir::Shape::UNSPECIFIED_DIM) + if (flatten_dim != ir::Shape::kUnspecifiedDim) throw std::runtime_error("Reshape: 2nd param has special dim(for flatten) more than twice"); flatten_dim = i; ret.dim(i) = 1; @@ -526,7 +623,7 @@ ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_el ret.dim(i) = shape_buf[i]; } } - if (flatten_dim != ir::Shape::UNSPECIFIED_DIM) + if (flatten_dim != ir::Shape::kUnspecifiedDim) ret.dim(flatten_dim) = total_num_elements / ret.num_elements(); // Check reshapable @@ -566,9 +663,9 @@ ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &i ir::Shape true_shape = input_true_shape; ir::Shape false_shape = input_false_shape; int most_rank = - (cond_shape.rank() >= true_shape.rank()) && (cond_shape.rank() >= false_shape.rank()) - ? cond_shape.rank() - : (false_shape.rank() >= true_shape.rank() ? false_shape.rank() : true_shape.rank()); + (cond_shape.rank() >= true_shape.rank()) && (cond_shape.rank() >= false_shape.rank()) + ? cond_shape.rank() + : (false_shape.rank() >= true_shape.rank() ? 
false_shape.rank() : true_shape.rank()); ir::Shape calculate_shape(most_rank); @@ -579,9 +676,9 @@ ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &i for (int i = 0; i < most_rank; ++i) { calculate_shape.dim(i) = - (cond_shape.dim(i) >= true_shape.dim(i)) && (cond_shape.dim(i) >= false_shape.dim(i)) - ? cond_shape.dim(i) - : (false_shape.dim(i) >= true_shape.dim(i) ? false_shape.dim(i) : true_shape.dim(i)); + (cond_shape.dim(i) >= true_shape.dim(i)) && (cond_shape.dim(i) >= false_shape.dim(i)) + ? cond_shape.dim(i) + : (false_shape.dim(i) >= true_shape.dim(i) ? false_shape.dim(i) : true_shape.dim(i)); if ((cond_shape.dim(i) != calculate_shape.dim(i) && cond_shape.dim(i) != 1) || (true_shape.dim(i) != calculate_shape.dim(i) && true_shape.dim(i) != 1) || @@ -613,7 +710,8 @@ ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &i return new_shape; } -ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, const int32_t *sizes) +template <typename T> +ir::Shape inferSliceShape(const ir::Shape &input_shape, const T *begins_buf, const T *sizes_buf) { const uint32_t rank = input_shape.rank(); ir::Shape out_shape(rank); @@ -623,12 +721,12 @@ ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, c const auto input_dim = input_shape.dim(idx); // begin is zero-based - auto begin = begins[idx]; + auto begin = begins_buf[idx]; if (begin < 0) throw std::runtime_error("shape inference Slice: Invalid begin."); // size is one-based - auto size = sizes[idx]; + auto size = sizes_buf[idx]; if (size < -1) throw std::runtime_error("shape inference Slice: Invalid size."); @@ -638,18 +736,23 @@ ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, c } else { - if (input_dim < begin + size) + if (input_dim < static_cast<int32_t>(begin + size)) throw std::runtime_error("shape inference Slice: Invalid begin and size."); } - out_shape.dim(idx) = size; + out_shape.dim(idx) = static_cast<int32_t>(size); } return out_shape; } +// template instantiation +template ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins_buf, + const int32_t *sizes_buf); +template ir::Shape inferSliceShape(const ir::Shape &input_shape, const int64_t *begins_buf, + const int64_t *sizes_buf); ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape &block_shape_shape, - const ir::Shape &padding_shape, const int32_t *block_shape_data, - const int32_t *padding_data) + const ir::Shape &padding_shape, const int32_t *block_shape_buf, + const int32_t *padding_buf) { const uint32_t rank = input_shape.rank(); ir::Shape out_shape(rank); @@ -677,14 +780,14 @@ ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape for (int dim = 0; dim < kSpatialDimensionNum; ++dim) { int final_dim_size = - (input_shape.dim(dim + 1) + padding_data[dim * 2] + padding_data[dim * 2 + 1]); + (input_shape.dim(dim + 1) + padding_buf[dim * 2] + padding_buf[dim * 2 + 1]); - assert(final_dim_size % block_shape_data[dim] == 0); + assert(final_dim_size % block_shape_buf[dim] == 0); - out_shape.dim(dim + 1) = final_dim_size / block_shape_data[dim]; + out_shape.dim(dim + 1) = final_dim_size / block_shape_buf[dim]; } - const int output_batch_size = input_shape.dim(0) * block_shape_data[0] * block_shape_data[1]; + const int output_batch_size = input_shape.dim(0) * block_shape_buf[0] * block_shape_buf[1]; const int output_channel_size = input_shape.dim(3); out_shape.dim(0) = 
output_batch_size; @@ -740,7 +843,7 @@ ir::Shape inferSqueezeShape(const ir::Shape &in_shape, const ir::operation::Sque if (!(current >= 0 && current < shape_rank && in_shape.dim(current) == 1)) { throw std::runtime_error( - "The following conditions must be met: 0 <= dim < Shape rank, dim == 1"); + "The following conditions must be met: 0 <= dim < Shape rank, dim == 1"); } if (!should_squeeze[current]) @@ -948,35 +1051,71 @@ ir::Shape inferStridedSliceShape(const ir::Shape &input_shape, const StridedSlic return out_shape; } -ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier) +ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier_buf, + const int32_t multiplier_size) { - // assert(in_shape.rank() == multiplier.rank()); + if (multiplier_size != in_shape.rank()) + { + throw std::runtime_error( + "inferTileShape failed, input rank: " + std::to_string(in_shape.rank()) + + ", bad multipliers size: " + std::to_string(multiplier_size) + ""); + } ir::Shape new_Shape(in_shape.rank()); for (int i = 0; i < in_shape.rank(); ++i) { - assert(multiplier[i]); // multiplier[i] shuld not be 0. - new_Shape.dim(i) = in_shape.dim(i) * multiplier[i]; + assert(multiplier_buf[i]); // multiplier_buf[i] shuld not be 0. + new_Shape.dim(i) = in_shape.dim(i) * multiplier_buf[i]; } return new_Shape; } -ir::Shape inferTransposeShape(const ir::Shape &in_shape, const std::vector<int> &perm) +ir::Shape inferTransposeShape(const ir::Shape &in_shape, const int32_t *perm_buf, + const int32_t perm_size) { - if (static_cast<int>(perm.size()) > in_shape.rank()) + const auto rank = in_shape.rank(); + if (perm_size > rank) { - throw std::runtime_error("inferTransposeShape failed, bad rank size: " + - std::to_string(static_cast<int>(perm.size()))); + throw std::runtime_error("inferTransposeShape failed, bad permutation size: " + + std::to_string(perm_size)); } - ir::Shape out_shape(static_cast<int>(perm.size())); - for (int idx = 0; idx < static_cast<int>(perm.size()); idx++) + + const int32_t *perm_data = perm_buf; + std::vector<int32_t> regular_perm_vec; + if (perm_size == 0) + { + // perm_data will be set to (n-1...0) + regular_perm_vec.resize(rank); + std::iota(regular_perm_vec.begin(), regular_perm_vec.end(), 0); + std::reverse(regular_perm_vec.begin(), regular_perm_vec.end()); + perm_data = regular_perm_vec.data(); + } + else { - if (perm[idx] < 0 || perm[idx] >= static_cast<int>(perm.size())) + assert(rank == perm_size); + } + + ir::Shape out_shape(rank); + std::vector<bool> visit_perms(rank, false); + for (int idx = 0; idx < rank; idx++) + { + const auto perm_val = perm_data[idx]; + // Check invalid permutation value + if (perm_val < 0 || perm_val >= rank) { - throw std::runtime_error("inferTransposeShape failed, bad perm value: " + - std::to_string(perm[idx])); + throw std::runtime_error("inferTransposeShape failed, bad permutation value: " + + std::to_string(perm_val)); } - out_shape.dim(idx) = in_shape.dim(perm[idx]); + + // Check duplicated permutation value + if (visit_perms.at(perm_val)) + { + throw std::runtime_error("inferTransposeShape failed, duplicated permutation value: " + + std::to_string(perm_val)); + } + visit_perms.at(perm_val) = true; + + out_shape.dim(idx) = in_shape.dim(perm_val); } return out_shape; } diff --git a/runtime/onert/core/src/util/ShapeInference.test.cc b/runtime/onert/core/src/util/ShapeInference.test.cc new file mode 100644 index 000000000..96579bfa2 --- /dev/null +++ b/runtime/onert/core/src/util/ShapeInference.test.cc @@ -0,0 +1,544 @@ 
+/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/ShapeInference.h" + +#include <gtest/gtest.h> + +using namespace onert::ir; + +TEST(ShapeInference, Elementwise) +{ + Shape lhs_shape{1, 299, 299, 3}; + Shape rhs_shape{3}; + auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.dim(0), 1); + ASSERT_EQ(infered_out_shape.dim(1), 299); + ASSERT_EQ(infered_out_shape.dim(2), 299); + ASSERT_EQ(infered_out_shape.dim(3), 3); +} + +TEST(ShapeInference, neg_Elementwise) +{ + Shape lhs_shape{1, 299, 299, 3}; + Shape rhs_shape{5, 3}; + ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error); +} + +TEST(ShapeInference, Pool2DNodeSame) +{ + Shape in_shape{10, 6, 12, 20}; + Stride stride{3, 7}; + Padding padding{PaddingType::SAME}; + + operation::Pool2D::Param avg_pool_param{ + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); + + operation::Pool2D::Param max_pool_param{ + operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; + infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); +} + +TEST(ShapeInference, Pool2DNodeValid) +{ + Shape in_shape{10, 6, 12, 20}; + Stride stride{3, 7}; + Padding padding{PaddingType::VALID}; + + operation::Pool2D::Param avg_pool_param{ + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); + + operation::Pool2D::Param max_pool_param{ + operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; + infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + 
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); +} + +TEST(ShapeInference, Pool2DNodeExplicit) +{ + Shape in_shape{10, 3, 5, 20}; + + Stride stride{3, 7}; + Padding padding{4, 3, 2, 1}; + + operation::Pool2D::Param avg_pool_param{ + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); + + operation::Pool2D::Param max_pool_param{ + operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; + infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); +} + +TEST(ShapeInference, neg_Pool2DNode_InvalidStride) +{ + Shape in_shape{10, 6, 12, 20}; + Stride stride{0, 7}; + Padding padding{PaddingType::SAME}; + + operation::Pool2D::Param avg_pool_param{ + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param), + std::runtime_error); +} + +TEST(ShapeInference, Conv2D) +{ + Shape in_shape{10, 6, 12, 20}; + Shape ker_shape{30, 3, 6, 20}; + + operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE, + Dilation{1, 1}}; + auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30); + + param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE, + Dilation{1, 1}}; + infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30); + + param = + operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}}; + infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30); +} + +TEST(ShapeInference, neg_Conv2D_InvalidStride) +{ + Shape in_shape{10, 6, 12, 20}; + Shape ker_shape{30, 3, 6, 20}; + + operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE, + Dilation{1, 1}}; + ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, 
ker_shape, param), + std::runtime_error); +} + +TEST(ShapeInference, DepthwiseConv2D) +{ + Shape in_shape{10, 6, 12, 20}; + Shape ker_shape{1, 3, 6, 60}; + + operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3, + Activation::NONE, Dilation{1, 1}}; + auto infered_out_shape = + onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60); + + param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3, + Activation::NONE, Dilation{1, 1}}; + infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60); + + param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE, + Dilation{1, 1}}; + infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60); +} + +TEST(ShapeInference, neg_DepthwiseConv2D_InvalidSride) +{ + Shape in_shape{10, 6, 12, 20}; + Shape ker_shape{1, 3, 6, 60}; + + operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3, + Activation::NONE, Dilation{1, 1}}; + ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param), + std::runtime_error); +} + +TEST(ShapeInference, Concat) +{ + { + Shape in1{10, 20, 30, 3, 50}; + Shape in2{10, 20, 30, 2, 50}; + Shape in3{10, 20, 30, 2, 50}; + + operation::Concat::Param param{3}; + auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param); + + ASSERT_EQ(infered_out_shape.rank(), 5); + ASSERT_EQ(infered_out_shape.dim(0), 10); + ASSERT_EQ(infered_out_shape.dim(1), 20); + ASSERT_EQ(infered_out_shape.dim(2), 30); + ASSERT_EQ(infered_out_shape.dim(3), 7); + ASSERT_EQ(infered_out_shape.dim(4), 50); + } + { + // case 1. when axis < 0 + Shape in1{10, 20, 2}; + Shape in2{10, 20, 3}; + + operation::Concat::Param param{-1}; + auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param); + + ASSERT_EQ(infered_out_shape.rank(), 3); + ASSERT_EQ(infered_out_shape.dim(0), 10); + ASSERT_EQ(infered_out_shape.dim(1), 20); + ASSERT_EQ(infered_out_shape.dim(2), 5); + } + { + // case 2. 
when axis < 0 + Shape in1{2, 20, 2}; + Shape in2{3, 20, 2}; + + operation::Concat::Param param{-3}; + auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param); + + ASSERT_EQ(infered_out_shape.rank(), 3); + ASSERT_EQ(infered_out_shape.dim(0), 5); + ASSERT_EQ(infered_out_shape.dim(1), 20); + ASSERT_EQ(infered_out_shape.dim(2), 2); + } +} + +TEST(ShapeInference, neg_Concat) +{ + { + operation::Concat::Param param{2}; + Shape in1{10, 1, 3}; + Shape in2{10, 2, 4}; // dim[1] should be 1 but 2 + + EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param)); + } + { // wrong rank + operation::Concat::Param param{2}; + Shape in1{10, 2, 3, 4}; + Shape in2{10, 2, 4}; // rank should be 4 + + EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param)); + } +} + +TEST(ShapeInference, ExpandDims) +{ + Shape in_shape{30, 40}; + + auto check = [&](int32_t axis, Shape &expected) { + auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis); + + ASSERT_EQ(actual.rank(), 3); + for (int32_t dim = 0; dim < expected.rank(); dim++) + ASSERT_EQ(actual.dim(dim), expected.dim(dim)); + }; + + { // boundary + int32_t axis = 0; + Shape expected{1, 30, 40}; + check(axis, expected); + } + { // boundary + int32_t axis = 2; + Shape expected{30, 40, 1}; + check(axis, expected); + } + { // inside + int32_t axis = 1; + Shape expected{30, 1, 40}; + check(axis, expected); + } + { // negative boundary + int32_t axis = -1; + Shape expected{30, 40, 1}; + check(axis, expected); + } + { // negative boundary + int32_t axis = -3; + Shape expected{1, 30, 40}; + check(axis, expected); + } +} + +TEST(ShapeInference, neg_ExpandDims) +{ + Shape in_shape{30, 40}; + + { // over boundary + int32_t axis = 3; + ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error); + } + { // over boundary + int32_t axis = -4; + ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error); + } +} + +TEST(ShapeInference, FullyConnected) +{ + Shape in_shape{3, 4, 5, 6}; + Shape ker_shape{3, 10}; + auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape); + + ASSERT_EQ(infered_out_shape.rank(), 2); + ASSERT_EQ(infered_out_shape.dim(0), 36); + ASSERT_EQ(infered_out_shape.dim(1), 3); +} + +TEST(ShapeInference, Transpose) +{ + auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) { + // pre-conditions + ASSERT_EQ(in_shape.rank(), perm.size()); + ASSERT_EQ(expected.rank(), perm.size()); + auto inferred_out_shape = + onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()); + // post-conditions + ASSERT_EQ(inferred_out_shape.rank(), perm.size()); + for (int32_t dim = 0; dim < expected.rank(); dim++) + { + ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim)); + } + }; + // check for 2-D + { + Shape in_shape{2, 3}; + std::vector<int> perm = {1, 0}; + Shape expected{3, 2}; + // int32_t rank = 2; + check(in_shape, perm, expected); + } + // check for 3-D + { + Shape in_shape{1, 2, 3}; + std::vector<int> perm = {2, 0, 1}; + Shape expected{3, 1, 2}; + // int32_t rank = 3; + check(in_shape, perm, expected); + } + // check for 4-D + { + Shape in_shape{1, 2, 3, 4}; + std::vector<int> perm = {1, 3, 0, 2}; + Shape expected{2, 4, 1, 3}; + // int32_t rank = 4; + check(in_shape, perm, expected); + } +} + +TEST(ShapeInference, neg_Transpose) +{ + Shape in_shape{1, 2, 3}; + // Invalid parameter size + { + std::vector<int> perm = {2, 0, 1, 0}; + // 
int32_t rank = 3; + ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()), + std::runtime_error); + } + // Invalid parameter value + { + std::vector<int> perm = {2, 0, 3}; + // int32_t rank = 3; + ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()), + std::runtime_error); + } +} + +TEST(ShapeInference, Gather) +{ + auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) { + int rank = input.rank(); + auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank); + + ASSERT_EQ(actual.rank(), expected.rank()); + + for (int32_t dim = 0; dim < expected.rank(); dim++) + ASSERT_EQ(actual.dim(dim), expected.dim(dim)); + }; + + // check for 2-D, 3-D, axis 0 + { + Shape input{3, 4}; + Shape indices{1, 1, 2}; + int32_t axis = 0; + Shape expected{1, 1, 2, 4}; + check(input, indices, expected, axis); + } + + // check for 2-D, 3-D, axis 1 + { + Shape input{3, 4}; + Shape indices{1, 2, 1}; + int32_t axis = 1; + Shape expected{3, 1, 2, 1}; + check(input, indices, expected, axis); + } + + // check for 3-D, 2-D, axis 0 + { + Shape input{2, 3, 4}; + Shape indices{1, 2}; + int32_t axis = 0; + Shape expected{1, 2, 3, 4}; + check(input, indices, expected, axis); + } + + // check for 3-D, 2-D, axis 2 + { + Shape input{2, 3, 4}; + Shape indices{2, 1}; + int32_t axis = 2; + Shape expected{2, 3, 2, 1}; + check(input, indices, expected, axis); + } + + // check for 4D, axis 0 + { + Shape input{1, 2, 3, 4}; + Shape indices{2}; + int32_t axis = 0; + Shape expected{2, 2, 3, 4}; + check(input, indices, expected, axis); + } +} + +TEST(ShapeInference, BCQFullyConnected) +{ + auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster, + Shape &expected) { + auto actual = + onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data()); + ASSERT_EQ(actual.rank(), expected.rank()); + + for (int32_t dim = 0; dim < expected.rank(); dim++) + ASSERT_EQ(actual.dim(dim), expected.dim(dim)); + }; + + { + Shape in_shape{10, 1}; + Shape cluster_shape{3, 2}; + std::vector<int> cluster = {1, 10, 2, 10, 3, 10}; + + Shape expected{30, 1}; + check(in_shape, cluster_shape, cluster, expected); + } + + { + Shape in_shape{1, 1}; + Shape cluster_shape{1, 2}; + std::vector<int> cluster = {3, 50}; + + Shape expected{50, 1}; + check(in_shape, cluster_shape, cluster, expected); + } +} + +TEST(ShapeInference, BCQGather) +{ + auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster, + uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) { + operation::BCQGather::Param param{hidden_size, axis}; + auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape, + cluster.data(), rank, param); + ASSERT_EQ(actual.rank(), expected.rank()); + + for (int32_t dim = 0; dim < expected.rank(); dim++) + ASSERT_EQ(actual.dim(dim), expected.dim(dim)); + }; + + { + Shape indices_shape{5, 1}; + Shape cluster_shape{3, 2}; + std::vector<int> cluster = {1, 10, 2, 10, 3, 10}; + uint32_t hidden_size = 10; + uint32_t axis = 0; + int rank = 2; + + Shape expected{5, 1, 10}; + check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected); + } + + { + Shape indices_shape{5, 1}; + Shape cluster_shape{3, 2}; + std::vector<int> cluster = {1, 10, 2, 10, 3, 10}; + uint32_t hidden_size = 10; + uint32_t axis = 1; + int rank = 2; + + Shape expected{30, 5, 1}; + check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, 
expected); + } +} diff --git a/runtime/onert/core/src/util/TracingCtx.cc b/runtime/onert/core/src/util/TracingCtx.cc new file mode 100644 index 000000000..c05baee60 --- /dev/null +++ b/runtime/onert/core/src/util/TracingCtx.cc @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/TracingCtx.h" + +namespace onert +{ +namespace util +{ + +// initializing static member var +std::mutex TracingCtx::_session_id_mutex; +uint32_t TracingCtx::_next_session_id = 0; + +} // namespace util +} // namespace onert |
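
Below is a minimal gtest-style sketch, separate from the patch above, showing how the newly templated inferSliceShape could be exercised through the int64_t instantiation added in this change. It assumes the same "util/ShapeInference.h" header and onert::ir types that ShapeInference.test.cc in this diff already uses; the test name itself is illustrative only.

  #include "util/ShapeInference.h"

  #include <gtest/gtest.h>

  #include <cstdint>
  #include <stdexcept>
  #include <vector>

  TEST(ShapeInference, Slice_Int64Buffers)
  {
    onert::ir::Shape in_shape{3, 4, 5};
    // begin is zero-based; a size of -1 means "to the end of that dimension"
    std::vector<int64_t> begins = {1, 0, 2};
    std::vector<int64_t> sizes = {2, -1, 3};

    auto out_shape =
      onert::shape_inference::inferSliceShape(in_shape, begins.data(), sizes.data());

    ASSERT_EQ(out_shape.rank(), 3);
    ASSERT_EQ(out_shape.dim(0), 2); // begin 1, size 2
    ASSERT_EQ(out_shape.dim(1), 4); // size -1 -> remaining extent of dimension 1
    ASSERT_EQ(out_shape.dim(2), 3); // begin 2, size 3
  }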
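
A similar sketch, again not part of the commit and assuming the same includes as the previous one, covers the tightened inferTransposeShape: an empty permutation now defaults to reversing the dimensions, and a duplicated permutation value is rejected with a runtime_error, as shown in the hunk above.

  TEST(ShapeInference, Transpose_DefaultAndDuplicate)
  {
    onert::ir::Shape in_shape{1, 2, 3};

    // Empty permutation: perm_data is generated internally as {2, 1, 0},
    // so the buffer pointer is never dereferenced
    auto reversed = onert::shape_inference::inferTransposeShape(in_shape, nullptr, 0);
    ASSERT_EQ(reversed.rank(), 3);
    ASSERT_EQ(reversed.dim(0), 3);
    ASSERT_EQ(reversed.dim(1), 2);
    ASSERT_EQ(reversed.dim(2), 1);

    // A duplicated permutation value now throws instead of producing a bogus shape
    std::vector<int32_t> dup_perm = {1, 1, 0};
    ASSERT_THROW(
      onert::shape_inference::inferTransposeShape(in_shape, dup_perm.data(), dup_perm.size()),
      std::runtime_error);
  }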
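
Finally, an illustrative test (not one from the patch, same includes assumed) for the new multiplier-size check in inferTileShape: the multiplier buffer length must now match the input rank, otherwise a runtime_error is thrown rather than silently reading past the buffer.

  TEST(ShapeInference, Tile_MultiplierSizeCheck)
  {
    onert::ir::Shape in_shape{2, 3};

    // Matching rank: each dimension is scaled by its multiplier
    std::vector<int32_t> mult = {3, 2};
    auto tiled = onert::shape_inference::inferTileShape(in_shape, mult.data(), mult.size());
    ASSERT_EQ(tiled.dim(0), 6);
    ASSERT_EQ(tiled.dim(1), 6);

    // Mismatched multiplier count is rejected
    std::vector<int32_t> bad_mult = {2, 2, 2};
    ASSERT_THROW(
      onert::shape_inference::inferTileShape(in_shape, bad_mult.data(), bad_mult.size()),
      std::runtime_error);
  }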