diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2022-04-15 19:15:11 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2022-04-15 19:15:11 +0900 |
commit | 3ad689f0803519e343c36d5700646e86059df961 (patch) | |
tree | 862346c401a5577518fa7f042532aa931b53aa0e /runtime/onert | |
parent | ac6e4dd7b480e83b586ef533d7b29a8a97eb48fe (diff) | |
download | nnfw-3ad689f0803519e343c36d5700646e86059df961.tar.gz nnfw-3ad689f0803519e343c36d5700646e86059df961.tar.bz2 nnfw-3ad689f0803519e343c36d5700646e86059df961.zip |
Imported Upstream version 1.20.0upstream/1.20.0submit/tizen/20220415.103159
Diffstat (limited to 'runtime/onert')
254 files changed, 7891 insertions, 27191 deletions
diff --git a/runtime/onert/api/CMakeLists.txt b/runtime/onert/api/CMakeLists.txt index b238b1f89..beb243a4d 100644 --- a/runtime/onert/api/CMakeLists.txt +++ b/runtime/onert/api/CMakeLists.txt @@ -11,6 +11,7 @@ set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h) target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header) target_link_libraries(${ONERT_DEV} PRIVATE onert_core) target_link_libraries(${ONERT_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD}) +target_link_libraries(${ONERT_DEV} PRIVATE trix_loader) target_link_libraries(${ONERT_DEV} PRIVATE nnfw_common) target_link_libraries(${ONERT_DEV} PRIVATE nnfw_coverage) # NOTE Below line is added to remove warning for android build diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h index 6eb7e6ba9..6f296a931 100644 --- a/runtime/onert/api/include/nnfw.h +++ b/runtime/onert/api/include/nnfw.h @@ -92,6 +92,15 @@ typedef enum */ NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6, + /** + * A tensor of 16 bit signed integers that represent real numbers. + * + * real_value = (integer_value - zeroPoint) * scale. + * + * Forced to have zeroPoint equal to 0. 
+ */ + NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7, + } NNFW_TYPE; /** diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h index b885a6b90..45b34716a 100644 --- a/runtime/onert/api/include/nnfw_version.h +++ b/runtime/onert/api/include/nnfw_version.h @@ -21,6 +21,6 @@ * NNFW_VERSION is a uint32 value representing nnfw runtime version * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch */ -#define NNFW_VERSION 0x01001300 +#define NNFW_VERSION 0x01001400 #endif // __NNFW_VERSION_H__ diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc index b69dd83e4..0ebd385e9 100644 --- a/runtime/onert/api/src/nnfw_api.cc +++ b/runtime/onert/api/src/nnfw_api.cc @@ -28,6 +28,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_BOOL, 3); STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_UINT8, 4); STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_INT64, 5); STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED, 6); +STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED, 7); STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_NO_ERROR, 0); STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_ERROR, 1); diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc index 1a3aaf9e9..62a043921 100644 --- a/runtime/onert/api/src/nnfw_api_internal.cc +++ b/runtime/onert/api/src/nnfw_api_internal.cc @@ -23,6 +23,7 @@ #include "exec/Execution.h" #include "circle_loader.h" #include "tflite_loader.h" +#include "trix_loader.h" #include "json/json.h" #include "ir/OpCode.h" #include "util/TracingCtx.h" @@ -155,6 +156,45 @@ void setConfigKeyValues(const CfgKeyValues &keyValues) onert::util::config_source_ext(std::move(configsrc)); } +NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt) +{ + using onert::ir::DataType; + switch (dt) + { + case DataType::FLOAT32: + return NNFW_TYPE_TENSOR_FLOAT32; + case DataType::INT32: + return NNFW_TYPE_TENSOR_INT32; + case DataType::QUANT_UINT8_ASYMM: + return 
NNFW_TYPE_TENSOR_QUANT8_ASYMM; + case DataType::BOOL8: + return NNFW_TYPE_TENSOR_BOOL; + case DataType::UINT8: + return NNFW_TYPE_TENSOR_UINT8; + case DataType::INT64: + return NNFW_TYPE_TENSOR_INT64; + case DataType::QUANT_INT8_ASYMM: + return NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED; + case DataType::QUANT_INT16_SYMM: + return NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED; + case DataType::UINT32: + case DataType::QUANT_INT8_SYMM: + default: + throw std::runtime_error("Error: Model has type that runtime API does not support."); + } +} + +void fillTensorInfo(nnfw_tensorinfo *ti, const onert::ir::Shape &shape, + const onert::ir::DataType &dtype) +{ + ti->rank = shape.rank(); + for (int j = 0; j < ti->rank; ++j) + { + ti->dims[j] = shape.dim(j); + } + ti->dtype = datatype_to_nnfw_dtype(dtype); +} + } // namespace nnfw_session::nnfw_session() @@ -225,6 +265,10 @@ NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path) { _subgraphs = onert::circle_loader::loadModel(filename.c_str()); } + else if (model_type == ".tvn") + { + _subgraphs = onert::trix_loader::loadModel(filename.c_str()); + } else { std::cerr << "Unsupported model type" << std::endl; @@ -307,6 +351,10 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir) { _subgraphs = onert::circle_loader::loadModel(model_file_path); } + else if (model_type == "tvn") + { + _subgraphs = onert::trix_loader::loadModel(model_file_path); + } else { std::cerr << "Unsupported model type in MANIFEST" << std::endl; @@ -657,32 +705,6 @@ NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout) return NNFW_STATUS_NO_ERROR; } -static NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt) -{ - using onert::ir::DataType; - switch (dt) - { - case DataType::FLOAT32: - return NNFW_TYPE_TENSOR_FLOAT32; - case DataType::INT32: - return NNFW_TYPE_TENSOR_INT32; - case DataType::QUANT_UINT8_ASYMM: - return NNFW_TYPE_TENSOR_QUANT8_ASYMM; - case DataType::BOOL8: - return 
NNFW_TYPE_TENSOR_BOOL; - case DataType::UINT8: - return NNFW_TYPE_TENSOR_UINT8; - case DataType::INT64: - return NNFW_TYPE_TENSOR_INT64; - case DataType::QUANT_INT8_ASYMM: - return NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED; - case DataType::UINT32: - case DataType::QUANT_INT8_SYMM: - default: - throw std::runtime_error("Error: Model has type that runtime API does not support."); - } -} - NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti) { // sanity check @@ -769,22 +791,11 @@ NNFW_STATUS nnfw_session::input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti) auto shape = primary_subgraph()->operands().at(opidx).shape(); if (isStatePreparedOrFinishedRun()) { - if (_execution) - { - shape = _execution->getInputShape(onert::ir::IOIndex{index}); - } - else - { - shape = _executions.at(0)->getInputShape(onert::ir::IOIndex{index}); - } - } - - ti->rank = shape.rank(); - for (int j = 0; j < ti->rank; ++j) - { - ti->dims[j] = shape.dim(j); + shape = _execution ? _execution->getInputShape(onert::ir::IOIndex{index}) + : _executions.at(0)->getInputShape(onert::ir::IOIndex{index}); } - ti->dtype = datatype_to_nnfw_dtype(primary_subgraph()->operands().at(opidx).typeInfo().type()); + auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type(); + fillTensorInfo(ti, shape, dtype); } catch (const std::exception &e) { @@ -820,21 +831,12 @@ NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti) // If it is called after `nnfw_run` then get the shape from Execution, not from the graph if (isStateFinishedRun()) { - if (_execution) - { - shape = _execution->getOutputShape(onert::ir::IOIndex{index}); - } - else - { - shape = _executions.at(_executions.size() - 1)->getOutputShape(onert::ir::IOIndex{index}); - } - } - ti->rank = shape.rank(); - for (int j = 0; j < ti->rank; ++j) - { - ti->dims[j] = shape.dim(j); + shape = _execution + ? 
_execution->getOutputShape(onert::ir::IOIndex{index}) + : _executions.at(_executions.size() - 1)->getOutputShape(onert::ir::IOIndex{index}); } - ti->dtype = datatype_to_nnfw_dtype(primary_subgraph()->operands().at(opidx).typeInfo().type()); + auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type(); + fillTensorInfo(ti, shape, dtype); } catch (const std::exception &e) { diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt index 4b21e0ace..c43160ba7 100644 --- a/runtime/onert/backend/CMakeLists.txt +++ b/runtime/onert/backend/CMakeLists.txt @@ -1,9 +1,14 @@ +# Backend common libs set(LIB_ONERT_BACKEND_ACL_COMMON onert_backend_acl_common) +set(LIB_ONERT_BACKEND_CL_COMMON onert_backend_cl_common) +add_subdirectory(cl_common) +add_subdirectory(acl_common) +# Backends add_subdirectory(cpu) add_subdirectory(acl_cl) add_subdirectory(acl_neon) -add_subdirectory(acl_common) add_subdirectory(ruy) add_subdirectory(gpu_cl) add_subdirectory(xnnpack) +add_subdirectory(trix) diff --git a/runtime/onert/backend/acl_cl/BackendContext.cc b/runtime/onert/backend/acl_cl/BackendContext.cc deleted file mode 100644 index 5595043ca..000000000 --- a/runtime/onert/backend/acl_cl/BackendContext.cc +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "BackendContext.h" - -#include "TensorBuilder.h" -#include "KernelGenerator.h" -#include "Optimizer.h" -#include "util/logging.h" -#include "ir/Index.h" -#include "ir/OperandIndexMap.h" -#include "ir/OperandIndexSequence.h" - -namespace onert -{ -namespace backend -{ -namespace acl_cl -{ - -void BackendContext::initConsts() -{ - _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) { - constant_initializer->setLayout(graph()->layout()); - op.accept(*constant_initializer); - }); - - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { - if (_data.external_operands.contains(ind) || !operand.isConstant()) - return; - const auto &obj = graph()->operands().at(ind); - if (obj.isConstant() && !constant_initializer->exist(ind)) - { - constant_initializer->registerDefaultInitializer(ind, obj); - } - }); - - constant_initializer->run(); -} - -void BackendContext::planTensors() -{ - ir::OperandIndexMap<uint32_t> uses_map; - ir::OperandIndexMap<uint32_t> def_map; - ir::OperandIndexSequence constants; - - // Prepare scanning - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (_data.external_operands.contains(ind)) - return; - - uses_map[ind] = obj.getUses().size(); - def_map[ind] = obj.getDef().valid() ? 1 : 0; - - if (obj.isConstant()) - constants.append(ind); - - if (!tensor_builder->isRegistered(ind)) - { - // These tensors do not exist in any operation (No use and def) - const auto info = obj.info(); - const auto layout = _data.operand_layouts.at(ind); - // TODO Change tensor info to have permuted shape - tensor_builder->registerTensorInfo(ind, info, layout); - } - }); - - // Start scanning to do notify{First|Last}Use for each tensor - - // If a tensor is a constant, increase the use of the tensor and allocate it first. - // Increasing use count here makes the tensor never be deallocated, i.e it they will be - // deallocated last. 
- VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; - for (const auto &ind : constants) - { - uses_map[ind]++; - tensor_builder->notifyFirstUse(ind); - } - - // At each operation, - // 1. Scan DEF of outputs. If the DEF, allocate it - // 2. Scan DEF of inputs. If variable tensor, allocate it - // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 - for (const auto op_ind : _data.op_order) - { - const auto &op = graph()->operations().at(op_ind); - auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - - // Define outputs - for (const auto &ind : op_outputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(def_map.find(ind) != def_map.end()); - if (def_map[ind]) - { - def_map[ind] = 0; - tensor_builder->notifyFirstUse(ind); - } - } - - // Scan variable tensors - // This tensor has features like constant. But OperandInfo and LowerInfo treat them as - // non-constant because of less memory usage by memory planning in here - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - const auto &operand = graph()->operands().at(ind); - if (operand.info().isVariable()) - { - // The variable tensor with buffer is not supported yet - assert(operand.data() == nullptr); - assert(operand.getUses().size() == 1 && !operand.getDef().valid()); - assert(uses_map[ind] == 1 && def_map[ind] == 0); - tensor_builder->notifyFirstUse(ind); - } - } - - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(uses_map.find(ind) != uses_map.end()); - assert(uses_map[ind] > 0); - uses_map[ind]--; - if (uses_map[ind] == 0) - { - // plan for deallocation of static tensornode - tensor_builder->notifyLastUse(ind); - } - } - } - - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { - if (uses_map[ind] == 0) - { - 
tensor_builder->notifyLastUse(ind); - } - }); - - // Dispose and validate - for (const auto &ind : constants) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder->notifyLastUse(ind); - } - } - - assert( - std::all_of(uses_map.begin(), uses_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); - - assert( - std::all_of(def_map.begin(), def_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); -} - -ITensorRegistry *BackendContext::genTensors() -{ - optimizer->optimize(); - - graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (external_operands().contains(ind)) - return; - - const auto frontend_layout = graph()->layout(); - const auto backend_layout = operand_layouts().at(ind); - ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), - obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; - tensor_builder->registerTensorInfo(ind, backend_info, backend_layout); - }); - - // TODO Get compiler options from compiler, and use it rather than getting it from Env - if (util::getConfigString(util::config::EXECUTOR) == "Linear") - { - planTensors(); - } - else - { - // For the executors that does not have fixed linear execution order: - // To make tensors never be deallocated, this is a workaround to use static memory planner - graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { - if (tensor_builder->isRegistered(ind)) - tensor_builder->notifyFirstUse(ind); - }); - } - - tensor_builder->prepare(); - - return tensor_registry.get(); -} - -FunctionMap BackendContext::genKernels() -{ - FunctionMap ret; - - for (auto op_ind : _data.op_order) - { - auto fn_seq = kernel_gen->generate(op_ind); - ret.emplace_back(op_ind, std::move(fn_seq)); - } - - tensor_builder->allocate(); - initConsts(); - - // NOTE For memory 
optimization, we want to free some operand data - const_cast<ir::Graph &>(*_data.graph) - .operands() - .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - for (auto &it : ret) - { - auto &fn_seq = it.second; - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - tensor_builder->postFunctionPrepare(); - }); - } - - return ret; -} - -} // namespace acl_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/acl_cl/BackendContext.h b/runtime/onert/backend/acl_cl/BackendContext.h index 2638046ca..5da915825 100644 --- a/runtime/onert/backend/acl_cl/BackendContext.h +++ b/runtime/onert/backend/acl_cl/BackendContext.h @@ -17,10 +17,11 @@ #ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ #define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ -#include <backend/BackendContext.h> -#include "TensorBuilder.h" +#include <AclBackendContext.h> + #include "ConstantInitializer.h" #include "KernelGenerator.h" +#include "TensorBuilder.h" namespace onert { @@ -31,33 +32,8 @@ namespace acl_cl class Optimizer; -class BackendContext : public onert::backend::BackendContext -{ -public: - BackendContext(const Backend *backend, ContextData &&data, - std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, - std::shared_ptr<TensorBuilder> tensor_builder = nullptr, - std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, - std::shared_ptr<KernelGenerator> kernel_gen = nullptr) - : onert::backend::BackendContext(backend, std::move(data), tensor_registry), - tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{ - kernel_gen} - { - } - - ITensorRegistry *genTensors() override; - FunctionMap genKernels() override; - -private: - void initConsts(); - void planTensors(); - -public: - std::shared_ptr<TensorBuilder> tensor_builder; - std::shared_ptr<ConstantInitializer> constant_initializer; - std::shared_ptr<KernelGenerator> kernel_gen; - std::shared_ptr<Optimizer> optimizer; 
-}; +using BackendContext = + acl_common::AclBackendContext<TensorBuilder, ConstantInitializer, KernelGenerator, Optimizer>; } // namespace acl_cl } // namespace backend diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc index 54b2a7a08..0431bb198 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc @@ -58,21 +58,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) if (block_size_obj.isConstant()) { - _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { - assert(model_obj.data()); - const auto &shape = model_obj.shape(); - const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); - assert(model_obj.shape().rank() == 1); - obj.access([&](ITensor &tensor) { - for (size_t i = 0; i < shape.num_elements(); ++i) - { - const int32_t value = base[shape.num_elements() - i - 1]; - int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + - tensor.calcOffset({static_cast<int32_t>(i)})); - *into = value; - } - }); - }; + _init_map[block_size_index] = acl_common::initReverseOrder<int32_t>; } const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS); diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc index 12e805ee5..a9ce888ee 100644 --- a/runtime/onert/backend/acl_cl/Optimizer.cc +++ b/runtime/onert/backend/acl_cl/Optimizer.cc @@ -16,12 +16,12 @@ #include "Optimizer.h" -#include "ParentInfo.h" +#include <AclSubTensorAnalyzer.h> -#include <cassert> #include <compiler/LoweredGraph.h> #include <util/logging.h> -#include "AclSubTensorAnalyzer.h" + +#include <cassert> namespace onert { diff --git a/runtime/onert/backend/acl_common/AclBackendContext.h b/runtime/onert/backend/acl_common/AclBackendContext.h new file mode 100644 index 000000000..b8d027476 --- /dev/null +++ 
b/runtime/onert/backend/acl_common/AclBackendContext.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include <ir/Index.h> +#include <ir/OperandIndexMap.h> +#include <ir/OperandIndexSequence.h> +#include <util/logging.h> + +#include <cl_common/BackendContext.h> + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +// TODO Find better way to handle common code (reduce template) +template <typename T_TensorBuilder, typename T_ConstantInitializer, typename T_KernelGenerator, + typename T_Optimizer> +class AclBackendContext + : public onert::backend::cl_common::BackendContext<T_TensorBuilder, T_ConstantInitializer, + T_KernelGenerator> +{ +public: + AclBackendContext(const Backend *backend, ContextData &&data, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<T_TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<T_ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<T_KernelGenerator> kernel_gen = nullptr) + : onert::backend::cl_common::BackendContext<T_TensorBuilder, T_ConstantInitializer, + T_KernelGenerator>( + backend, std::move(data), tensor_registry, tensor_builder, constant_initializer, kernel_gen) + { + // DO NOTHING + } + + 
ITensorRegistry *genTensors() override + { + optimizer->optimize(); + + this->graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { + if (this->external_operands().contains(ind)) + return; + + const auto frontend_layout = this->graph()->layout(); + const auto backend_layout = this->operand_layouts().at(ind); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + this->tensor_builder->registerTensorInfo(ind, backend_info, backend_layout); + }); + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + this->planTensors(); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + this->graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { + if (this->tensor_builder->isRegistered(ind)) + this->tensor_builder->notifyFirstUse(ind); + }); + } + + this->tensor_builder->prepare(); + + return this->tensor_registry.get(); + } + +protected: + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout) override + { + this->tensor_builder->registerTensorInfo(ind, info, backend_layout); + } + +public: + // TODO Make it private + std::shared_ptr<T_Optimizer> optimizer; +}; + +} // namespace acl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h index b7f66b50e..65659ad50 100644 --- a/runtime/onert/backend/acl_common/AclConstantInitializer.h +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h @@ -153,6 +153,23 @@ void permuteInit(const 
onert::ir::Operand &model_obj, onert::backend::ITensor &o Init<T>(model_obj, obj, copy, frontend_layout); } +// Pre-defined initializer - fill reverse order +template <typename T> void initReverseOrder(const ir::Operand &model_obj, backend::ITensor &obj) +{ + assert(model_obj.data()); + const auto &shape = model_obj.shape(); + const auto base = reinterpret_cast<const T *>(model_obj.data()->base()); + assert(model_obj.shape().rank() == 1); + obj.access([&](ITensor &tensor) { + for (size_t i = 0; i < shape.num_elements(); ++i) + { + const T value = base[shape.num_elements() - i - 1]; + T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset({static_cast<T>(i)})); + *into = value; + } + }); +} + class AclConstantInitializer : public ir::OperationVisitor { public: diff --git a/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h index 60f4ebf7e..a0bbe7c3c 100644 --- a/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h +++ b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h @@ -17,9 +17,10 @@ #ifndef __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__ #define __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__ +#include <cl_common/ParentInfo.h> + #include <ir/OperationVisitor.h> #include <ir/Graph.h> -#include "ParentInfo.h" namespace onert { @@ -94,21 +95,21 @@ public: } coordinate_info.set(axis, axis_point); - _parent_map.emplace( - input_index, acl_common::ParentInfo{output_index, _current_op_layout, coordinate_info}); + _parent_map.emplace(input_index, + cl_common::ParentInfo{output_index, _current_op_layout, coordinate_info}); axis_point += input_shape.dim(axis); } } - std::unordered_map<ir::OperandIndex, ParentInfo> &&releaseParentMap() + std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&releaseParentMap() { return std::move(_parent_map); } private: const ir::Graph &_graph; - std::unordered_map<ir::OperandIndex, ParentInfo> _parent_map; + 
std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> _parent_map; ir::Layout _current_op_layout{ir::Layout::UNKNOWN}; bool usePadding{false}; }; diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h index 7c1c5dd9a..e008fd6f5 100644 --- a/runtime/onert/backend/acl_common/AclTensorBuilder.h +++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h @@ -17,18 +17,21 @@ #ifndef __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__ #define __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__ -#include <memory> -#include <queue> - -#include <arm_compute/core/Types.h> -#include "ir/OperandIndexMap.h" -#include <ir/Operands.h> #include "AclTensorManager.h" #include "AclTensorRegistry.h" -#include <memory> -#include "ParentInfo.h" + +#include <cl_common/LifetimeMap.h> +#include <cl_common/ParentInfo.h> + +#include <ir/OperandIndexMap.h> +#include <ir/Operands.h> #include <util/Utils.h> +#include <arm_compute/core/Types.h> + +#include <memory> +#include <queue> + namespace onert { namespace backend @@ -36,16 +39,12 @@ namespace backend namespace acl_common { -enum class UsesType -{ - FIRST, - LAST -}; - template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder { public: using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>; + // TODO Remove this alias and direct usage of this type + using UsesType = cl_common::UsesType; AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr); @@ -76,7 +75,7 @@ public: _uses_count_map[index] = num_uses; } - void parent_map(std::unordered_map<ir::OperandIndex, ParentInfo> &&parent_map) + void parent_map(std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&parent_map) { _parent_map = std::move(parent_map); } @@ -104,10 +103,10 @@ private: std::unique_ptr<T_AclTensorManager> _tensor_mgr; // for linear executor - std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq; + 
cl_common::LifetimeSeq _lifetime_seq; // Extra info for concat elimination - ir::OperandIndexMap<ParentInfo> _parent_map; + ir::OperandIndexMap<cl_common::ParentInfo> _parent_map; }; } // namespace acl_common @@ -217,55 +216,7 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::prepare(void) template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocate(void) { - // Update lifetime sequence to apply subtensor optimization - - std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map; - std::function<ir::OperandIndex &(ir::OperandIndex)> find_root = - [&](ir::OperandIndex ind) -> ir::OperandIndex & { - ir::OperandIndex &ret = root_map[ind]; - - // We know the root parent value already - if (ret.valid()) - return ret; - - auto itr = _parent_map.find(ind); - if (itr == _parent_map.end()) - { - // If there is no parent, let's store the value of itself - return ret = ind; - } - else - { - return ret = find_root(itr->second.parent); - } - }; - - ir::OperandIndexMap<bool> first_use_check; - ir::OperandIndexMap<bool> last_use_check; - std::map<size_t, std::pair<UsesType, ir::OperandIndex>> lifetime_map; - for (size_t i = 0; i < _lifetime_seq.size(); i++) - { - auto &entry = _lifetime_seq[i]; - if (entry.first != UsesType::FIRST) - continue; - auto root_ind = find_root(entry.second); - if (first_use_check[root_ind]) - continue; - first_use_check[root_ind] = true; - lifetime_map[i] = {UsesType::FIRST, root_ind}; - } - - for (int i = _lifetime_seq.size() - 1; i >= 0; i--) - { - auto &entry = _lifetime_seq[i]; - if (entry.first != UsesType::LAST) - continue; - auto root_ind = find_root(entry.second); - if (last_use_check[root_ind]) - continue; - last_use_check[root_ind] = true; - lifetime_map[i] = {UsesType::LAST, root_ind}; - } + auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map); for (auto &entry : lifetime_map) { diff --git 
a/runtime/onert/backend/acl_common/CMakeLists.txt b/runtime/onert/backend/acl_common/CMakeLists.txt index d3ae5acf7..8d409a47c 100644 --- a/runtime/onert/backend/acl_common/CMakeLists.txt +++ b/runtime/onert/backend/acl_common/CMakeLists.txt @@ -12,6 +12,7 @@ target_include_directories(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC ${CMAKE_CURREN target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC onert_core) target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC arm_compute arm_compute_ex) target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC nnfw_lib_misc) +target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC ${LIB_ONERT_BACKEND_CL_COMMON}) target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_common) target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_coverage) diff --git a/runtime/onert/backend/acl_neon/BackendContext.cc b/runtime/onert/backend/acl_neon/BackendContext.cc deleted file mode 100644 index 4de3de02d..000000000 --- a/runtime/onert/backend/acl_neon/BackendContext.cc +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "BackendContext.h" - -#include "TensorBuilder.h" -#include "KernelGenerator.h" -#include "Optimizer.h" -#include "util/logging.h" -#include "ir/Index.h" -#include "ir/OperandIndexMap.h" -#include "ir/OperandIndexSequence.h" - -namespace onert -{ -namespace backend -{ -namespace acl_neon -{ - -void BackendContext::initConsts() -{ - _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) { - constant_initializer->setLayout(graph()->layout()); - op.accept(*constant_initializer); - }); - - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { - if (_data.external_operands.contains(ind) || !operand.isConstant()) - return; - const auto &obj = graph()->operands().at(ind); - if (obj.isConstant() && !constant_initializer->exist(ind)) - { - constant_initializer->registerDefaultInitializer(ind, obj); - } - }); - - constant_initializer->run(); -} - -void BackendContext::planTensors() -{ - ir::OperandIndexMap<uint32_t> uses_map; - ir::OperandIndexMap<uint32_t> def_map; - ir::OperandIndexSequence constants; - - // Prepare scanning - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (_data.external_operands.contains(ind)) - return; - - uses_map[ind] = obj.getUses().size(); - def_map[ind] = obj.getDef().valid() ? 1 : 0; - - if (obj.isConstant()) - constants.append(ind); - - if (!tensor_builder->isRegistered(ind)) - { - // These tensors do not exist in any operation (No use and def) - const auto info = obj.info(); - const auto layout = _data.operand_layouts.at(ind); - // TODO Change tensor info to have permuted shape - tensor_builder->registerTensorInfo(ind, info, layout); - } - }); - - // Start scanning to do notify{First|Last}Use for each tensor - - // If a tensor is a constant, increase the use of the tensor and allocate it first. - // Increasing use count here makes the tensor never be deallocated, i.e it they will be - // deallocated last. 
- VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; - for (const auto &ind : constants) - { - uses_map[ind]++; - tensor_builder->notifyFirstUse(ind); - } - - // At each operation, - // 1. Scan DEF of outputs. If the DEF, allocate it - // 2. Scan DEF of inputs. If variable tensor, allocate it - // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 - for (const auto op_ind : _data.op_order) - { - auto op_inputs = - graph()->operations().at(op_ind).getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - auto op_outputs = graph()->operations().at(op_ind).getOutputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED; - - // Define outputs - for (const auto &ind : op_outputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(def_map.find(ind) != def_map.end()); - if (def_map[ind]) - { - def_map[ind] = 0; - tensor_builder->notifyFirstUse(ind); - } - } - - // Scan variable tensors - // This tensor has features like constant. But OperandInfo and LowerInfo treat them as - // non-constant because of less memory usage by memory planning in here - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - const auto &operand = graph()->operands().at(ind); - if (operand.info().isVariable()) - { - // The variable tensor with buffer is not supported yet - assert(operand.data() == nullptr); - assert(operand.getUses().size() == 1 && !operand.getDef().valid()); - assert(uses_map[ind] == 1 && def_map[ind] == 0); - tensor_builder->notifyFirstUse(ind); - } - } - - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(uses_map.find(ind) != uses_map.end()); - assert(uses_map[ind] > 0); - uses_map[ind]--; - if (uses_map[ind] == 0) - { - // plan for deallocation of static tensornode - tensor_builder->notifyLastUse(ind); - } - } - } - - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { - if (uses_map[ind] == 0) - { - 
tensor_builder->notifyLastUse(ind); - } - }); - - // Dispose and validate - for (const auto &ind : constants) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder->notifyLastUse(ind); - } - } - - assert( - std::all_of(uses_map.begin(), uses_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); - - assert( - std::all_of(def_map.begin(), def_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); -} - -ITensorRegistry *BackendContext::genTensors() -{ - optimizer->optimize(); - - graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (external_operands().contains(ind)) - return; - - const auto frontend_layout = graph()->layout(); - const auto backend_layout = operand_layouts().at(ind); - ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), - obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; - tensor_builder->registerTensorInfo(ind, backend_info, backend_layout); - }); - - // TODO Get compiler options from compiler, and use it rather than getting it from Env - if (util::getConfigString(util::config::EXECUTOR) == "Linear") - { - planTensors(); - } - else - { - // For the executors that does not have fixed linear execution order: - // To make tensors never be deallocated, this is a workaround to use static memory planner - graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { - if (tensor_builder->isRegistered(ind)) - tensor_builder->notifyFirstUse(ind); - }); - } - - tensor_builder->prepare(); - - return tensor_registry.get(); -} - -FunctionMap BackendContext::genKernels() -{ - FunctionMap ret; - - for (auto op_ind : _data.op_order) - { - auto fn_seq = kernel_gen->generate(op_ind); - ret.emplace_back(op_ind, std::move(fn_seq)); - } - - tensor_builder->allocate(); - initConsts(); - - // NOTE For memory 
optimization, we want to free some operand data - const_cast<ir::Graph &>(*_data.graph) - .operands() - .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - for (auto &it : ret) - { - auto &fn_seq = it.second; - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - tensor_builder->postFunctionPrepare(); - }); - } - - return ret; -} - -} // namespace acl_neon -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/acl_neon/BackendContext.h b/runtime/onert/backend/acl_neon/BackendContext.h index 35d777f7b..b73dd188e 100644 --- a/runtime/onert/backend/acl_neon/BackendContext.h +++ b/runtime/onert/backend/acl_neon/BackendContext.h @@ -17,10 +17,11 @@ #ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ #define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ -#include <backend/BackendContext.h> -#include "TensorBuilder.h" +#include <AclBackendContext.h> + #include "ConstantInitializer.h" #include "KernelGenerator.h" +#include "TensorBuilder.h" namespace onert { @@ -31,34 +32,8 @@ namespace acl_neon class Optimizer; -class BackendContext : public onert::backend::BackendContext -{ -public: - BackendContext(const Backend *backend, ContextData &&data, - std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, - std::shared_ptr<TensorBuilder> tensor_builder = nullptr, - std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, - std::shared_ptr<KernelGenerator> kernel_gen = nullptr) - : onert::backend::BackendContext(backend, std::move(data), tensor_registry), - tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{ - kernel_gen} - { - } - - ITensorRegistry *genTensors() override; - FunctionMap genKernels() override; - -private: - void initConsts(); - void planTensors(); - -public: - // TODO Make it private - std::shared_ptr<TensorBuilder> tensor_builder; - std::shared_ptr<ConstantInitializer> constant_initializer; - std::shared_ptr<KernelGenerator> kernel_gen; 
- std::shared_ptr<Optimizer> optimizer; -}; +using BackendContext = + acl_common::AclBackendContext<TensorBuilder, ConstantInitializer, KernelGenerator, Optimizer>; } // namespace acl_neon } // namespace backend diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc index 35da7c952..1bd702756 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.cc @@ -37,21 +37,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) if (block_size_obj.isConstant()) { - _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { - assert(model_obj.data()); - const auto &shape = model_obj.shape(); - const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); - assert(model_obj.shape().rank() == 1); - obj.access([&](ITensor &tensor) { - for (size_t i = 0; i < shape.num_elements(); ++i) - { - const int32_t value = base[shape.num_elements() - i - 1]; - int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + - tensor.calcOffset({static_cast<int32_t>(i)})); - *into = value; - } - }); - }; + _init_map[block_size_index] = acl_common::initReverseOrder<int32_t>; } const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS); diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc index 781103f9c..283edd174 100644 --- a/runtime/onert/backend/acl_neon/Optimizer.cc +++ b/runtime/onert/backend/acl_neon/Optimizer.cc @@ -16,12 +16,12 @@ #include "Optimizer.h" -#include "ParentInfo.h" +#include <AclSubTensorAnalyzer.h> -#include <cassert> #include <compiler/LoweredGraph.h> #include <util/logging.h> -#include "AclSubTensorAnalyzer.h" + +#include <cassert> namespace onert { diff --git a/runtime/onert/backend/cl_common/CMakeLists.txt b/runtime/onert/backend/cl_common/CMakeLists.txt new file mode 100644 index 
000000000..c75129696 --- /dev/null +++ b/runtime/onert/backend/cl_common/CMakeLists.txt @@ -0,0 +1,7 @@ +file(GLOB_RECURSE SOURCES "src/*.cc") + +add_library(${LIB_ONERT_BACKEND_CL_COMMON} STATIC ${SOURCES}) + +target_include_directories(${LIB_ONERT_BACKEND_CL_COMMON} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) +set_target_properties(${LIB_ONERT_BACKEND_CL_COMMON} PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_link_libraries(${LIB_ONERT_BACKEND_CL_COMMON} PUBLIC onert_core) diff --git a/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h b/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h new file mode 100644 index 000000000..7bb72d74e --- /dev/null +++ b/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include <ir/Index.h> +#include <ir/OperandIndexMap.h> +#include <ir/OperandIndexSequence.h> +#include <util/logging.h> + +namespace onert +{ +namespace backend +{ +namespace cl_common +{ + +// TODO Find better way to handle common code (reduce template) +template <typename T_TensorBuilder, typename T_ConstantInitializer, typename T_KernelGenerator> +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, ContextData &&data, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<T_TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<T_ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<T_KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, std::move(data), tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{ + kernel_gen} + { + } + + FunctionMap genKernels() override + { + FunctionMap ret; + + // kernel_gen + for (auto op_ind : _data.op_order) + { + auto fn_seq = kernel_gen->generate(op_ind); + ret.emplace_back(op_ind, std::move(fn_seq)); + } + + tensor_builder->allocate(); + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + const_cast<ir::Graph &>(*_data.graph) + .operands() + .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { + ifunc.prepare(); + tensor_builder->postFunctionPrepare(); + }); + } + + return ret; + } + +protected: + void initConsts() + { + _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) { + constant_initializer->setLayout(graph()->layout()); + op.accept(*constant_initializer); + }); + + 
_data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { + if (_data.external_operands.contains(ind) || !operand.isConstant()) + return; + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + }); + + constant_initializer->run(); + } + + virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout) = 0; + + void planTensors() + { + ir::OperandIndexMap<uint32_t> uses_map; + ir::OperandIndexMap<uint32_t> def_map; + ir::OperandIndexSequence constants; + + // Prepare scanning + _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { + if (_data.external_operands.contains(ind)) + return; + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().valid() ? 1 : 0; + + if (obj.isConstant()) + constants.append(ind); + + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any operation (No use and def) + const auto info = obj.info(); + const auto layout = _data.operand_layouts.at(ind); + // TODO Change tensor info to have permuted shape + registerTensorInfo(ind, info, layout); + } + }); + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor and allocate it first. + // Increasing use count here makes the tensor never be deallocated, i.e it they will be + // deallocated last. + VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan DEF of inputs. If variable tensor, allocate it + // 3. Scan USE of inputs. 
Decrease the USE and deallocate if the USE is 0 + for (const auto op_ind : _data.op_order) + { + const auto &op = graph()->operations().at(op_ind); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + // Define outputs + for (const auto &ind : op_outputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder->notifyFirstUse(ind); + } + } + + // Scan variable tensors + // This tensor has features like constant. But OperandInfo and LowerInfo treat them as + // non-constant because of less memory usage by memory planning in here + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + const auto &operand = graph()->operands().at(ind); + if (operand.info().isVariable()) + { + // The variable tensor with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + assert(uses_map[ind] == 1 && def_map[ind] == 0); + tensor_builder->notifyFirstUse(ind); + } + } + + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + // plan for deallocation of static tensornode + tensor_builder->notifyLastUse(ind); + } + } + } + + _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { + if (uses_map[ind] == 0) + { + tensor_builder->notifyLastUse(ind); + } + }); + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder->notifyLastUse(ind); + } + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair<const 
ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + } + +public: + // TODO Make it protected + std::shared_ptr<T_TensorBuilder> tensor_builder; + std::shared_ptr<T_ConstantInitializer> constant_initializer; + std::shared_ptr<T_KernelGenerator> kernel_gen; +}; + +} // namespace cl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h b/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h new file mode 100644 index 000000000..5fe5eec79 --- /dev/null +++ b/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__ +#define __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__ + +#include "cl_common/ParentInfo.h" + +#include <ir/OperandIndexMap.h> + +#include <map> +#include <vector> + +namespace onert +{ +namespace backend +{ +namespace cl_common +{ + +// TODO Abstract UserType into LifetimeMap and LifetimeSeq +enum class UsesType +{ + FIRST, + LAST +}; + +// TODO Define class or struct for LifetimeMap and LifetimeSeq +using LifetimeMap = std::map<size_t, std::pair<UsesType, ir::OperandIndex>>; +using LifetimeSeq = std::vector<std::pair<UsesType, ir::OperandIndex>>; + +LifetimeMap createLifetimeMap(LifetimeSeq &seq, ir::OperandIndexMap<ParentInfo> &parent_map); + +} // namespace cl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__ diff --git a/runtime/onert/backend/gpu_cl/ParentInfo.h b/runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h index d7cb2d4fb..510211cb7 100644 --- a/runtime/onert/backend/gpu_cl/ParentInfo.h +++ b/runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_PARENT_INFO_H__ -#define __ONERT_BACKEND_PARENT_INFO_H__ +#ifndef __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__ +#define __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__ #include <ir/Index.h> #include <ir/Coordinates.h> @@ -24,7 +24,7 @@ namespace onert { namespace backend { -namespace gpu_cl +namespace cl_common { /** @@ -37,8 +37,8 @@ struct ParentInfo ir::Coordinates coordinates; }; -} // namespace gpu_cl +} // namespace cl_common } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__ +#endif // __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__ diff --git a/runtime/onert/backend/cl_common/src/LifetimeMap.cc b/runtime/onert/backend/cl_common/src/LifetimeMap.cc new file mode 100644 index 000000000..0b17c58fb --- /dev/null +++ b/runtime/onert/backend/cl_common/src/LifetimeMap.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cl_common/LifetimeMap.h" + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace cl_common +{ + +LifetimeMap createLifetimeMap(LifetimeSeq &lifetime_seq, + ir::OperandIndexMap<ParentInfo> &parent_map) +{ + // Update lifetime sequence to apply subtensor optimization + std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map; + std::function<ir::OperandIndex &(ir::OperandIndex)> find_root = + [&](ir::OperandIndex ind) -> ir::OperandIndex & { + ir::OperandIndex &ret = root_map[ind]; + + // We know the root parent value already + if (ret.valid()) + return ret; + + auto itr = parent_map.find(ind); + if (itr == parent_map.end()) + { + // If there is no parent, let's store the value of itself + return ret = ind; + } + else + { + return ret = find_root(itr->second.parent); + } + }; + + ir::OperandIndexMap<bool> first_use_check; + ir::OperandIndexMap<bool> last_use_check; + LifetimeMap lifetime_map; + for (size_t i = 0; i < lifetime_seq.size(); i++) + { + auto &entry = lifetime_seq[i]; + if (entry.first != UsesType::FIRST) + continue; + auto root_ind = find_root(entry.second); + if (first_use_check[root_ind]) + continue; + first_use_check[root_ind] = true; + lifetime_map[i] = {UsesType::FIRST, root_ind}; + } + + for (int i = lifetime_seq.size() - 1; i >= 0; i--) + { + auto &entry = lifetime_seq[i]; + if (entry.first != UsesType::LAST) + continue; + auto root_ind = find_root(entry.second); + if (last_use_check[root_ind]) + continue; + last_use_check[root_ind] = true; + lifetime_map[i] = {UsesType::LAST, root_ind}; + } + + return lifetime_map; +} + +} // namespace cl_common +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.cc b/runtime/onert/backend/cpu/ops/OperationUtils.cc index 8ac875842..aa4ef352e 100644 --- a/runtime/onert/backend/cpu/ops/OperationUtils.cc +++ b/runtime/onert/backend/cpu/ops/OperationUtils.cc @@ -194,7 +194,7 @@ void 
CalculateActivationRangeQuantized(ir::Activation activation, const IPortabl } else { - std::cout << "Unsupported fused activation function." << std::endl; + throw std::runtime_error{"Unsupported fused activation function."}; } } diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.h b/runtime/onert/backend/cpu/ops/OperationUtils.h index ac2fbb84f..1fefc3228 100644 --- a/runtime/onert/backend/cpu/ops/OperationUtils.h +++ b/runtime/onert/backend/cpu/ops/OperationUtils.h @@ -18,19 +18,19 @@ #define __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__ #include <backend/IPortableTensor.h> - -#include <cker/Shape.h> -#include <cker/Types.h> -#include <iostream> #include <ir/DataType.h> -#include <ir/InternalType.h> #include <ir/Operand.h> #include <ir/Padding.h> +#include <util/CalculateActivationRange.h> + +#include <cker/Shape.h> +#include <cker/Types.h> #include <limits> #include <vector> using OperandType = onert::ir::DataType; +using namespace onert::util; namespace onert { @@ -166,40 +166,6 @@ void GetQuantizedConvolutionMultipliersAndShifts( int num_channels, std::vector<int32_t> &per_channel_output_multiplier, std::vector<int> &per_channel_output_shift); -template <typename T> -void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) -{ - if (activation == ir::Activation::RELU) - { - *activation_min = 0; - *activation_max = std::numeric_limits<T>::max(); - } - else if (activation == ir::Activation::RELU6) - { - *activation_min = 0; - *activation_max = 6; - } - else if (activation == ir::Activation::RELU1) - { - *activation_min = -1; - *activation_max = 1; - } - else if (activation == ir::Activation::SIGMOID) - { - *activation_min = 0; - *activation_max = 1; - } - else if (activation == ir::Activation::NONE) - { - *activation_min = std::numeric_limits<T>::lowest(); - *activation_max = std::numeric_limits<T>::max(); - } - else - { - std::cout << "Unsupported fused activation function." 
<< std::endl; - } -} - void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output, int32_t *act_min, int32_t *act_max); diff --git a/runtime/onert/backend/gpu_cl/Backend.h b/runtime/onert/backend/gpu_cl/Backend.h index dc0b8596c..d67ba1602 100644 --- a/runtime/onert/backend/gpu_cl/Backend.h +++ b/runtime/onert/backend/gpu_cl/Backend.h @@ -22,13 +22,13 @@ #include "BackendContext.h" #include "Config.h" -#include "ClTensorRegistry.h" +#include "TensorRegistry.h" #include "KernelGenerator.h" #include "TensorManager.h" #include "TensorBuilder.h" -#include "open_cl/Environment.h" -#include "open_cl/Status.h" +#include "tensorflow/lite/delegates/gpu/cl/environment.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" namespace onert { @@ -50,22 +50,22 @@ public: const auto &operands = data.graph->operands(); auto context = std::make_unique<gpu_cl::BackendContext>(this, std::move(data)); - auto environment = std::make_shared<Environment>(); + auto environment = std::make_shared<tflite::gpu::cl::Environment>(); if (!CreateEnvironment(environment.get()).ok()) { return nullptr; } auto tm = createTensorManager(&environment->context()); - auto tr = std::make_shared<ClTensorRegistry<TensorManager>>(tm); + auto tr = std::make_shared<TensorRegistry>(tm); - InferenceContext::CreateInferenceInfo create_info; - create_info.precision = CalculationsPrecision::F32; + tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info; + create_info.precision = tflite::gpu::cl::CalculationsPrecision::F32; create_info.storage_type = - GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo()); - create_info.hints.Add(ModelHints::kFastestInference); + tflite::gpu::cl::GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo()); + create_info.hints.Add(tflite::gpu::cl::ModelHints::kFastestInference); - auto cc = std::make_shared<CreationContext>(); + auto cc = std::make_shared<tflite::gpu::cl::CreationContext>(); 
cc->device = environment->GetDevicePtr(); cc->context = &environment->context(); cc->queue = environment->queue(); diff --git a/runtime/onert/backend/gpu_cl/BackendContext.cc b/runtime/onert/backend/gpu_cl/BackendContext.cc index 6c3ac81a2..ec9442155 100644 --- a/runtime/onert/backend/gpu_cl/BackendContext.cc +++ b/runtime/onert/backend/gpu_cl/BackendContext.cc @@ -33,147 +33,26 @@ namespace backend namespace gpu_cl { -void BackendContext::initConsts() +void BackendContext::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout) { - _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) { - constant_initializer->setLayout(graph()->layout()); - op.accept(*constant_initializer); - }); - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { - if (_data.external_operands.contains(ind) || !operand.isConstant()) - return; - const auto &obj = graph()->operands().at(ind); - if (obj.isConstant() && !constant_initializer->exist(ind)) - { - constant_initializer->registerDefaultInitializer(ind, obj); - } - }); - - constant_initializer->run(); + TensorType type = TensorType::TENSOR_TYPE_VALID; + tensor_builder->registerTensorInfo(ind, info, backend_layout, type); } -void BackendContext::planTensors() +ITensorRegistry *BackendContext::genTensors() { - ir::OperandIndexMap<uint32_t> uses_map; - ir::OperandIndexMap<uint32_t> def_map; - ir::OperandIndexSequence constants; - - // Prepare scanning - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (_data.external_operands.contains(ind)) - return; - uses_map[ind] = obj.getUses().size(); - def_map[ind] = obj.getDef().valid() ? 
1 : 0; - - if (obj.isConstant()) - constants.append(ind); - - if (!tensor_builder->isRegistered(ind)) - { - // These tensors do not exist in any operation (No use and def) - const auto info = obj.info(); - const auto layout = _data.operand_layouts.at(ind); - // TODO Change tensor info to have permuted shape - tensor_builder->registerTensorInfo(ind, info, layout); - } - }); - - // Start scanning to do notify{First|Last}Use for each tensor + ir::OperandIndexMap<TensorType> type_map; - // If a tensor is a constant, increase the use of the tensor and allocate it first. - // Increasing use count here makes the tensor never be deallocated, i.e it they will be - // deallocated last. - VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; - for (const auto &ind : constants) + for (const auto &ind : graph()->getInputs()) { - uses_map[ind]++; - tensor_builder->notifyFirstUse(ind); + type_map[ind] = TensorType::TENSOR_TYPE_INPUT; } - // At each operation, - // 1. Scan DEF of outputs. If the DEF, allocate it - // 2. Scan DEF of inputs. If variable tensor, allocate it - // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 - for (const auto op_ind : _data.op_order) + for (const auto &ind : graph()->getOutputs()) { - const auto &op = graph()->operations().at(op_ind); - auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - - // Define outputs - for (const auto &ind : op_outputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(def_map.find(ind) != def_map.end()); - if (def_map[ind]) - { - def_map[ind] = 0; - tensor_builder->notifyFirstUse(ind); - } - } - - // Scan variable tensors - // This tensor has features like constant. 
But OperandInfo and LowerInfo treat them as - // non-constant because of less memory usage by memory planning in here - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - const auto &operand = graph()->operands().at(ind); - if (operand.info().isVariable()) - { - // The variable tensor with buffer is not supported yet - assert(operand.data() == nullptr); - assert(operand.getUses().size() == 1 && !operand.getDef().valid()); - assert(uses_map[ind] == 1 && def_map[ind] == 0); - tensor_builder->notifyFirstUse(ind); - } - } - - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(uses_map.find(ind) != uses_map.end()); - assert(uses_map[ind] > 0); - uses_map[ind]--; - if (uses_map[ind] == 0) - { - // plan for deallocation of static tensornode - tensor_builder->notifyLastUse(ind); - } - } + type_map[ind] = TensorType::TENSOR_TYPE_OUTPUT; } - - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { - if (uses_map[ind] == 0) - { - tensor_builder->notifyLastUse(ind); - } - }); - - // Dispose and validate - for (const auto &ind : constants) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder->notifyLastUse(ind); - } - } - - assert( - std::all_of(uses_map.begin(), uses_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); - - assert( - std::all_of(def_map.begin(), def_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); -} - -ITensorRegistry *BackendContext::genTensors() -{ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { if (external_operands().contains(ind)) return; @@ -182,7 +61,11 @@ ITensorRegistry *BackendContext::genTensors() const auto backend_layout = operand_layouts().at(ind); ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, 
backend_layout), obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; - tensor_builder->registerTensorInfo(ind, backend_info, backend_layout); + if (obj.isConstant()) + { + type_map[ind] = TensorType::TENSOR_TYPE_INPUT; + } + tensor_builder->registerTensorInfo(ind, backend_info, backend_layout, type_map[ind]); }); // TODO Get compiler options from compiler, and use it rather than getting it from Env @@ -199,44 +82,10 @@ ITensorRegistry *BackendContext::genTensors() tensor_builder->notifyFirstUse(ind); }); } - tensor_builder->prepare(); - return tensor_registry.get(); } -FunctionMap BackendContext::genKernels() -{ - FunctionMap ret; - - // kernel_gen - for (auto op_ind : _data.op_order) - { - auto fn_seq = kernel_gen->generate(op_ind); - ret.emplace_back(op_ind, std::move(fn_seq)); - } - - tensor_builder->allocate(); - - initConsts(); - - // NOTE For memory optimization, we want to free some operand data - const_cast<ir::Graph &>(*_data.graph) - .operands() - .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - for (auto &it : ret) - { - auto &fn_seq = it.second; - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - tensor_builder->postFunctionPrepare(); - }); - } - - return ret; -} - } // namespace gpu_cl } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/gpu_cl/BackendContext.h b/runtime/onert/backend/gpu_cl/BackendContext.h index f17489e7a..7412d2bce 100644 --- a/runtime/onert/backend/gpu_cl/BackendContext.h +++ b/runtime/onert/backend/gpu_cl/BackendContext.h @@ -20,10 +20,12 @@ #include <backend/BackendContext.h> #include <util/ConfigSource.h> +#include <cl_common/BackendContext.h> + #include "ConstantInitializer.h" #include "KernelGenerator.h" #include "TensorBuilder.h" -#include "open_cl/InferenceContext.h" +#include "tensorflow/lite/delegates/gpu/cl/inference_context.h" namespace onert { @@ -32,31 +34,28 @@ namespace backend namespace gpu_cl { -class BackendContext : public 
onert::backend::BackendContext +class BackendContext + : public onert::backend::cl_common::BackendContext<TensorBuilder, ConstantInitializer, + KernelGenerator> { public: BackendContext(const Backend *backend, ContextData &&data, - std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorRegistry> tensor_registry = nullptr, std::shared_ptr<TensorBuilder> tensor_builder = nullptr, std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, std::shared_ptr<KernelGenerator> kernel_gen = nullptr) - : onert::backend::BackendContext(backend, std::move(data), tensor_registry), - tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{ - kernel_gen} + : onert::backend::cl_common::BackendContext<TensorBuilder, ConstantInitializer, + KernelGenerator>( + backend, std::move(data), tensor_registry, tensor_builder, constant_initializer, kernel_gen) { + // DO NOTHING } ITensorRegistry *genTensors() override; - FunctionMap genKernels() override; - -private: - void initConsts(); - void planTensors(); -public: - std::shared_ptr<TensorBuilder> tensor_builder; - std::shared_ptr<ConstantInitializer> constant_initializer; - std::shared_ptr<KernelGenerator> kernel_gen; +protected: + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout) override; }; } // namespace gpu_cl diff --git a/runtime/onert/backend/gpu_cl/CMakeLists.txt b/runtime/onert/backend/gpu_cl/CMakeLists.txt index 49bae37f8..eb1964214 100644 --- a/runtime/onert/backend/gpu_cl/CMakeLists.txt +++ b/runtime/onert/backend/gpu_cl/CMakeLists.txt @@ -1,14 +1,14 @@ set(LIB_ONERT_BACKEND_GPU_CL onert_backend_gpu_cl) +if(NOT BUILD_GPU_CL) + return() +endif(NOT BUILD_GPU_CL) + nnas_find_package(Opencl_Headers QUIET) if(NOT Opencl_Headers_FOUND) return() endif(NOT Opencl_Headers_FOUND) -if(NOT BUILD_GPU_CL) - return() -endif(NOT BUILD_GPU_CL) - nnas_find_package(Farmhash QUIET) if(NOT Farmhash_FOUND) return() @@ 
-19,18 +19,32 @@ if(NOT Abseil_FOUND) return() endif(NOT Abseil_FOUND) -file(GLOB_RECURSE SOURCES "*.cc") +nnfw_find_package(Fp16 QUIET) +if(NOT Fp16_FOUND) + return() +endif(NOT Fp16_FOUND) +nnas_find_package(TensorFlowGpu QUIET) +if(NOT TensorFlowGpu_FOUND) + message(FATAL_ERROR 'TensorFlowGpu lib not found') + return() +endif(NOT TensorFlowGpu_FOUND) + +file(GLOB_RECURSE SOURCES "*.cc") add_library(${LIB_ONERT_BACKEND_GPU_CL} SHARED ${SOURCES}) target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${TENSORFLOWGPU_SOURCE_DIR}) target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE abseil) target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE dl) target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE farmhash) -target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE Headers) +target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} INTERFACE Open_CL_Headers) +target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE fp16) +target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE TensorFlowGpu) target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${LIB_ONERT_BACKEND_CL_COMMON}) target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_common) target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_coverage) diff --git a/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc b/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc index b3ef2f560..05dd8e2a3 100644 --- a/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc +++ b/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc @@ -93,6 +93,9 @@ void ClConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &i case DataType::FLOAT32: _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout); break; + case DataType::INT32: + _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, 
_current_layout); + break; default: throw std::runtime_error("Not supported, yet"); break; diff --git a/runtime/onert/backend/gpu_cl/ClConstantInitializer.h b/runtime/onert/backend/gpu_cl/ClConstantInitializer.h index d7d21e847..95e228acd 100644 --- a/runtime/onert/backend/gpu_cl/ClConstantInitializer.h +++ b/runtime/onert/backend/gpu_cl/ClConstantInitializer.h @@ -17,8 +17,6 @@ #ifndef __ONERT_COMPILER_GPU_CL_CLCONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_GPU_CL_CLCONSTANT_INITIALIZER_H__ -#include "ClTensorRegistry.h" - #include <unordered_map> #include <functional> diff --git a/runtime/onert/backend/gpu_cl/ClFunction.h b/runtime/onert/backend/gpu_cl/ClFunction.h index 9d3d69092..5e8a11a84 100644 --- a/runtime/onert/backend/gpu_cl/ClFunction.h +++ b/runtime/onert/backend/gpu_cl/ClFunction.h @@ -22,9 +22,9 @@ #include <vector> #include <memory> -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/ClCommandQueue.h" -#include "open_cl/Status.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" namespace onert { @@ -32,19 +32,18 @@ namespace backend { namespace gpu_cl { - class ClFunction : public ::onert::exec::IFunction { public: ClFunction() : _gpu_operations(), _creation_context() {} public: - void configure(std::shared_ptr<CreationContext> creation_context) + void configure(std::shared_ptr<tflite::gpu::cl::CreationContext> creation_context) { _creation_context = creation_context; } - void add_operation(std::unique_ptr<GPUOperation> gpu_operation) + void add_operation(std::unique_ptr<tflite::gpu::cl::GPUOperation> gpu_operation) { _gpu_operations.push_back(std::move(gpu_operation)); } @@ -57,6 +56,10 @@ public: { throw std::runtime_error("Failed to AddToQueue."); } + if (!_creation_context->queue->WaitForCompletion().ok()) + { + throw std::runtime_error("Failed to WaitForCompletion."); + } } } @@ -77,8 +80,8 @@ 
public: } private: - std::vector<std::unique_ptr<GPUOperation>> _gpu_operations; - std::shared_ptr<CreationContext> _creation_context; + std::vector<std::unique_ptr<tflite::gpu::cl::GPUOperation>> _gpu_operations; + std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context; }; } // namespace gpu_cl diff --git a/runtime/onert/backend/gpu_cl/ClMemoryManager.h b/runtime/onert/backend/gpu_cl/ClMemoryManager.h deleted file mode 100644 index 3bac0d51d..000000000 --- a/runtime/onert/backend/gpu_cl/ClMemoryManager.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__ -#define __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__ - -#include <cassert> - -#include "ir/OperandIndexMap.h" -#include "ir/Shape.h" -#include "open_cl/ClContext.h" -#include "open_cl/InferenceContext.h" -#include "open_cl/Status.h" -#include "open_cl/StorageTypeUtil.h" -#include "open_cl/TensorType.h" -#include "util/logging.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <typename T_ITensor, typename T_Tensor> class ClMemoryManager -{ -public: - ClMemoryManager(CLContext *context) : _context{context} {} - - virtual ~ClMemoryManager() = default; - - virtual void allocate(void) - { - for (const auto &tensor_entry : _tensors) - { - auto tensor = tensor_entry.second; - const auto &t = tensor_reserver_.Get(tensor_entry.first.value()); - const auto &shape = t->shape; - const auto &descriptor = t->descriptor; - if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok()) - { - return; - } - } - } - - virtual void deallocate(void) - { - // NYI - } - - virtual void startLifetime(const ir::OperandIndex &) - { /* DO NOTHING */ - } - virtual void finishLifetime(const ir::OperandIndex &) - { /* DO NOTHING */ - } - - void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, - InferenceContext::CreateInferenceInfo create_info, - std::shared_ptr<Environment> environment, DeviceInfo &device_info) - { - ValueId max_id = 0; - auto data_type = DeduceDataTypeFromPrecision(create_info.precision); - const auto shape = info.shape(); - - auto tensor = std::make_shared<T_Tensor>(shape.rank(), shape, environment); - _tensors[ind] = tensor; - - BHWC t_shape; - switch (shape.rank()) - { - case 1: - // B layout - t_shape = BHWC(shape.dim(0), 1, 1, 1); - break; - case 2: - // BC layout - t_shape = BHWC(shape.dim(0), 1, 1, shape.dim(1)); - break; - case 3: - // BWC layout - t_shape = BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2)); - break; - case 4: - // BHWC layout - 
t_shape = BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3)); - break; - default: - break; - } - - TensorStorageType storage_type = create_info.storage_type; - Layout layout = t_shape.b == 1 ? Layout::HWC : Layout::BHWC; - - ValueId id = ind.value(); - storage_type = SelectBestStorageType(device_info, t_shape, storage_type, data_type, layout); - auto dummy = std::make_shared<InferenceContext::DummyTensor>(); - dummy->shape = t_shape; - dummy->descriptor = TensorDescriptor{data_type, storage_type, layout}; - tensor_reserver_.Add(id, dummy); - - max_id = std::max(max_id, id); - - tensor_reserver_.SetNext(max_id + 1); - } - - ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &tensors(void) { return _tensors; } - - InferenceContext::TensorReserver &tensorReservers(void) { return tensor_reserver_; } - -private: - ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _tensors; - InferenceContext::TensorReserver tensor_reserver_; - CLContext *_context; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__ diff --git a/runtime/onert/backend/gpu_cl/ClTensorBuilder.h b/runtime/onert/backend/gpu_cl/ClTensorBuilder.h deleted file mode 100644 index 951bbd844..000000000 --- a/runtime/onert/backend/gpu_cl/ClTensorBuilder.h +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CL_TENSOR_BUILDER_H__ -#define __ONERT_BACKEND_CL_TENSOR_BUILDER_H__ - -#include <memory> -#include <queue> - -#include "ClTensorManager.h" -#include "ClTensorRegistry.h" -#include "ParentInfo.h" - -#include "open_cl/TensorType.h" -#include "open_cl/TensorTypeUtil.h" -#include "open_cl/ClDevice.h" -#include "open_cl/InferenceContext.h" - -#include "ir/OperandIndexMap.h" -#include "ir/OperandIndexSequence.h" -#include <ir/Operands.h> -#include <util/Utils.h> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class UsesType -{ - FIRST, - LAST -}; - -template <typename T_ITensor, typename T_Tensor> class ClTensorBuilder -{ -public: - using T_ClTensorManager = ClTensorManager<T_ITensor, T_Tensor>; - - ClTensorBuilder(const ir::Operands &operands, T_ClTensorManager *tensor_mgr, - InferenceContext::CreateInferenceInfo create_info, - const std::shared_ptr<Environment> &environment); - - /** - * @brief Register tensor information to allocate on ACL-CL backend - * @param[in] ind Operand index - * @param[in] info Tensor information - * @param[in] layout Tensor data layout - */ - void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout); - - void notifyFirstUse(const ir::OperandIndex &); - void notifyLastUse(const ir::OperandIndex &); - - bool isRegistered(const ir::OperandIndex &) const; - - void prepare(); - void allocate(); - void postFunctionPrepare(); - - T_ClTensorManager *cl_tensor_manager(void) { return _tensor_mgr.get(); } - - void setUsesCount(const ir::OperandIndex &index, size_t num_uses) - { - assert(_uses_count_map.find(index) != _uses_count_map.end() ? 
_uses_count_map[index] == num_uses - : true); - _uses_count_map[index] = num_uses; - } - - void parent_map(std::unordered_map<ir::OperandIndex, ParentInfo> &&parent_map) - { - _parent_map = std::move(parent_map); - } - - bool areSubTensorsOf(const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq); - - /** - * @brief Check child tensor is allocated as subtensor of parent tensor - * @param[in] parent Index of parent - * @param[in] child Index of child - * @return @c true if child is allocated as subtensor of parent, otherwise @c false - */ - bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child); - -private: - void buildTensors(void); - ir::OperandIndex findRootParent(ir::OperandIndex index); - -private: - const ir::Operands &_operands; - ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; - ir::OperandIndexMap<ir::Layout> _tensor_layout_map; - ir::OperandIndexMap<size_t> _uses_count_map; - - std::unique_ptr<T_ClTensorManager> _tensor_mgr; - InferenceContext::CreateInferenceInfo _create_info; - std::shared_ptr<Environment> _environment; - - // for linear executor - std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq; - - // Extra info for concat elimination - ir::OperandIndexMap<ParentInfo> _parent_map; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#include <cassert> -#include <stack> - -#include "util/logging.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <typename T_ITensor, typename T_Tensor> -ClTensorBuilder<T_ITensor, T_Tensor>::ClTensorBuilder( - const ir::Operands &operands, T_ClTensorManager *tensor_mgr, - InferenceContext::CreateInferenceInfo create_info, - const std::shared_ptr<Environment> &environment) - : _operands{operands}, _tensor_mgr{tensor_mgr}, _create_info{create_info}, _environment{ - environment} -{ - assert(_tensor_mgr); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, 
T_Tensor>::registerTensorInfo(const ir::OperandIndex &ind, - const ir::OperandInfo &info, - ir::Layout backend_layout) -{ - assert(_tensor_mgr->constTensors().size() == 0); - assert(_tensor_mgr->nonconstTensors().size() == 0); - - _uses_count_map[ind] = _operands.at(ind).getUses().size(); - - _tensor_info_map.emplace(ind, info); - _tensor_layout_map.insert({ind, backend_layout}); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::notifyFirstUse(const ir::OperandIndex &ind) -{ - _lifetime_seq.emplace_back(UsesType::FIRST, ind); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::notifyLastUse(const ir::OperandIndex &ind) -{ - _lifetime_seq.emplace_back(UsesType::LAST, ind); -} - -template <typename T_ITensor, typename T_Tensor> -bool ClTensorBuilder<T_ITensor, T_Tensor>::isRegistered(const ir::OperandIndex &ind) const -{ - return _tensor_info_map.find(ind) != _tensor_info_map.end(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::prepare(void) -{ - buildTensors(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::allocate(void) -{ - // Update lifetime sequence to apply subtensor optimization - - std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map; - std::function<ir::OperandIndex &(ir::OperandIndex)> find_root = - [&](ir::OperandIndex ind) -> ir::OperandIndex & { - ir::OperandIndex &ret = root_map[ind]; - - // We know the root parent value already - if (ret.valid()) - return ret; - - auto itr = _parent_map.find(ind); - if (itr == _parent_map.end()) - { - // If there is no parent, let's store the value of itself - return ret = ind; - } - else - { - return ret = find_root(itr->second.parent); - } - }; - - ir::OperandIndexMap<bool> first_use_check; - ir::OperandIndexMap<bool> last_use_check; - std::map<size_t, std::pair<UsesType, ir::OperandIndex>> lifetime_map; - 
for (size_t i = 0; i < _lifetime_seq.size(); i++) - { - auto &entry = _lifetime_seq[i]; - if (entry.first != UsesType::FIRST) - continue; - auto root_ind = find_root(entry.second); - if (first_use_check[root_ind]) - continue; - first_use_check[root_ind] = true; - lifetime_map[i] = {UsesType::FIRST, root_ind}; - } - - for (int i = _lifetime_seq.size() - 1; i >= 0; i--) - { - auto &entry = _lifetime_seq[i]; - if (entry.first != UsesType::LAST) - continue; - auto root_ind = find_root(entry.second); - if (last_use_check[root_ind]) - continue; - last_use_check[root_ind] = true; - lifetime_map[i] = {UsesType::LAST, root_ind}; - } - - for (auto &entry : lifetime_map) - { - auto &use = entry.second; - auto use_type = use.first; - auto use_index = use.second; - assert(use_index.valid()); - if (use_type == UsesType::FIRST) - _tensor_mgr->startLifetime(use_index); - else - _tensor_mgr->finishLifetime(use_index); - } - - _tensor_mgr->allocateConsts(); - - // TODO Since `_parent_map` is filled for all Concat nodes even if the node this backend uses - // After refactoring BackendContext we can uncomment this - // assert(_tensor_info_map.size() == - // _tensor_mgr->nonconstTensors().size() + num of constants of _tensor_info_map + - // _parent_map.size()); - _tensor_mgr->allocateNonconsts(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::postFunctionPrepare(void) -{ - _tensor_mgr->tryDeallocConstants(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::buildTensors(void) -{ - assert(_tensor_mgr->constTensors().size() == 0); - assert(_tensor_mgr->nonconstTensors().size() == 0); - // Normal tensors - for (auto &entry : _tensor_info_map) - { - auto ind = entry.first; - if (_parent_map.count(ind) > 0) - continue; - - const auto &info = entry.second; - _tensor_mgr->buildTensor(ind, info, _create_info, _environment, _environment->device().info_); - } -} - -} // namespace gpu_cl -} // 
namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/gpu_cl/ClTensorManager.h b/runtime/onert/backend/gpu_cl/ClTensorManager.h deleted file mode 100644 index 49a11730f..000000000 --- a/runtime/onert/backend/gpu_cl/ClTensorManager.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__ -#define __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__ - -#include "ClMemoryManager.h" - -#include "open_cl/InferenceContext.h" -#include "open_cl/TensorType.h" - -#include "ir/OperandInfo.h" -#include "ir/OperandIndexMap.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <typename T_ITensor, typename T_Tensor> class ClTensorManager -{ -public: - using T_ClMemoryManager = ClMemoryManager<T_ITensor, T_Tensor>; - - ClTensorManager(T_ClMemoryManager *const_mgr, T_ClMemoryManager *nonconst_mgr); - - virtual ~ClTensorManager() = default; - - void allocateConsts(void); - void allocateNonconsts(void); - void deallocateConsts(void); - void deallocateNonconsts(void); - - void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, - InferenceContext::CreateInferenceInfo create_info, - std::shared_ptr<Environment> environment, DeviceInfo &device_info); - - std::shared_ptr<T_ITensor> 
findTensorAsParent(const ir::OperandIndex &ind); - - void startLifetime(const ir::OperandIndex &ind); - void finishLifetime(const ir::OperandIndex &ind); - - std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind); - std::shared_ptr<InferenceContext::DummyTensor> atR(const ir::OperandIndex &ind); - - InferenceContext::TensorReserver &constTensorReservers(void); - InferenceContext::TensorReserver &nonconstTensorReservers(void); - - ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &constTensors(void); - ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &nonconstTensors(void); - - void iterate(const std::function<void(const ir::OperandIndex &)> &fn); - - void tryDeallocConstants(void); - -private: - std::unique_ptr<T_ClMemoryManager> _const_mgr; - std::unique_ptr<T_ClMemoryManager> _nonconst_mgr; - ir::OperandIndexMap<T_ClMemoryManager &> _ind_to_mgr; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#include <cassert> -#include "util/logging.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <typename T_ITensor, typename T_Tensor> -ClTensorManager<T_ITensor, T_Tensor>::ClTensorManager(T_ClMemoryManager *const_mgr, - T_ClMemoryManager *nonconst_mgr) - : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr} -{ - // DO NOTHING -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::allocateConsts(void) -{ - _const_mgr->allocate(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::allocateNonconsts(void) -{ - _nonconst_mgr->allocate(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::deallocateConsts(void) -{ - _const_mgr->deallocate(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::deallocateNonconsts(void) -{ - _nonconst_mgr->deallocate(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, 
T_Tensor>::buildTensor( - const ir::OperandIndex &ind, const ir::OperandInfo &info, - InferenceContext::CreateInferenceInfo create_info, std::shared_ptr<Environment> environment, - DeviceInfo &device_info) -{ - assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end()); - - if (info.isConstant()) - { - _const_mgr->buildTensor(ind, info, create_info, environment, device_info); - _ind_to_mgr.insert({ind, *_const_mgr}); - } - else - { - _nonconst_mgr->buildTensor(ind, info, create_info, environment, device_info); - _ind_to_mgr.insert({ind, *_nonconst_mgr}); - } -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::startLifetime(const ir::OperandIndex &ind) -{ - assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end()); - _ind_to_mgr.at(ind).startLifetime(ind); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::finishLifetime(const ir::OperandIndex &ind) -{ - assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end()); - _ind_to_mgr.at(ind).finishLifetime(ind); -} - -template <typename T_ITensor, typename T_Tensor> -std::shared_ptr<T_ITensor> ClTensorManager<T_ITensor, T_Tensor>::at(const ir::OperandIndex &ind) -{ - if (_ind_to_mgr.find(ind) == _ind_to_mgr.end()) - return nullptr; - - auto &tensors = _ind_to_mgr.at(ind).tensors(); - if (tensors.find(ind) != tensors.end()) - { - return tensors.at(ind); - } - - return nullptr; -} - -template <typename T_ITensor, typename T_Tensor> -ir::OperandIndexMap<std::shared_ptr<T_Tensor>> & -ClTensorManager<T_ITensor, T_Tensor>::constTensors(void) -{ - return _const_mgr->tensors(); -} - -template <typename T_ITensor, typename T_Tensor> -ir::OperandIndexMap<std::shared_ptr<T_Tensor>> & -ClTensorManager<T_ITensor, T_Tensor>::nonconstTensors(void) -{ - return _nonconst_mgr->tensors(); -} - -template <typename T_ITensor, typename T_Tensor> -std::shared_ptr<InferenceContext::DummyTensor> -ClTensorManager<T_ITensor, T_Tensor>::atR(const ir::OperandIndex &ind) -{ - if 
(_nonconst_mgr->tensorReservers().HaveTensor(ind.value())) - { - return _nonconst_mgr->tensorReservers().Get(ind.value()); - } - else if (_const_mgr->tensorReservers().HaveTensor(ind.value())) - { - return _const_mgr->tensorReservers().Get(ind.value()); - } - return nullptr; -} - -template <typename T_ITensor, typename T_Tensor> -InferenceContext::TensorReserver &ClTensorManager<T_ITensor, T_Tensor>::constTensorReservers(void) -{ - return _const_mgr->tensorReservers(); -} - -template <typename T_ITensor, typename T_Tensor> -InferenceContext::TensorReserver & -ClTensorManager<T_ITensor, T_Tensor>::nonconstTensorReservers(void) -{ - return _nonconst_mgr->tensorReservers(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::iterate( - const std::function<void(const ir::OperandIndex &)> &fn) -{ - for (auto it : _nonconst_mgr->tensors()) - fn(it.first); - - for (auto it : _const_mgr->tensors()) - fn(it.first); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::tryDeallocConstants(void) -{ - // NYI -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/gpu_cl/Config.cc b/runtime/onert/backend/gpu_cl/Config.cc index 067a2070f..9959a471b 100644 --- a/runtime/onert/backend/gpu_cl/Config.cc +++ b/runtime/onert/backend/gpu_cl/Config.cc @@ -17,8 +17,11 @@ #include "Config.h" #include <dlfcn.h> -#include "open_cl/OpenclWrapper.h" -#include "open_cl/Status.h" + +#include "tensorflow/lite/delegates/gpu/common/status.h" +#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" + +using namespace tflite::gpu::cl; namespace onert { @@ -26,12 +29,9 @@ namespace backend { namespace gpu_cl { - -Config::~Config() { UnloadOpenCL(_handle); } - bool Config::initialize() { - if (LoadOpenCL(&_handle).ok()) + if (LoadOpenCL().ok()) { return true; } diff --git 
a/runtime/onert/backend/gpu_cl/Config.h b/runtime/onert/backend/gpu_cl/Config.h index aa5a51a15..6a455bbb5 100644 --- a/runtime/onert/backend/gpu_cl/Config.h +++ b/runtime/onert/backend/gpu_cl/Config.h @@ -31,7 +31,7 @@ namespace gpu_cl class Config : public IConfig { public: - virtual ~Config(); + virtual ~Config() {} public: std::string id() override { return "gpu_cl"; } diff --git a/runtime/onert/backend/gpu_cl/KernelGenerator.cc b/runtime/onert/backend/gpu_cl/KernelGenerator.cc index a84867f8c..04edc3928 100644 --- a/runtime/onert/backend/gpu_cl/KernelGenerator.cc +++ b/runtime/onert/backend/gpu_cl/KernelGenerator.cc @@ -19,13 +19,14 @@ #include "KernelGenerator.h" -#include "ClTensorRegistry.h" #include "ClFunction.h" #include "TensorManager.h" -#include "open_cl/selectors/ConvolutionSelector.h" -#include "open_cl/selectors/DwConvolutionSelector.h" -#include "open_cl/selectors/SimpleSelectors.h" +#include "tensorflow/lite/delegates/gpu/common/shape.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor.h" +#include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h" +#include "tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.h" +#include "tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h" #include "ir/Operations.h" #include "ir/Operations.Include.h" @@ -37,6 +38,9 @@ #include "util/logging.h" #include "util/Utils.h" +using namespace tflite::gpu; +using namespace tflite::gpu::cl; + namespace onert { namespace backend @@ -60,14 +64,14 @@ void UpdatePadding(const ir::PaddingType type, const BHWC &input_shape, AttrT *a } } -gpu_cl::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir) +PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir) { switch (type_ir) { case ir::operation::Pool2D::PoolType::AVG: - return gpu_cl::PoolingType::AVERAGE; + return PoolingType::AVERAGE; case ir::operation::Pool2D::PoolType::MAX: - return gpu_cl::PoolingType::MAX; + return PoolingType::MAX; default: throw 
std::runtime_error("gpu_Cl KernelGenerator : Not supported operation yet"); } @@ -75,7 +79,7 @@ gpu_cl::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir) KernelGenerator::KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder, - const std::shared_ptr<ClTensorRegistry<TensorManager>> &tensor_reg, + const std::shared_ptr<TensorRegistry> &tensor_reg, const std::shared_ptr<CreationContext> &creation_context) : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx(graph.operations()), _current_layout{graph.layout()}, @@ -190,7 +194,7 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) auto bias_tensor = _tensor_reg->getClTensor(bias); auto output_tensor = _tensor_reg->getClTensor(output); - gpu_cl::Convolution2DAttributes attr; + Convolution2DAttributes attr; attr.strides = ToHW(param.stride.vertical, param.stride.horizontal); attr.dilations = HW(std::max(static_cast<u_int32_t>(1), param.dilation.height_factor), std::max(static_cast<u_int32_t>(1), param.dilation.width_factor)); @@ -237,7 +241,7 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) { std::unique_ptr<GPUOperation> gpu_op_1; OperationDef op_def_1; - std::shared_ptr<Tensor> new_tensor = std::make_shared<Tensor>(); + std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>(); _new_tensors[output] = new_tensor; if (!CreateTensor(*_creation_context->context, output_shape, @@ -334,9 +338,9 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const int filter_width = ker_shape.w; const int output_depth = out_shape.c; - InternalTensor<OHWI, DataType::FLOAT32> weights; + tflite::gpu::Tensor<OHWI, DataType::FLOAT32> weights; weights.id = attr.weights.id; - weights.shape = OHWI(output_depth, filter_height, filter_width, input_depth); + weights.shape = tflite::gpu::OHWI(output_depth, filter_height, filter_width, input_depth); weights.data.resize(weights.shape.DimensionsProduct()); 
float *dst = &weights.data[0]; for (int j = 0; j < output_depth; ++j) @@ -387,7 +391,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) { std::unique_ptr<GPUOperation> gpu_op_1; OperationDef op_def_1; - std::shared_ptr<Tensor> new_tensor = std::make_shared<Tensor>(); + std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>(); _new_tensors[ofm_index] = new_tensor; if (!CreateTensor(*_creation_context->context, out_shape, diff --git a/runtime/onert/backend/gpu_cl/KernelGenerator.h b/runtime/onert/backend/gpu_cl/KernelGenerator.h index 3e341b111..91fd3cd9d 100644 --- a/runtime/onert/backend/gpu_cl/KernelGenerator.h +++ b/runtime/onert/backend/gpu_cl/KernelGenerator.h @@ -17,11 +17,13 @@ #ifndef __ONERT_BACKEND_GPU_CL_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_GPU_CL_KERNEL_GENERATOR_H__ -#include "ClTensorRegistry.h" +#include "TensorRegistry.h" #include "backend/basic/TensorRegistry.h" #include "TensorBuilder.h" #include "TensorManager.h" +#include "tensorflow/lite/delegates/gpu/api.h" + #include <backend/CustomKernelBuilder.h> #include <backend/basic/KernelGeneratorBase.h> #include <ir/Operands.h> @@ -39,8 +41,8 @@ class KernelGenerator : public basic::KernelGeneratorBase { public: KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder, - const std::shared_ptr<ClTensorRegistry<TensorManager>> &tensor_reg, - const std::shared_ptr<CreationContext> &creation_context); + const std::shared_ptr<TensorRegistry> &tensor_reg, + const std::shared_ptr<tflite::gpu::cl::CreationContext> &creation_context); std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override; @@ -58,9 +60,9 @@ private: const ir::Operations &_operations_ctx; ir::Layout _current_layout; std::shared_ptr<TensorBuilder> _tensor_builder; - std::shared_ptr<ClTensorRegistry<TensorManager>> _tensor_reg; - std::shared_ptr<CreationContext> _creation_context; - ir::OperandIndexMap<std::shared_ptr<Tensor>> 
_new_tensors; + std::shared_ptr<TensorRegistry> _tensor_reg; + std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context; + ir::OperandIndexMap<std::shared_ptr<tflite::gpu::cl::Tensor>> _new_tensors; }; } // namespace gpu_cl diff --git a/runtime/onert/backend/gpu_cl/MemoryManager.h b/runtime/onert/backend/gpu_cl/MemoryManager.h new file mode 100644 index 000000000..a3b9b39de --- /dev/null +++ b/runtime/onert/backend/gpu_cl/MemoryManager.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__ +#define __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__ + +#include "ex/InferenceContextEx.h" +#include "operand/CLTensor.h" + +#include "ir/OperandIndexMap.h" +#include "ir/OperandInfo.h" +#include "util/logging.h" + +#include "tensorflow/lite/delegates/gpu/cl/cl_context.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" +#include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" + +#include <cassert> + +namespace onert +{ +namespace backend +{ +namespace gpu_cl +{ + +class MemoryManager +{ +public: + MemoryManager(tflite::gpu::cl::CLContext *context) : _context{context} {} + + ~MemoryManager() = default; + + void allocate(void) + { + for (const auto &tensor_entry : _tensors) + { + auto tensor = tensor_entry.second; + auto type = tensor->get_type(); + + // if (type == TensorType::TENSOR_TYPE_DELETE) { + // continue; + // } + + const auto &t = tensor_reserver_.Get(tensor_entry.first.value()); + const auto &shape = t->shape; + const auto &descriptor = t->descriptor; + if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok()) + { + std::runtime_error("Failed to CreateTensor"); + } + switch (type) + { + case TensorType::TENSOR_TYPE_INPUT: + tensor->writeConvertInit(); + break; + case TensorType::TENSOR_TYPE_OUTPUT: + tensor->readConvertInit(); + break; + default: + break; + } + } + } + + void deallocate(void) + { + // NYI + } + + void startLifetime(const ir::OperandIndex &) + { /* DO NOTHING */ + } + void finishLifetime(const ir::OperandIndex &) + { /* DO NOTHING */ + } + + void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, + tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info, + std::shared_ptr<tflite::gpu::cl::Environment> environment, + tflite::gpu::cl::DeviceInfo &device_info, TensorType type) + { + tflite::gpu::ValueId max_id = 0; + auto data_type = 
DeduceDataTypeFromPrecision(create_info.precision); + const auto shape = info.shape(); + + auto tensor = std::make_shared<operand::CLTensor>(shape.rank(), shape, environment, type); + _tensors[ind] = tensor; + tflite::gpu::BHWC t_shape; + switch (shape.rank()) + { + case 1: + // B layout + t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, 1); + break; + case 2: + // BC layout + t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, shape.dim(1)); + break; + case 3: + // BWC layout + t_shape = tflite::gpu::BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2)); + break; + case 4: + // BHWC layout + t_shape = tflite::gpu::BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3)); + break; + default: + break; + } + + tflite::gpu::cl::TensorStorageType storage_type = create_info.storage_type; + tflite::gpu::Layout layout = + t_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC; + + tflite::gpu::ValueId id = ind.value(); + storage_type = + tflite::gpu::cl::SelectBestStorageType(device_info, t_shape, storage_type, data_type, layout); + auto dummy = std::make_shared<InferenceContextEx::DummyTensor>(); + dummy->shape = t_shape; + dummy->descriptor = tflite::gpu::cl::TensorDescriptor{data_type, storage_type, layout}; + tensor_reserver_.Add(id, dummy); + + max_id = std::max(max_id, id); + + tensor_reserver_.SetNext(max_id + 1); + } + + ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &tensors(void) { return _tensors; } + + InferenceContextEx::TensorReserverEx &tensorReservers(void) { return tensor_reserver_; } + +private: + ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> _tensors; + InferenceContextEx::TensorReserverEx tensor_reserver_; + tflite::gpu::cl::CLContext *_context; +}; + +} // namespace gpu_cl +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__ diff --git a/runtime/onert/backend/gpu_cl/TensorBuilder.cc b/runtime/onert/backend/gpu_cl/TensorBuilder.cc new file mode 100644 index 
000000000..e71733427 --- /dev/null +++ b/runtime/onert/backend/gpu_cl/TensorBuilder.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <memory> +#include <queue> + +#include "TensorBuilder.h" + +#include "TensorManager.h" + +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_device.h" +#include "tensorflow/lite/delegates/gpu/cl/inference_context.h" + +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include <ir/Operands.h> +#include <util/Utils.h> + +#include <cassert> +#include <stack> + +#include "util/logging.h" + +namespace onert +{ +namespace backend +{ +namespace gpu_cl +{ + +using UsesType = cl_common::UsesType; + +TensorBuilder::TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr, + tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info, + const std::shared_ptr<tflite::gpu::cl::Environment> &environment) + : _operands{operands}, _tensor_mgr{tensor_mgr}, _create_info{create_info}, _environment{ + environment} +{ + assert(_tensor_mgr); +} + +void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout, TensorType type) +{ + assert(_tensor_mgr->constTensors().size() == 0); + 
assert(_tensor_mgr->nonconstTensors().size() == 0); + + _uses_count_map[ind] = _operands.at(ind).getUses().size(); + + _tensor_info_map.emplace(ind, info); + _tensor_type_map.emplace(ind, type); + + _tensor_layout_map.insert({ind, backend_layout}); +} + +void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) +{ + _lifetime_seq.emplace_back(UsesType::FIRST, ind); +} + +void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) +{ + _lifetime_seq.emplace_back(UsesType::LAST, ind); +} + +bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const +{ + return _tensor_info_map.find(ind) != _tensor_info_map.end(); +} + +void TensorBuilder::prepare(void) { buildTensors(); } + +void TensorBuilder::allocate(void) +{ + auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map); + + for (auto &entry : lifetime_map) + { + auto &use = entry.second; + auto use_type = use.first; + auto use_index = use.second; + assert(use_index.valid()); + if (use_type == UsesType::FIRST) + _tensor_mgr->startLifetime(use_index); + else + _tensor_mgr->finishLifetime(use_index); + } + + _tensor_mgr->allocateConsts(); + + // TODO Since `_parent_map` is filled for all Concat nodes even if the node this backend uses + // After refactoring BackendContext we can uncomment this + // assert(_tensor_info_map.size() == + // _tensor_mgr->nonconstTensors().size() + num of constants of _tensor_info_map + + // _parent_map.size()); + _tensor_mgr->allocateNonconsts(); +} + +void TensorBuilder::postFunctionPrepare(void) { _tensor_mgr->tryDeallocConstants(); } + +void TensorBuilder::buildTensors(void) +{ + assert(_tensor_mgr->constTensors().size() == 0); + assert(_tensor_mgr->nonconstTensors().size() == 0); + // Normal tensors + for (auto &entry : _tensor_info_map) + { + auto ind = entry.first; + if (_parent_map.count(ind) > 0) + continue; + auto type = _tensor_type_map.at(ind); + const auto &info = entry.second; + _tensor_mgr->buildTensor(ind, info, _create_info, 
_environment, _environment->device().info_, + type); + } +} + +} // namespace gpu_cl +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/TensorBuilder.h b/runtime/onert/backend/gpu_cl/TensorBuilder.h index d55358191..2a5cb8b5e 100644 --- a/runtime/onert/backend/gpu_cl/TensorBuilder.h +++ b/runtime/onert/backend/gpu_cl/TensorBuilder.h @@ -17,10 +17,13 @@ #ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_H__ #define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_H__ -#include <backend/basic/TensorBuilder.h> -#include "operand/ICLTensor.h" -#include "operand/CLTensor.h" -#include "ClTensorBuilder.h" +#include "TensorManager.h" + +#include <cl_common/LifetimeMap.h> +#include <cl_common/ParentInfo.h> + +#include <ir/Operands.h> +#include <ir/OperandIndexSequence.h> namespace onert { @@ -28,8 +31,76 @@ namespace backend { namespace gpu_cl { +class TensorBuilder +{ +public: + TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr, + tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info, + const std::shared_ptr<tflite::gpu::cl::Environment> &environment); + + /** + * @brief Register tensor information to allocate on ACL-CL backend + * @param[in] ind Operand index + * @param[in] info Tensor information + * @param[in] layout Tensor data layout + */ + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout, TensorType type); + + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); + + bool isRegistered(const ir::OperandIndex &) const; + + void prepare(); + void allocate(); + void postFunctionPrepare(); + + TensorManager *cl_tensor_manager(void) { return _tensor_mgr.get(); } + + void setUsesCount(const ir::OperandIndex &index, size_t num_uses) + { + assert(_uses_count_map.find(index) != _uses_count_map.end() ? 
_uses_count_map[index] == num_uses + : true); + _uses_count_map[index] = num_uses; + } + + void parent_map(std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&parent_map) + { + _parent_map = std::move(parent_map); + } + + bool areSubTensorsOf(const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq); + + /** + * @brief Check child tensor is allocated as subtensor of parent tensor + * @param[in] parent Index of parent + * @param[in] child Index of child + * @return @c true if child is allocated as subtensor of parent, otherwise @c false + */ + bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child); + +private: + void buildTensors(void); + ir::OperandIndex findRootParent(ir::OperandIndex index); + +private: + const ir::Operands &_operands; + ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; + ir::OperandIndexMap<ir::Layout> _tensor_layout_map; + ir::OperandIndexMap<TensorType> _tensor_type_map; + ir::OperandIndexMap<size_t> _uses_count_map; + + std::unique_ptr<TensorManager> _tensor_mgr; + tflite::gpu::cl::InferenceContext::CreateInferenceInfo _create_info; + std::shared_ptr<tflite::gpu::cl::Environment> _environment; + + // for linear executor + cl_common::LifetimeSeq _lifetime_seq; -using TensorBuilder = ClTensorBuilder<operand::ICLTensor, operand::CLTensor>; + // Extra info for concat elimination + ir::OperandIndexMap<cl_common::ParentInfo> _parent_map; +}; } // namespace gpu_cl } // namespace backend diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvCommon.h b/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h index 4700381dc..7290ff5da 100644 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvCommon.h +++ b/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h @@ -1,12 +1,11 @@ /* * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,8 +14,11 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__ +#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__ +#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__ + +#include "absl/status/status.h" +#include "tensorflow/lite/delegates/gpu/common/shape.h" namespace onert { @@ -25,20 +27,18 @@ namespace backend namespace gpu_cl { -enum class ConvWeightsLayout +enum TensorType { - kUnknown, - kOHWIOGroupI4O4, + TENSOR_TYPE_VALID = 0, + TENSOR_TYPE_INPUT = 1, + TENSOR_TYPE_OUTPUT = 2, + TENSOR_TYPE_DELETE = 3 }; -struct ConvWeightsDescription -{ - ConvWeightsLayout layout; - int output_group_size; -}; +absl::Status ExtractAxisFromIndex(int dims, int index, tflite::gpu::Axis *axis); } // namespace gpu_cl } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__ +#endif // __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__ diff --git a/runtime/onert/backend/gpu_cl/TensorManager.cc b/runtime/onert/backend/gpu_cl/TensorManager.cc new file mode 100644 index 000000000..9fe0605ac --- /dev/null +++ b/runtime/onert/backend/gpu_cl/TensorManager.cc @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TensorManager.h" + +#include <util/logging.h> + +#include <cassert> + +namespace onert +{ +namespace backend +{ +namespace gpu_cl +{ + +TensorManager::TensorManager(MemoryManager *const_mgr, MemoryManager *nonconst_mgr) + : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr} +{ + // DO NOTHING +} + +void TensorManager::allocateConsts(void) { _const_mgr->allocate(); } + +void TensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); } + +void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); } + +void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } + +void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, + tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info, + std::shared_ptr<tflite::gpu::cl::Environment> environment, + tflite::gpu::cl::DeviceInfo &device_info, TensorType type) +{ + assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end()); + + if (info.isConstant()) + { + _const_mgr->buildTensor(ind, info, create_info, environment, device_info, type); + _ind_to_mgr.insert({ind, *_const_mgr}); + } + else + { + _nonconst_mgr->buildTensor(ind, info, create_info, environment, device_info, type); + _ind_to_mgr.insert({ind, *_nonconst_mgr}); + } +} + +void TensorManager::startLifetime(const ir::OperandIndex &ind) +{ + assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end()); + _ind_to_mgr.at(ind).startLifetime(ind); +} + +void TensorManager::finishLifetime(const ir::OperandIndex &ind) +{ + assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end()); + 
_ind_to_mgr.at(ind).finishLifetime(ind); +} + +std::shared_ptr<operand::ICLTensor> TensorManager::at(const ir::OperandIndex &ind) +{ + if (_ind_to_mgr.find(ind) == _ind_to_mgr.end()) + return nullptr; + + auto &tensors = _ind_to_mgr.at(ind).tensors(); + if (tensors.find(ind) != tensors.end()) + { + return tensors.at(ind); + } + + return nullptr; +} + +ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::constTensors(void) +{ + return _const_mgr->tensors(); +} + +ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::nonconstTensors(void) +{ + return _nonconst_mgr->tensors(); +} + +std::shared_ptr<InferenceContextEx::DummyTensor> TensorManager::atR(const ir::OperandIndex &ind) +{ + if (_nonconst_mgr->tensorReservers().HaveTensor(ind.value())) + { + return _nonconst_mgr->tensorReservers().Get(ind.value()); + } + else if (_const_mgr->tensorReservers().HaveTensor(ind.value())) + { + return _const_mgr->tensorReservers().Get(ind.value()); + } + return nullptr; +} + +InferenceContextEx::TensorReserverEx &TensorManager::constTensorReservers(void) +{ + return _const_mgr->tensorReservers(); +} + +InferenceContextEx::TensorReserverEx &TensorManager::nonconstTensorReservers(void) +{ + return _nonconst_mgr->tensorReservers(); +} + +void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn) +{ + for (auto it : _nonconst_mgr->tensors()) + fn(it.first); + + for (auto it : _const_mgr->tensors()) + fn(it.first); +} + +void TensorManager::tryDeallocConstants(void) +{ + // NYI +} + +} // namespace gpu_cl +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/TensorManager.h b/runtime/onert/backend/gpu_cl/TensorManager.h index 111b5f8a7..52abc579a 100644 --- a/runtime/onert/backend/gpu_cl/TensorManager.h +++ b/runtime/onert/backend/gpu_cl/TensorManager.h @@ -14,15 +14,16 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_CL_TENSOR_MANAGER_H__ -#define __ONERT_BACKEND_CL_TENSOR_MANAGER_H__ +#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__ +#define __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__ -#include "ClMemoryManager.h" -#include "ClTensorManager.h" -#include "open_cl/ClContext.h" -#include "operand/CLTensor.h" -#include "operand/ICLTensor.h" -#include "util/logging.h" +#include "MemoryManager.h" + +#include "tensorflow/lite/delegates/gpu/cl/inference_context.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" + +#include "ir/OperandInfo.h" +#include "ir/OperandIndexMap.h" namespace onert { @@ -31,13 +32,50 @@ namespace backend namespace gpu_cl { -using MemoryManager = ClMemoryManager<operand::ICLTensor, operand::CLTensor>; +class TensorManager +{ +public: + TensorManager(MemoryManager *const_mgr, MemoryManager *nonconst_mgr); + + virtual ~TensorManager() = default; + + void allocateConsts(void); + void allocateNonconsts(void); + void deallocateConsts(void); + void deallocateNonconsts(void); + + void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, + tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info, + std::shared_ptr<tflite::gpu::cl::Environment> environment, + tflite::gpu::cl::DeviceInfo &device_info, TensorType type); + + std::shared_ptr<operand::ICLTensor> findTensorAsParent(const ir::OperandIndex &ind); + + void startLifetime(const ir::OperandIndex &ind); + void finishLifetime(const ir::OperandIndex &ind); + + std::shared_ptr<operand::ICLTensor> at(const ir::OperandIndex &ind); + std::shared_ptr<InferenceContextEx::DummyTensor> atR(const ir::OperandIndex &ind); + + InferenceContextEx::TensorReserverEx &constTensorReservers(void); + InferenceContextEx::TensorReserverEx &nonconstTensorReservers(void); + + ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &constTensors(void); + ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &nonconstTensors(void); + + void iterate(const 
std::function<void(const ir::OperandIndex &)> &fn); + + void tryDeallocConstants(void); -using TensorManager = ClTensorManager<operand::ICLTensor, operand::CLTensor>; +private: + std::unique_ptr<MemoryManager> _const_mgr; + std::unique_ptr<MemoryManager> _nonconst_mgr; + ir::OperandIndexMap<MemoryManager &> _ind_to_mgr; +}; -inline TensorManager *createTensorManager(CLContext *context) +inline TensorManager *createTensorManager(tflite::gpu::cl::CLContext *context) { - VERBOSE(createTensorManager) << "ClTensorManager" << std::endl; + VERBOSE(createTensorManager) << "GPU-CL TensorManager" << std::endl; return new TensorManager(new MemoryManager(context), new MemoryManager(context)); } @@ -45,4 +83,4 @@ inline TensorManager *createTensorManager(CLContext *context) } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_ACL_CL_TENSOR_MANAGER_H__ +#endif // __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/gpu_cl/ClTensorRegistry.h b/runtime/onert/backend/gpu_cl/TensorRegistry.h index 1f0018bd1..6f17aff54 100644 --- a/runtime/onert/backend/gpu_cl/ClTensorRegistry.h +++ b/runtime/onert/backend/gpu_cl/TensorRegistry.h @@ -17,6 +17,8 @@ #ifndef __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__ #define __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__ +#include "TensorManager.h" + #include "backend/ITensorRegistry.h" namespace onert @@ -27,14 +29,14 @@ namespace gpu_cl { /** - * @brief Tensor registry class for acl backends + * @brief Tensor registry class for gpu-cl backends * - * This is implemented as a wrapper of AclTensorManager. + * This is implemented as a wrapper of TensorManager. 
*/ -template <typename T_ClTensorManager> class ClTensorRegistry : public ITensorRegistry +class TensorRegistry : public ITensorRegistry { public: - ClTensorRegistry(T_ClTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {} + TensorRegistry(TensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {} ITensor *getITensor(const ir::OperandIndex &ind) override { return _tensor_mgr->at(ind).get(); } @@ -45,7 +47,7 @@ public: auto getClTensorReserver(const ir::OperandIndex &ind) { return _tensor_mgr->atR(ind); } private: - T_ClTensorManager *_tensor_mgr; + TensorManager *_tensor_mgr; }; } // namespace gpu_cl diff --git a/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h b/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h new file mode 100644 index 000000000..f67387904 --- /dev/null +++ b/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__ +#define __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__ + +#include "tensorflow/lite/delegates/gpu/cl/inference_context.h" +#include "tensorflow/lite/delegates/gpu/common/model.h" +#include "absl/strings/str_cat.h" + +namespace onert +{ +namespace backend +{ +namespace gpu_cl +{ + +class InferenceContextEx : public tflite::gpu::cl::InferenceContext +{ +public: + struct DummyTensor + { + tflite::gpu::BHWC shape; + tflite::gpu::cl::TensorDescriptor descriptor; + + bool operator==(const DummyTensor &b) const + { + return shape == b.shape && descriptor == b.descriptor; + } + }; + + class TensorReserverEx + { + public: + tflite::gpu::ValueId Add(const std::shared_ptr<DummyTensor> &dummy) + { + reservations_[next_] = dummy; + return next_++; + } + void Add(tflite::gpu::ValueId id, const std::shared_ptr<DummyTensor> &dummy) + { + reservations_[id] = dummy; + } + void SetNext(tflite::gpu::ValueId id) { next_ = id; } + bool HaveTensor(tflite::gpu::ValueId id) + { + return reservations_.find(id) != reservations_.end(); + } + std::shared_ptr<DummyTensor> Get(tflite::gpu::ValueId id) { return reservations_[id]; } + + std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>> + GetTensorDescs() const + { + std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>> result; + for (auto &v : reservations_) + { + tflite::gpu::cl::TensorDescriptor desc = v.second->descriptor; + desc.shape.b = v.second->shape.b; + desc.shape.h = v.second->shape.h; + desc.shape.w = v.second->shape.w; + desc.shape.d = 1; + desc.shape.c = v.second->shape.c; + result.push_back({v.first, desc}); + } + return result; + } + + void Add(const std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>> + &tensors) + { + for (auto &v : tensors) + { + auto dummy = std::make_shared<DummyTensor>(); + dummy->descriptor = v.second; + dummy->shape.b = v.second.shape.b; + dummy->shape.h = 
v.second.shape.h; + dummy->shape.w = v.second.shape.w; + dummy->shape.c = v.second.shape.c; + Add(v.first, dummy); + } + } + + private: + // absl::flat_hash_map<ValueId, DummyTensor> reservations_; + std::unordered_map<tflite::gpu::ValueId, std::shared_ptr<DummyTensor>> reservations_; + tflite::gpu::ValueId next_ = 0; + }; +}; + +} // namespace gpu_cl +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Api.cc b/runtime/onert/backend/gpu_cl/open_cl/Api.cc deleted file mode 100644 index 10bf87c38..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Api.cc +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Api.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -struct ObjectTypeGetter -{ - ObjectType operator()(absl::monostate) const { return ObjectType::UNKNOWN; } - ObjectType operator()(OpenClBuffer) const { return ObjectType::OPENCL_BUFFER; } - ObjectType operator()(OpenClTexture) const { return ObjectType::OPENCL_TEXTURE; } - ObjectType operator()(CpuMemory) const { return ObjectType::CPU_MEMORY; } -}; - -struct ObjectValidityChecker -{ - bool operator()(absl::monostate) const { return false; } - bool operator()(OpenClBuffer obj) const { return obj.memobj; } - bool operator()(OpenClTexture obj) const { return obj.memobj; } - bool operator()(CpuMemory obj) const - { - return obj.data != nullptr && obj.size_bytes > 0 && - (data_type == DataType::UNKNOWN || obj.size_bytes % SizeOf(data_type) == 0); - } - DataType data_type; -}; - -} // namespace - -bool IsValid(const ObjectDef &def) -{ - return def.data_type != DataType::UNKNOWN && def.data_layout != DataLayout::UNKNOWN && - def.object_type != ObjectType::UNKNOWN; -} - -ObjectType GetType(const TensorObject &object) { return absl::visit(ObjectTypeGetter{}, object); } - -bool IsValid(const TensorObjectDef &def) { return IsValid(def.object_def); } - -bool IsValid(const TensorObjectDef &def, const TensorObject &object) -{ - return GetType(object) == def.object_def.object_type && - absl::visit(ObjectValidityChecker{def.object_def.data_type}, object); -} - -bool IsObjectPresent(ObjectType type, const TensorObject &obj) -{ - switch (type) - { - case ObjectType::CPU_MEMORY: - return absl::holds_alternative<CpuMemory>(obj); - case ObjectType::OPENCL_BUFFER: - return absl::holds_alternative<OpenClBuffer>(obj); - case ObjectType::OPENCL_TEXTURE: - return absl::holds_alternative<OpenClTexture>(obj); - case ObjectType::UNKNOWN: - return false; - } - return false; -} - -uint32_t NumElements(const TensorObjectDef &def) -{ - const auto &d = def.dimensions; - switch 
(def.object_def.data_layout) - { - case DataLayout::BHWC: - return d.product(); - case DataLayout::HWDC4: - case DataLayout::HDWC4: - case DataLayout::DHWC4: - return d.b * d.h * d.w * AlignByN(d.c, 4); - case DataLayout::UNKNOWN: - return 0; - } - return 0; -} - -int GetPosition(const InferenceOptions &options, InferencePriority p) -{ - if (options.priority1 == p) - return 1; - if (options.priority2 == p) - return 2; - if (options.priority3 == p) - return 3; - return 4; // least important -} - -PriorityImportance GetRelativeImportance(const InferenceOptions &options, InferencePriority p1, - InferencePriority p2) -{ - int p1_position = GetPosition(options, p1); - int p2_position = GetPosition(options, p2); - if (p1_position == p2_position) - return PriorityImportance::UNKNOWN; - return p1_position < p2_position ? PriorityImportance::HIGHER : PriorityImportance::LOWER; -} - -bool IsValid(const InferenceOptions &options) -{ - if (options.usage == InferenceUsage::UNKNOWN) - { - return false; - } - if (options.priority1 == InferencePriority::UNKNOWN || - options.priority2 == InferencePriority::UNKNOWN || - options.priority3 == InferencePriority::UNKNOWN) - { - return false; - } - if (options.priority1 == InferencePriority::AUTO) - { - return false; - } - if (options.priority2 == InferencePriority::AUTO && options.priority3 != InferencePriority::AUTO) - { - return false; - } - if (options.priority1 == options.priority2 || options.priority1 == options.priority3) - { - return false; - } - if (options.priority2 == options.priority3 && options.priority2 != InferencePriority::AUTO) - { - return false; - } - return true; -} - -// Implementation note: this resolution logic is shared between GL and CL -// backends, but they might have own logic. Thus, the function is defined -// here just for code re-use purposes. -void ResolveAutoPriority(InferenceOptions *options) -{ - // priority1 can not be AUTO as it would make options invalid. 
- if (options->priority2 == InferencePriority::AUTO) - { - switch (options->priority1) - { - case InferencePriority::MIN_LATENCY: - options->priority2 = InferencePriority::MIN_MEMORY_USAGE; - options->priority3 = InferencePriority::MAX_PRECISION; - return; - case InferencePriority::MIN_MEMORY_USAGE: - options->priority2 = InferencePriority::MAX_PRECISION; - options->priority3 = InferencePriority::MIN_LATENCY; - return; - case InferencePriority::MAX_PRECISION: - options->priority2 = InferencePriority::MIN_LATENCY; - options->priority3 = InferencePriority::MIN_MEMORY_USAGE; - return; - case InferencePriority::UNKNOWN: - case InferencePriority::AUTO: - // Invalid and unreachable option. - return; - } - } - - if (options->priority3 == InferencePriority::AUTO) - { - // Simply add missing priority - if (GetPosition(*options, InferencePriority::MIN_LATENCY) == 4) - { - options->priority3 = InferencePriority::MIN_LATENCY; - } - else if (GetPosition(*options, InferencePriority::MAX_PRECISION) == 4) - { - options->priority3 = InferencePriority::MAX_PRECISION; - } - else if (GetPosition(*options, InferencePriority::MIN_MEMORY_USAGE) == 4) - { - options->priority3 = InferencePriority::MIN_MEMORY_USAGE; - } - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Api.h b/runtime/onert/backend/gpu_cl/open_cl/Api.h deleted file mode 100644 index 35be3d99c..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Api.h +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_API_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_API_H__ - -// Usage example: -// -// // Builder is created from a model using GPU-specific parameters. -// std::unique_ptr<InferenceBuilder> builder = ...; -// -// // input data is coming from a texture -// // output data goes to CPU -// builder->SetInputObjectDef(0, {DataType::FLOAT16, DataLayout::PHWC4, -// ObjectType::OPENGL_TEXTURE, true}); -// builder->SetOutputObjectDef(0, {DataType::FLOAT32, DataLayout::BHWC, -// ObjectType::CPU_MEMORY, false}); -// std::unique_ptr<InferenceRunner> runner; -// RETURN_IF_ERROR(builder->Build(&runner)); // may take significant time. -// RETURN_IF_ERROR( -// runner->SetInputObject(0, OpenGlTexture{texture_ud, texture_format})); -// RETURN_IF_ERROR(runner->Run()); - -#include <cstdint> -#include <memory> -#include <vector> - -#include "absl/types/span.h" -#include "absl/types/variant.h" -#include "DataType.h" -#include "Status.h" -#include "Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// Common abbreviations: -// B - batch -// H - height -// W - width -// C - channels -// D - depth := DivideRoundUp(C, 4) -// C4 - is the constant = 4. 
-enum class DataLayout -{ - UNKNOWN, - BHWC, - DHWC4, - HWDC4, - HDWC4, -}; - -enum class ObjectType -{ - UNKNOWN, - CPU_MEMORY, - OPENCL_TEXTURE, - OPENCL_BUFFER, -}; - -struct OpenClBuffer -{ - OpenClBuffer() = default; - explicit OpenClBuffer(cl_mem new_memobj) : memobj(new_memobj) {} - - cl_mem memobj = nullptr; -}; - -struct OpenClTexture -{ - OpenClTexture() = default; - explicit OpenClTexture(cl_mem new_memobj) : memobj(new_memobj) {} - - cl_mem memobj = nullptr; - // TODO(akulik): should it specify texture format? -}; - -struct CpuMemory -{ - CpuMemory() = default; - CpuMemory(void *new_data, size_t new_size_bytes) : data(new_data), size_bytes(new_size_bytes) {} - - void *data = nullptr; - size_t size_bytes = 0; -}; - -template <typename T> inline CpuMemory MakeCpuMemory(absl::Span<T> t) -{ - CpuMemory m; - m.data = t.data(); - m.size_bytes = t.size() * sizeof(T); - return m; -} - -template <typename T> inline CpuMemory MakeReadableCpuMemory(absl::Span<const T> t) -{ - CpuMemory m; - m.data = const_cast<T *>(t.data()); - m.size_bytes = t.size() * sizeof(T); - return m; -} - -// Defines object representation. -struct ObjectDef -{ - DataType data_type = DataType::UNKNOWN; - DataLayout data_layout = DataLayout::UNKNOWN; - ObjectType object_type = ObjectType::UNKNOWN; - - // If true, then object is managed externally and needs to be provided to - // InferenceRunner by a user before running inference. - // - // User-provided objects will not be re-used internally for any purpose to - // lower overall memory usage. 
- bool user_provided = false; - - bool operator==(const ObjectDef &other) const - { - return data_type == other.data_type && data_layout == other.data_layout && - object_type == other.object_type && user_provided == other.user_provided; - } -}; - -bool IsValid(const ObjectDef &def); - -struct Dimensions -{ - Dimensions() : b(1), h(1), w(1), c(1) {} - - Dimensions(int32_t batch, int32_t height, int32_t width, int32_t channels) - : b(batch), h(height), w(width), c(channels) - { - } - - int32_t d() const { return DivideRoundUp(c, 4); } - - int32_t product() const { return b * h * w * c; } - - bool operator==(const Dimensions &other) const - { - return b == other.b && h == other.h && w == other.w && c == other.c; - } - - int32_t b; - int32_t h; - int32_t w; - int32_t c; -}; - -// Connects tensor shape with corresponding object definition. -struct TensorObjectDef -{ - // Dimensions semantic is defined by corresponding DataLayout. - Dimensions dimensions; - ObjectDef object_def; - - bool operator==(const TensorObjectDef &other) const - { - return dimensions == other.dimensions && object_def == other.object_def; - } -}; - -// @return true if tensor object def is defined. -bool IsValid(const TensorObjectDef &def); - -// @return the number of elements in a tensor object. -uint32_t NumElements(const TensorObjectDef &def); - -using TensorObject = absl::variant<absl::monostate, CpuMemory, OpenClBuffer, OpenClTexture>; - -// @return true if object is set and corresponding values are defined. -bool IsValid(const TensorObjectDef &def, const TensorObject &object); - -ObjectType GetType(const TensorObject &object); - -// @return true if corresponding object is set for the given type -bool IsObjectPresent(ObjectType type, const TensorObject &obj); - -class InferenceRunner; - -// Allows to inspect and change input and output definitions before a graph is -// prepared for the inference. 
-class InferenceBuilder -{ -public: - virtual ~InferenceBuilder() {} - - // Returns inference graph inputs and outputs definitions. - virtual std::vector<TensorObjectDef> inputs() const = 0; - virtual std::vector<TensorObjectDef> outputs() const = 0; - - // Sets new shape for the input if underlying implementation and graph - // structure allows dynamic tensors. - virtual absl::Status SetInputShape(int index, const Dimensions &dimensions) = 0; - - // Updates object definitions for the given index. Implementation may allow - // to use different layouts and/or data type conversions between objects - // defined in a graph and given objects, for example: - // input '0' is DataType::FLOAT32, DataLayout::BHWC. - // A user, however, has an input in DataType::FLOAT16, DataLayout::PHWC4. - // An implementation may allow this transformation to happen automatically - // under the hood. - virtual absl::Status SetInputObjectDef(int index, ObjectDef def) = 0; - virtual absl::Status SetOutputObjectDef(int index, ObjectDef def) = 0; - virtual absl::Status SetAllInputObjectDefsTo(ObjectDef def) - { - auto input_defs = inputs(); - for (size_t i = 0; i < input_defs.size(); ++i) - { - RETURN_IF_ERROR(SetInputObjectDef(i, def)); - } - return absl::OkStatus(); - } - virtual absl::Status SetAllOutputObjectDefsTo(ObjectDef def) - { - auto output_defs = outputs(); - for (size_t i = 0; i < output_defs.size(); ++i) - { - RETURN_IF_ERROR(SetOutputObjectDef(i, def)); - } - return absl::OkStatus(); - } - - // Creates new instance of the inference runner. InferenceBuilder stays valid - // and could be used to create another inference runner if needed. - // - // This method may take significant time to prepare new inference runner. For - // example, it may require to compile OpenGL shaders. - virtual absl::Status Build(std::unique_ptr<InferenceRunner> *runner) = 0; -}; - -// Runs prepared inference. Every object marked as external needs to be set -// prior calling Run method. 
-class InferenceRunner -{ -public: - virtual ~InferenceRunner() {} - - // Returns inference graph inputs and outputs definitions. - virtual std::vector<TensorObjectDef> inputs() const = 0; - virtual std::vector<TensorObjectDef> outputs() const = 0; - - // Getters provide access to underlying objects for the given index. - // Setters allow to set or change external object for the given index. Note, - // object need to match object definition set before in InferenceBuilder. - - virtual absl::Status GetInputObject(int index, TensorObject *object) = 0; - virtual absl::Status GetOutputObject(int index, TensorObject *object) = 0; - virtual absl::Status SetInputObject(int index, TensorObject object) = 0; - virtual absl::Status SetOutputObject(int index, TensorObject object) = 0; - - virtual absl::Status Run() = 0; -}; - -// Encapsulated compilation/runtime tradeoffs. -enum class InferenceUsage -{ - UNKNOWN, - - // InferenceRunner will be used only once. Therefore, it is important to - // minimize bootstrap time as well. - FAST_SINGLE_ANSWER, - - // Prefer maximizing the throughput. Same inference runner will be used - // repeatedly on different inputs. - SUSTAINED_SPEED, -}; - -// Defines aspects to control while instantiating a runner. -enum class InferencePriority -{ - UNKNOWN, - - AUTO, - - MIN_LATENCY, - - MAX_PRECISION, - - MIN_MEMORY_USAGE, -}; - -struct InferenceOptions -{ - InferenceUsage usage = InferenceUsage::SUSTAINED_SPEED; - - // Ordered priorities provide better understanding of desired semantics, - // where priority(n) is more important than priority(n+1). - // AUTO priority is needed when a single priority is the most important - // factor. For example, priority1 = InferencePriority::MIN_LATENCY and leaving - // everything else to AUTO would result in configuration that achieves maximum - // performance. - // - // AUTO priority can only be used when higher priorities are fully specified. 
- // For example: - // VALID: priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO - // VALID: priority1 = MIN_LATENCY, priority2 = MAX_PRECISION, - // priority3 = AUTO - // INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO - // INVALID: priority1 = MIN_LATENCY, priority2 = AUTO, - // priority3 = MAX_PRECISION - // Invalid priorities will result in error. - InferencePriority priority1 = InferencePriority::MAX_PRECISION; - - InferencePriority priority2 = InferencePriority::AUTO; - - InferencePriority priority3 = InferencePriority::AUTO; -}; - -// Returns a position number for the priority. If priority is missing, -// then it it would return 'max num priorities + 1'. -int GetPosition(const InferenceOptions &options, InferencePriority p); - -// Return true if options are valid. -bool IsValid(const InferenceOptions &options); - -// Resolves AUTO priorities and specifies them explicitly. -// Note, no-one should assume that these mappings will not change. -// Technically this function is declared here for code re-use purposes and -// by no means it should be treated as canonical way to resolve AUTO. -void ResolveAutoPriority(InferenceOptions *options); - -enum class PriorityImportance -{ - UNKNOWN, - HIGHER, - LOWER, -}; - -// If both p1 and p2 are not present in options, return UNKNOWN -// If p1 is present, but p2 is not, return HIGHER -// If p2 is present, but p1 is not, return LOWER -// If both are present, and p1 is more important, return HIGHER, otherwise, -// LOWER. 
-PriorityImportance GetRelativeImportance(const InferenceOptions &options, InferencePriority p1, - InferencePriority p2); -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_API_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Arguments.cc b/runtime/onert/backend/gpu_cl/open_cl/Arguments.cc deleted file mode 100644 index a7f86bffc..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Arguments.cc +++ /dev/null @@ -1,926 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Arguments.h" - -#include "absl/strings/ascii.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/str_replace.h" -#include "absl/strings/str_split.h" -#include "absl/strings/substitute.h" - -#include "AccessType.h" -#include "TensorType.h" -#include "DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -namespace -{ - -bool IsWordSymbol(char symbol) { return absl::ascii_isalnum(symbol) || symbol == '_'; } - -std::string GetNextWord(const std::string &code, size_t first_position) -{ - size_t pos = first_position; - char t = code[pos]; - while (IsWordSymbol(t)) - { - pos++; - t = code[pos]; - } - return code.substr(first_position, pos - first_position); -} - -size_t FindEnclosingBracket(const std::string &text, size_t first_pos, char bracket) -{ - const std::map<char, char> brackets = { - {'(', ')'}, - {'{', '}'}, - {'[', ']'}, - {'<', '>'}, - }; - char b_open = bracket; - auto it = brackets.find(b_open); - if (it == brackets.end()) - { - return -1; - } - char b_close = it->second; - size_t pos = first_pos; - int opened = 1; - int closed = 0; - while (opened != closed && pos < text.size()) - { - if (text[pos] == b_open) - { - opened++; - } - else if (text[pos] == b_close) - { - closed++; - } - pos++; - } - if (opened == closed) - { - return pos; - } - else - { - return -1; - } -} - -absl::Status ParseArgsInsideBrackets(const std::string &text, size_t open_bracket_pos, - size_t *close_bracket_pos, std::vector<std::string> *args) -{ - *close_bracket_pos = FindEnclosingBracket(text, open_bracket_pos + 1, text[open_bracket_pos]); - if (*close_bracket_pos == static_cast<size_t>(-1)) - { - return absl::NotFoundError("Not found enclosing bracket"); - } - std::string str_args = - text.substr(open_bracket_pos + 1, *close_bracket_pos - open_bracket_pos - 2); - std::vector<absl::string_view> words = absl::StrSplit(str_args, ','); - args->reserve(words.size()); - for (const auto &word : words) - { - absl::string_view arg = 
absl::StripAsciiWhitespace(word); - if (!arg.empty()) - { - args->push_back(std::string(arg)); - } - } - return absl::OkStatus(); -} - -void ReplaceAllWords(const std::string &old_word, const std::string &new_word, std::string *str) -{ - size_t position = str->find(old_word); - while (position != std::string::npos) - { - char prev = position == 0 ? '.' : (*str)[position - 1]; - char next = position + old_word.size() < str->size() ? (*str)[position + old_word.size()] : '.'; - if (IsWordSymbol(prev) || IsWordSymbol(next)) - { - position = str->find(old_word, position + 1); - continue; - } - str->replace(position, old_word.size(), new_word); - position = str->find(old_word, position + new_word.size()); - } -} - -std::string RenameArg(const std::vector<std::string> &object_names, const std::string &postfix, - const std::string &arg_name) -{ - for (const auto &object_name : object_names) - { - if (absl::StartsWith(arg_name, object_name) && arg_name.size() > object_name.size() && - arg_name[object_name.size()] == '_') - { - return object_name + postfix + - arg_name.substr(object_name.size(), arg_name.size() - object_name.size()); - } - } - return arg_name + postfix; -} - -void AppendArgument(const std::string &arg, std::string *args) -{ - if (!args->empty()) - { - absl::StrAppend(args, ",\n "); - } - absl::StrAppend(args, arg); -} - -std::string GetImageModifier(AccessType access) -{ - switch (access) - { - case AccessType::READ: - return "__read_only"; - case AccessType::WRITE: - return "__write_only"; - case AccessType::READ_WRITE: - return "__read_write"; - default: - throw std::runtime_error("Invalid AccessType"); - } -} - -std::string GetDefaultSamplers(const DeviceInfo &device_info) -{ - std::string result; - result += "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; - if (device_info.IsAdreno3xx()) - { - // Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and - // we can observe huge register 
overhead when compared to other modes. - - // While using CLK_ADDRESS_NONE with out-of-range image coordinates is - // undefined in the OpenCL specification, we have observed that - // CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image - // coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using - // CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno - // 3xx. - result += "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; - } - else - { - result += "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"; - } - - return result; -} - -} // namespace - -// Static -constexpr char Arguments::kArgsPrefix[]; - -Arguments::Arguments(Arguments &&args) - : int_values_(std::move(args.int_values_)), - shared_int4s_data_(std::move(args.shared_int4s_data_)), - float_values_(std::move(args.float_values_)), - shared_float4s_data_(std::move(args.shared_float4s_data_)), buffers_(std::move(args.buffers_)), - images2d_(std::move(args.images2d_)), image2d_arrays_(std::move(args.image2d_arrays_)), - images3d_(std::move(args.images3d_)), image_buffers_(std::move(args.image_buffers_)), - custom_memories_(std::move(args.custom_memories_)), object_refs_(std::move(args.object_refs_)), - objects_(std::move(args.objects_)) -{ -} -Arguments &Arguments::operator=(Arguments &&args) -{ - if (this != &args) - { - int_values_ = std::move(args.int_values_); - shared_int4s_data_ = std::move(args.shared_int4s_data_); - float_values_ = std::move(args.float_values_); - shared_float4s_data_ = std::move(args.shared_float4s_data_); - buffers_ = std::move(args.buffers_); - images2d_ = std::move(args.images2d_); - image2d_arrays_ = std::move(args.image2d_arrays_); - images3d_ = std::move(args.images3d_); - image_buffers_ = std::move(args.image_buffers_); - custom_memories_ = std::move(args.custom_memories_); - object_refs_ = 
std::move(args.object_refs_); - objects_ = std::move(args.objects_); - } - return *this; -} - -void Arguments::AddFloat(const std::string &name, float value) -{ - float_values_[name].value = value; -} -void Arguments::AddInt(const std::string &name, int value) { int_values_[name].value = value; } -void Arguments::AddBuffer(const std::string &name, const GPUBufferDescriptor &desc) -{ - buffers_[name] = desc; -} -void Arguments::AddImage2D(const std::string &name, const GPUImage2DDescriptor &desc) -{ - images2d_[name] = desc; -} - -void Arguments::AddImage2DArray(const std::string &name, const GPUImage2DArrayDescriptor &desc) -{ - image2d_arrays_[name] = desc; -} - -void Arguments::AddImage3D(const std::string &name, const GPUImage3DDescriptor &desc) -{ - images3d_[name] = desc; -} - -void Arguments::AddImageBuffer(const std::string &name, const GPUImageBufferDescriptor &desc) -{ - image_buffers_[name] = desc; -} - -void Arguments::AddCustomMemory(const std::string &name, const GPUCustomMemoryDescriptor &desc) -{ - custom_memories_[name] = desc; -} - -void Arguments::AddObjectRef(const std::string &name, AccessType access_type, - GPUObjectDescriptorPtr &&descriptor_ptr) -{ - descriptor_ptr->SetAccess(access_type); - object_refs_[name] = {std::move(descriptor_ptr)}; -} - -void Arguments::AddObject(const std::string &name, GPUObjectDescriptorPtr &&descriptor_ptr) -{ - descriptor_ptr->SetAccess(AccessType::READ); - objects_[name] = {nullptr, std::move(descriptor_ptr)}; -} - -void Arguments::AddGPUResources(const std::string &name, const GPUResources &resources) -{ - for (const auto &r : resources.ints) - { - AddInt(absl::StrCat(name, "_", r)); - } - for (const auto &r : resources.floats) - { - AddFloat(absl::StrCat(name, "_", r)); - } - for (const auto &r : resources.buffers) - { - AddBuffer(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto &r : resources.images2d) - { - AddImage2D(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto &r : 
resources.image2d_arrays) - { - AddImage2DArray(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto &r : resources.images3d) - { - AddImage3D(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto &r : resources.image_buffers) - { - AddImageBuffer(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto &r : resources.custom_memories) - { - AddCustomMemory(absl::StrCat(name, "_", r.first), r.second); - } -} - -absl::Status Arguments::SetInt(const std::string &name, int value) -{ - auto it = int_values_.find(name); - if (it == int_values_.end()) - { - return absl::NotFoundError(absl::StrCat("No int argument with name - ", name)); - } - it->second.value = value; - if (it->second.active) - { - shared_int4s_data_[it->second.offset] = value; - } - return absl::OkStatus(); -} - -absl::Status Arguments::SetFloat(const std::string &name, float value) -{ - auto it = float_values_.find(name); - if (it == float_values_.end()) - { - return absl::NotFoundError(absl::StrCat("No float argument with name - ", name)); - } - it->second.value = value; - if (it->second.active) - { - shared_float4s_data_[it->second.offset] = value; - } - return absl::OkStatus(); -} - -absl::Status Arguments::SetImage2D(const std::string &name, cl_mem memory) -{ - auto it = images2d_.find(name); - if (it == images2d_.end()) - { - return absl::NotFoundError(absl::StrCat("No image2D argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetBuffer(const std::string &name, cl_mem memory) -{ - auto it = buffers_.find(name); - if (it == buffers_.end()) - { - return absl::NotFoundError(absl::StrCat("No buffer argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetImage2DArray(const std::string &name, cl_mem memory) -{ - auto it = image2d_arrays_.find(name); - if (it == image2d_arrays_.end()) - { - return 
absl::NotFoundError(absl::StrCat("No image2D array argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetImage3D(const std::string &name, cl_mem memory) -{ - auto it = images3d_.find(name); - if (it == images3d_.end()) - { - return absl::NotFoundError(absl::StrCat("No image3D argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetImageBuffer(const std::string &name, cl_mem memory) -{ - auto it = image_buffers_.find(name); - if (it == image_buffers_.end()) - { - return absl::NotFoundError(absl::StrCat("No image buffer argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetCustomMemory(const std::string &name, cl_mem memory) -{ - auto it = custom_memories_.find(name); - if (it == custom_memories_.end()) - { - return absl::NotFoundError(absl::StrCat("No custom memory argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetObjectRef(const std::string &name, const GPUObject *object) -{ - auto it = object_refs_.find(name); - if (it == object_refs_.end()) - { - return absl::NotFoundError(absl::StrCat("No object ref with name - ", name)); - } - GPUResourcesWithValue resources; - RETURN_IF_ERROR(object->GetGPUResources(it->second.descriptor.get(), &resources)); - return SetGPUResources(name, resources); -} - -absl::Status Arguments::SetGPUResources(const std::string &name, - const GPUResourcesWithValue &resources) -{ - for (const auto &r : resources.ints) - { - RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.floats) - { - RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.buffers) - { - RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r 
: resources.images2d) - { - RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.image2d_arrays) - { - RETURN_IF_ERROR(SetImage2DArray(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.images3d) - { - RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.image_buffers) - { - RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.custom_memories) - { - RETURN_IF_ERROR(SetCustomMemory(absl::StrCat(name, "_", r.first), r.second)); - } - return absl::OkStatus(); -} -void Arguments::RenameArgs(const std::string &postfix, std::string *code) const -{ - size_t next_position = code->find(kArgsPrefix); - while (next_position != std::string::npos) - { - size_t arg_pos = next_position + strlen(kArgsPrefix); - std::string arg_name = GetNextWord(*code, arg_pos); - code->replace(arg_pos, arg_name.size(), arg_name + postfix); - next_position = code->find(kArgsPrefix, arg_pos + arg_name.size()); - } -} - -absl::Status Arguments::Merge(Arguments &&args, const std::string &postfix) -{ - std::vector<std::string> object_names; - object_names.reserve(args.object_refs_.size() + args.objects_.size()); - for (auto &v : args.object_refs_) - { - object_names.push_back(v.first); - const std::string name = v.first + postfix; - if (object_refs_.find(name) != object_refs_.end()) - { - return absl::InvalidArgumentError( - absl::StrCat("Object reference name collision. Name - ", name)); - } - object_refs_[name] = {std::move(v.second.descriptor)}; - } - for (auto &v : args.objects_) - { - object_names.push_back(v.first); - const std::string name = v.first + postfix; - if (objects_.find(name) != objects_.end()) - { - return absl::InvalidArgumentError(absl::StrCat("Object name collision. 
Name - ", name)); - } - objects_[name] = {std::move(v.second.obj_ptr), std::move(v.second.descriptor)}; - } - for (const auto &v : args.int_values_) - { - AddInt(RenameArg(object_names, postfix, v.first), v.second.value); - } - for (const auto &v : args.float_values_) - { - AddFloat(RenameArg(object_names, postfix, v.first), v.second.value); - } - for (const auto &v : args.buffers_) - { - AddBuffer(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto &v : args.images2d_) - { - AddImage2D(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto &v : args.image2d_arrays_) - { - AddImage2DArray(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto &v : args.images3d_) - { - AddImage3D(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto &v : args.image_buffers_) - { - AddImageBuffer(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto &v : args.custom_memories_) - { - AddCustomMemory(RenameArg(object_names, postfix, v.first), v.second); - } - return absl::OkStatus(); -} - -absl::Status Arguments::TransformToCLCode(const DeviceInfo &device_info, - const std::map<std::string, std::string> &linkables, - std::string *code) -{ - RETURN_IF_ERROR(AddObjectArgs()); - RETURN_IF_ERROR(ResolveSelectorsPass(linkables, code)); - ResolveArgsPass(device_info, code); - *code = absl::Substitute(*code, GetListOfArgs()); - *code = GetDefaultSamplers(device_info) + *code; - return absl::OkStatus(); -} - -std::string Arguments::GetListOfArgs() -{ - std::string result; - for (auto &t : buffers_) - { - const std::string type_name = t.second.data_type == DataType::FLOAT32 ? 
"float" : "half"; - std::string attributes; - for (const auto &attr : t.second.attributes) - { - attributes += absl::StrCat(" __attribute__((", attr, "))"); - } - AppendArgument(absl::StrCat(MemoryTypeToCLType(t.second.memory_type), " ", - ToCLDataType(t.second.data_type, t.second.element_size), "* ", - t.first, attributes), - &result); - } - for (auto &t : image_buffers_) - { - AppendArgument( - absl::StrCat(GetImageModifier(t.second.access_type), " image1d_buffer_t ", t.first), &result); - } - for (auto &t : images2d_) - { - AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), " image2d_t ", t.first), - &result); - } - for (auto &t : image2d_arrays_) - { - AppendArgument( - absl::StrCat(GetImageModifier(t.second.access_type), " image2d_array_t ", t.first), &result); - } - for (auto &t : images3d_) - { - AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), " image3d_t ", t.first), - &result); - } - for (auto &t : custom_memories_) - { - AppendArgument(absl::StrCat(t.second.type_name, " ", t.first), &result); - } - for (uint32_t i = 0; i < shared_int4s_data_.size() / 4; ++i) - { - AppendArgument(absl::StrCat("int4 shared_int4_", i), &result); - } - for (uint32_t i = 0; i < shared_float4s_data_.size() / 4; ++i) - { - AppendArgument(absl::StrCat("float4 shared_float4_", i), &result); - } - return result; -} - -absl::Status Arguments::Bind(cl_kernel kernel, int offset) -{ - for (auto &t : buffers_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (auto &t : image_buffers_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - 
CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (auto &t : images2d_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (auto &t : image2d_arrays_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (auto &t : images3d_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (auto &t : custom_memories_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (size_t i = 0; i < shared_int4s_data_.size() / 4; ++i) - { - const int error_code = - clSetKernelArg(kernel, offset, sizeof(int32_t) * 4, &shared_int4s_data_[i * 4]); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (size_t i = 0; i < shared_float4s_data_.size() / 4; ++i) - { - const int error_code = - clSetKernelArg(kernel, offset, sizeof(int32_t) * 4, &shared_float4s_data_[i * 4]); - if (error_code != CL_SUCCESS) - { - return 
absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - return absl::OkStatus(); -} - -std::string Arguments::AddActiveArgument(const std::string &arg_name, bool) -{ - { - auto it = int_values_.find(arg_name); - if (it != int_values_.end()) - { - int int_index; - if (it->second.active) - { - int_index = it->second.offset; - } - else - { - it->second.active = true; - it->second.offset = shared_int4s_data_.size(); - int_index = it->second.offset; - shared_int4s_data_.push_back(it->second.value); - } - std::string index = std::to_string(int_index / 4); - std::string postfixes[4] = {"x", "y", "z", "w"}; - return "shared_int4_" + index + "." + postfixes[int_index % 4]; - } - } - { - auto it = float_values_.find(arg_name); - if (it != float_values_.end()) - { - int float_index; - if (it->second.active) - { - float_index = it->second.offset; - } - else - { - it->second.active = true; - it->second.offset = shared_float4s_data_.size(); - float_index = it->second.offset; - shared_float4s_data_.push_back(it->second.value); - } - std::string index = std::to_string(float_index / 4); - std::string postfixes[4] = {"x", "y", "z", "w"}; - return "shared_float4_" + index + "." 
+ postfixes[float_index % 4]; - } - } - return arg_name; -} - -void Arguments::ResolveArgsPass(const DeviceInfo &device_info, std::string *code) -{ - bool use_f32_for_half_arguments = device_info.IsPowerVR(); - size_t position = 0; - size_t next_position = code->find(kArgsPrefix); - while (next_position != std::string::npos) - { - size_t arg_pos = next_position; - next_position += strlen(kArgsPrefix); - std::string object_name = GetNextWord(*code, next_position); - std::string new_name = AddActiveArgument(object_name, use_f32_for_half_arguments); - code->replace(arg_pos, object_name.size() + strlen(kArgsPrefix), new_name); - position = arg_pos + new_name.size(); - next_position = code->find(kArgsPrefix, position); - } - - int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4); - shared_int4s_data_.resize(shared_int4s_aligned_size); - int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4); - shared_float4s_data_.resize(shared_float4s_aligned_size); -} - -void Arguments::ResolveObjectNames(const std::string &object_name, - const std::vector<std::string> &member_names, std::string *code) -{ - for (const auto &member_name : member_names) - { - const std::string new_name = kArgsPrefix + object_name + "_" + member_name; - ReplaceAllWords(member_name, new_name, code); - } -} - -GPUObjectDescriptor *Arguments::GetObjectDescriptor(const std::string &object_name) const -{ - { - auto it = object_refs_.find(object_name); - if (it != object_refs_.end()) - { - return it->second.descriptor.get(); - } - } - { - auto it = objects_.find(object_name); - if (it != objects_.end()) - { - return it->second.descriptor.get(); - } - } - return nullptr; -} - -absl::Status Arguments::ResolveSelector(const std::map<std::string, std::string> &linkables, - const std::string &object_name, const std::string &selector, - const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) -{ - const GPUObjectDescriptor 
*desc_ptr = GetObjectDescriptor(object_name); - if (!desc_ptr) - { - return absl::NotFoundError(absl::StrCat("No object with name - ", object_name)); - } - auto names = desc_ptr->GetGPUResources().GetNames(); - const auto *tensor_desc = dynamic_cast<const TensorDescriptor *>(desc_ptr); - if (tensor_desc && selector == "Write") - { - auto it = linkables.find(object_name); - if (it != linkables.end()) - { - if (desc_ptr->GetAccess() != AccessType::WRITE && - desc_ptr->GetAccess() != AccessType::READ_WRITE) - { - return absl::FailedPreconditionError( - absl::StrCat("Object with name - ", object_name, " should have Write access.")); - } - std::string value_name, x_coord, y_coord, s_coord; - RETURN_IF_ERROR(tensor_desc->GetLinkingContextFromWriteSelector(args, &value_name, &x_coord, - &y_coord, &s_coord)); - // x_coord can have batch size property of link_object - ResolveObjectNames(object_name, names, &x_coord); - *result = it->second; - ReplaceAllWords("in_out_value", value_name, result); - ReplaceAllWords("X_COORD", x_coord, result); - ReplaceAllWords("Y_COORD", y_coord, result); - ReplaceAllWords("S_COORD", s_coord, result); - RETURN_IF_ERROR(ResolveSelectorsPass({}, result)); - } - } - std::string patch; - RETURN_IF_ERROR(desc_ptr->PerformSelector(selector, args, template_args, &patch)); - ResolveObjectNames(object_name, names, &patch); - *result += patch; - return absl::OkStatus(); -} - -absl::Status Arguments::ResolveSelectorsPass(const std::map<std::string, std::string> &linkables, - std::string *code) -{ - std::string result; - size_t position = 0; - size_t next_position = code->find(kArgsPrefix); - while (next_position != std::string::npos) - { - size_t arg_pos = next_position; - next_position += strlen(kArgsPrefix); - std::string object_name = GetNextWord(*code, next_position); - char next = (*code)[next_position + object_name.size()]; - if (next == '.') - { - next_position += object_name.size() + 1; - std::string selector_name = GetNextWord(*code, 
next_position); - next_position += selector_name.size(); - next = (*code)[next_position]; - std::vector<std::string> template_args; - if (next == '<') - { - size_t close_bracket_pos; - RETURN_IF_ERROR( - ParseArgsInsideBrackets(*code, next_position, &close_bracket_pos, &template_args)); - next_position = close_bracket_pos; - next = (*code)[next_position]; - } - if (next != '(') - { - return absl::NotFoundError( - absl::StrCat("Expected ( after ", object_name, ".", selector_name, " call")); - } - std::vector<std::string> args; - size_t close_bracket_pos; - RETURN_IF_ERROR(ParseArgsInsideBrackets(*code, next_position, &close_bracket_pos, &args)); - for (auto &arg : args) - { - RETURN_IF_ERROR(ResolveSelectorsPass({}, &arg)); - } - std::string patch; - RETURN_IF_ERROR( - ResolveSelector(linkables, object_name, selector_name, args, template_args, &patch)); - code->replace(arg_pos, close_bracket_pos - arg_pos, patch); - position = arg_pos + patch.size(); - } - else - { - position = arg_pos + strlen(kArgsPrefix); - } - next_position = code->find(kArgsPrefix, position); - } - return absl::OkStatus(); -} - -absl::Status Arguments::AllocateObjects(CLContext *context) -{ - for (auto &t : objects_) - { - RETURN_IF_ERROR(t.second.descriptor->CreateGPUObject(context, &t.second.obj_ptr)); - } - return absl::OkStatus(); -} - -void Arguments::ReleaseCPURepresentation() -{ - for (auto &t : objects_) - { - t.second.descriptor->Release(); - } -} - -absl::Status Arguments::AddObjectArgs() -{ - for (auto &t : objects_) - { - AddGPUResources(t.first, t.second.descriptor->GetGPUResources()); - GPUResourcesWithValue resources; - RETURN_IF_ERROR(t.second.obj_ptr->GetGPUResources(t.second.descriptor.get(), &resources)); - RETURN_IF_ERROR(SetGPUResources(t.first, resources)); - } - for (auto &t : object_refs_) - { - AddGPUResources(t.first, t.second.descriptor->GetGPUResources()); - } - return absl::OkStatus(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff 
--git a/runtime/onert/backend/gpu_cl/open_cl/Arguments.h b/runtime/onert/backend/gpu_cl/open_cl/Arguments.h deleted file mode 100644 index 0c6ce1edf..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Arguments.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__ - -#include <map> -#include <string> -#include <vector> - -#include "ClDevice.h" -#include "GpuObject.h" -#include "OpenclWrapper.h" - -#include "AccessType.h" -#include "Types.h" -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class ArgumentsBinder -{ -public: - virtual absl::Status SetInt(const std::string &name, int value) = 0; - virtual absl::Status SetFloat(const std::string &name, float value) = 0; - virtual ~ArgumentsBinder() = default; -}; - -class Arguments : public ArgumentsBinder -{ -public: - Arguments() = default; - void AddFloat(const std::string &name, float value = 0.0f); - void AddInt(const std::string &name, int value = 0); - void AddObjectRef(const std::string &name, AccessType access_type, - GPUObjectDescriptorPtr &&descriptor_ptr); - void AddObject(const std::string &name, GPUObjectDescriptorPtr &&descriptor_ptr); - - absl::Status 
SetInt(const std::string &name, int value) override; - absl::Status SetFloat(const std::string &name, float value) override; - absl::Status SetObjectRef(const std::string &name, const GPUObject *object); - - absl::Status Bind(cl_kernel kernel, int offset = 0); - - void RenameArgs(const std::string &postfix, std::string *code) const; - absl::Status Merge(Arguments &&args, const std::string &postfix); - - absl::Status AllocateObjects(CLContext *context); - void ReleaseCPURepresentation(); - absl::Status TransformToCLCode(const DeviceInfo &device_info, - const std::map<std::string, std::string> &linkables, - std::string *code); - - // Move only - Arguments(Arguments &&args); - Arguments &operator=(Arguments &&args); - Arguments(const Arguments &) = delete; - Arguments &operator=(const Arguments &) = delete; - - ~Arguments() override = default; - -private: - void AddBuffer(const std::string &name, const GPUBufferDescriptor &desc); - void AddImage2D(const std::string &name, const GPUImage2DDescriptor &desc); - void AddImage2DArray(const std::string &name, const GPUImage2DArrayDescriptor &desc); - void AddImage3D(const std::string &name, const GPUImage3DDescriptor &desc); - void AddImageBuffer(const std::string &name, const GPUImageBufferDescriptor &desc); - void AddCustomMemory(const std::string &name, const GPUCustomMemoryDescriptor &desc); - - absl::Status SetImage2D(const std::string &name, cl_mem memory); - absl::Status SetBuffer(const std::string &name, cl_mem memory); - absl::Status SetImage2DArray(const std::string &name, cl_mem memory); - absl::Status SetImage3D(const std::string &name, cl_mem memory); - absl::Status SetImageBuffer(const std::string &name, cl_mem memory); - absl::Status SetCustomMemory(const std::string &name, cl_mem memory); - - std::string GetListOfArgs(); - - std::string AddActiveArgument(const std::string &arg_name, bool use_f32_for_halfs); - void AddGPUResources(const std::string &name, const GPUResources &resources); - - absl::Status 
SetGPUResources(const std::string &name, const GPUResourcesWithValue &resources); - - absl::Status AddObjectArgs(); - - void ResolveArgsPass(const DeviceInfo &device_info, std::string *code); - absl::Status ResolveSelectorsPass(const std::map<std::string, std::string> &linkables, - std::string *code); - - absl::Status ResolveSelector(const std::map<std::string, std::string> &linkables, - const std::string &object_name, const std::string &selector, - const std::vector<std::string> &args, - const std::vector<std::string> &template_args, std::string *result); - - void ResolveObjectNames(const std::string &object_name, - const std::vector<std::string> &member_names, std::string *code); - - GPUObjectDescriptor *GetObjectDescriptor(const std::string &object_name) const; - - static constexpr char kArgsPrefix[] = "args."; - - struct IntValue - { - int value; - - // many uniforms generated automatically and not used - // to reduce amount of data transferred we adding this optimization - bool active = false; - - // offset to shared uniform storage. - uint32_t offset = -1; - }; - std::map<std::string, IntValue> int_values_; - std::vector<int32_t> shared_int4s_data_; - - struct FloatValue - { - float value; - - // many uniforms generated automatically and not used - // to reduce amount of data transferred we adding this optimization - bool active = false; - - // offset to shared uniform storage. 
- uint32_t offset = -1; - }; - std::map<std::string, FloatValue> float_values_; - std::vector<float> shared_float4s_data_; - - std::map<std::string, GPUBufferDescriptor> buffers_; - std::map<std::string, GPUImage2DDescriptor> images2d_; - std::map<std::string, GPUImage2DArrayDescriptor> image2d_arrays_; - std::map<std::string, GPUImage3DDescriptor> images3d_; - std::map<std::string, GPUImageBufferDescriptor> image_buffers_; - std::map<std::string, GPUCustomMemoryDescriptor> custom_memories_; - - struct ObjectRefArg - { - GPUObjectDescriptorPtr descriptor; - }; - std::map<std::string, ObjectRefArg> object_refs_; - - struct ObjectArg - { - GPUObjectPtr obj_ptr; - GPUObjectDescriptorPtr descriptor; - }; - std::map<std::string, ObjectArg> objects_; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Buffer.cc b/runtime/onert/backend/gpu_cl/open_cl/Buffer.cc deleted file mode 100644 index 64c071921..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Buffer.cc +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Buffer.h" - -#include <string> - -#include "ClContext.h" -#include "DataType.h" -#include "GpuObject.h" -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -absl::Status CreateBuffer(size_t size_in_bytes, bool gpu_read_only, const void *data, - CLContext *context, Buffer *result) -{ - cl_mem buffer; - RETURN_IF_ERROR(CreateCLBuffer(context->context(), size_in_bytes, gpu_read_only, - const_cast<void *>(data), &buffer)); - *result = Buffer(buffer, size_in_bytes); - - return absl::OkStatus(); -} - -} // namespace - -BufferDescriptor::BufferDescriptor(BufferDescriptor &&desc) - : GPUObjectDescriptor(std::move(desc)), element_type(desc.element_type), - element_size(desc.element_size), memory_type(desc.memory_type), - attributes(std::move(desc.attributes)), size(desc.size), data(std::move(desc.data)) -{ -} - -BufferDescriptor &BufferDescriptor::operator=(BufferDescriptor &&desc) -{ - if (this != &desc) - { - std::swap(element_type, desc.element_type); - std::swap(element_size, desc.element_size); - std::swap(memory_type, desc.memory_type); - attributes = std::move(desc.attributes); - std::swap(size, desc.size); - data = std::move(desc.data); - GPUObjectDescriptor::operator=(std::move(desc)); - } - return *this; -} - -void BufferDescriptor::Release() { data.clear(); } - -GPUResources BufferDescriptor::GetGPUResources() const -{ - GPUResources resources; - GPUBufferDescriptor desc; - desc.data_type = element_type; - desc.access_type = access_type_; - desc.element_size = element_size; - desc.memory_type = memory_type; - desc.attributes = attributes; - resources.buffers.push_back({"buffer", desc}); - return resources; -} - -absl::Status BufferDescriptor::PerformSelector(const std::string &selector, - const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const -{ - if (selector == "Read") - { - return PerformReadSelector(args, result); 
- } - else if (selector == "GetPtr") - { - return PerformGetPtrSelector(args, template_args, result); - } - else - { - return absl::NotFoundError( - absl::StrCat("BufferDescriptor don't have selector with name - ", selector)); - } -} - -absl::Status BufferDescriptor::PerformReadSelector(const std::vector<std::string> &args, - std::string *result) const -{ - if (args.size() != 1) - { - return absl::NotFoundError( - absl::StrCat("BufferDescriptor Read require one argument, but ", args.size(), " was passed")); - } - *result = absl::StrCat("buffer[", args[0], "]"); - return absl::OkStatus(); -} - -absl::Status BufferDescriptor::PerformGetPtrSelector(const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const -{ - if (args.size() > 1) - { - return absl::NotFoundError(absl::StrCat( - "BufferDescriptor GetPtr require one or zero arguments, but ", args.size(), " was passed")); - } - if (template_args.size() > 1) - { - return absl::NotFoundError(absl::StrCat("BufferDescriptor GetPtr require one or zero teemplate " - "arguments, but ", - template_args.size(), " was passed")); - } - std::string conversion; - if (template_args.size() == 1) - { - const std::string type_name = ToCLDataType(element_type, element_size); - if (type_name != template_args[0]) - { - conversion = absl::StrCat("(", MemoryTypeToCLType(memory_type), " ", template_args[0], "*)&"); - } - } - if (args.empty()) - { - *result = absl::StrCat(conversion, "buffer"); - } - else if (conversion.empty()) - { - *result = absl::StrCat("(buffer + ", args[0], ")"); - } - else - { - *result = absl::StrCat(conversion, "buffer[", args[0], "]"); - } - return absl::OkStatus(); -} - -absl::Status BufferDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const -{ - Buffer gpu_buffer; - RETURN_IF_ERROR(gpu_buffer.CreateFromBufferDescriptor(*this, context)); - *result = absl::make_unique<Buffer>(std::move(gpu_buffer)); - return absl::OkStatus(); -} - 
-Buffer::Buffer(cl_mem buffer, size_t size_in_bytes) : buffer_(buffer), size_(size_in_bytes) {} - -Buffer::Buffer(Buffer &&buffer) : buffer_(buffer.buffer_), size_(buffer.size_) -{ - buffer.buffer_ = nullptr; - buffer.size_ = 0; -} - -Buffer &Buffer::operator=(Buffer &&buffer) -{ - if (this != &buffer) - { - Release(); - std::swap(size_, buffer.size_); - std::swap(buffer_, buffer.buffer_); - } - return *this; -} - -void Buffer::Release() -{ - if (buffer_) - { - clReleaseMemObject(buffer_); - buffer_ = nullptr; - size_ = 0; - } -} - -absl::Status Buffer::GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const -{ - const auto *buffer_desc = dynamic_cast<const BufferDescriptor *>(obj_ptr); - if (!buffer_desc) - { - return absl::InvalidArgumentError("Expected BufferDescriptor on input."); - } - - resources->buffers.push_back({"buffer", buffer_}); - return absl::OkStatus(); -} - -absl::Status Buffer::CreateFromBufferDescriptor(const BufferDescriptor &desc, CLContext *context) -{ - bool read_only = desc.memory_type == MemoryType::CONSTANT; - uint8_t *data_ptr = desc.data.empty() ? 
nullptr : const_cast<unsigned char *>(desc.data.data()); - size_ = desc.size; - return CreateCLBuffer(context->context(), desc.size, read_only, data_ptr, &buffer_); -} - -absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext *context, Buffer *result) -{ - return CreateBuffer(size_in_bytes, true, nullptr, context, result); -} - -absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, const void *data, CLContext *context, - Buffer *result) -{ - return CreateBuffer(size_in_bytes, true, data, context, result); -} - -absl::Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext *context, Buffer *result) -{ - return CreateBuffer(size_in_bytes, false, nullptr, context, result); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Buffer.h b/runtime/onert/backend/gpu_cl/open_cl/Buffer.h deleted file mode 100644 index 39e97be6d..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Buffer.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__ - -#include "absl/strings/str_cat.h" -#include "absl/types/span.h" - -#include "ClCommandQueue.h" -#include "ClContext.h" -#include "GpuObject.h" -#include "OpenclWrapper.h" -#include "DataType.h" -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -struct BufferDescriptor : public GPUObjectDescriptor -{ - DataType element_type; - int element_size; - MemoryType memory_type = MemoryType::GLOBAL; - std::vector<std::string> attributes; - - // optional - int size = 0; - std::vector<uint8_t> data; - - BufferDescriptor() = default; - BufferDescriptor(const BufferDescriptor &) = default; - BufferDescriptor &operator=(const BufferDescriptor &) = default; - BufferDescriptor(BufferDescriptor &&desc); - BufferDescriptor &operator=(BufferDescriptor &&desc); - - absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const override; - - GPUResources GetGPUResources() const override; - absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const; - absl::Status PerformGetPtrSelector(const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const; - - absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override; - void Release() override; -}; - -// Buffer represent linear GPU data storage with arbitrary data format. -// Buffer is moveable but not copyable. 
-class Buffer : public GPUObject -{ -public: - Buffer() {} // just for using Buffer as a class members - Buffer(cl_mem buffer, size_t size_in_bytes); - - // Move only - Buffer(Buffer &&buffer); - Buffer &operator=(Buffer &&buffer); - Buffer(const Buffer &) = delete; - Buffer &operator=(const Buffer &) = delete; - - virtual ~Buffer() { Release(); } - - // for profiling and memory statistics - uint64_t GetMemorySizeInBytes() const { return size_; } - - cl_mem GetMemoryPtr() const { return buffer_; } - - // Writes data to a buffer. Data should point to a region that - // has exact size in bytes as size_in_bytes(constructor parameter). - template <typename T> absl::Status WriteData(CLCommandQueue *queue, const std::vector<T> *data); - - // Reads data from Buffer into CPU memory. - template <typename T> absl::Status ReadData(CLCommandQueue *queue, std::vector<T> *result) const; - - absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const override; - - absl::Status CreateFromBufferDescriptor(const BufferDescriptor &desc, CLContext *context); - -private: - void Release(); - - cl_mem buffer_ = nullptr; - size_t size_ = 0; -}; - -absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext *context, Buffer *result); - -absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, const void *data, CLContext *context, - Buffer *result); - -absl::Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext *context, Buffer *result); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc b/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc deleted file mode 100644 index d147b7b13..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ClCommandQueue.h" - -#include <algorithm> -#include <map> -#include <string> -#include <vector> -#include <limits> - -#include "absl/strings/str_cat.h" -#include "ClDevice.h" -#include "ClEvent.h" -#include "Util.h" -#include "Types.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -using namespace std; - -CLCommandQueue::CLCommandQueue(cl_command_queue queue, bool has_ownership) - : queue_(queue), has_ownership_(has_ownership) -{ -} - -CLCommandQueue::CLCommandQueue(CLCommandQueue &&queue) - : queue_(queue.queue_), has_ownership_(queue.has_ownership_) -{ - queue.queue_ = nullptr; -} - -CLCommandQueue &CLCommandQueue::operator=(CLCommandQueue &&queue) -{ - if (this != &queue) - { - Release(); - std::swap(queue_, queue.queue_); - has_ownership_ = queue.has_ownership_; - } - return *this; -} - -CLCommandQueue::~CLCommandQueue() { Release(); } - -void CLCommandQueue::Release() -{ - if (has_ownership_ && queue_) - { - clReleaseCommandQueue(queue_); - queue_ = nullptr; - } -} - -absl::Status CLCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count, - const int3 &work_group_size, CLEvent *event) -{ - std::vector<size_t> local(3); - std::vector<size_t> global(3); - for (int i = 0; i < 3; ++i) - { - local[i] = work_group_size[i]; - global[i] = 
work_groups_count[i] * work_group_size[i]; - } - cl_event resulting_event; - const int error_code = - clEnqueueNDRangeKernel(queue_, kernel.kernel(), 3, nullptr, global.data(), local.data(), 0, - nullptr, event ? &resulting_event : nullptr); - if (event) - { - *event = CLEvent(resulting_event); - } - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to clEnqueueNDRangeKernel - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count, - const int3 &work_group_size) -{ - return Dispatch(kernel, work_groups_count, work_group_size, nullptr); -} - -absl::Status CLCommandQueue::EnqueueEvent(CLEvent *event) -{ - cl_event resulting_event; - const int error_code = clEnqueueMarker(queue_, &resulting_event); - *event = CLEvent(resulting_event); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to clEnqueueMarker - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueWriteImage(cl_mem memory, int3 region, const void *data) -{ - const size_t origin[] = {0, 0, 0}; - const size_t r[] = {static_cast<size_t>(region.x), static_cast<size_t>(region.y), - static_cast<size_t>(region.z)}; - auto error_code = - clEnqueueWriteImage(queue_, memory, CL_TRUE, origin, r, 0, 0, data, 0, nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to upload data to GPU (clEnqueueWriteImage) - ", - CLErrorCodeToString(error_code))); - } - - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueReadImage(cl_mem memory, int3 region, void *data) -{ - const size_t origin[] = {0, 0, 0}; - const size_t r[] = {static_cast<size_t>(region.x), static_cast<size_t>(region.y), - static_cast<size_t>(region.z)}; - auto error_code = - clEnqueueReadImage(queue_, memory, CL_TRUE, origin, r, 0, 0, data, 0, nullptr, 
nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to read data from GPU (clEnqueueReadImage) - ", - CLErrorCodeToString(error_code))); - } - - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes, - const void *data) -{ - auto error_code = - clEnqueueWriteBuffer(queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to upload data to GPU (clEnqueueWriteBuffer) - ", - CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes, void *data) -{ - auto error_code = - clEnqueueReadBuffer(queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to read data from GPU (clEnqueueReadBuffer) - ", - CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::WaitForCompletion() -{ - auto error_code = clFinish(queue_); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to clFinish - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -ProfilingCommandQueue::ProfilingCommandQueue(cl_command_queue queue) : CLCommandQueue(queue, true) -{ - events_.reserve(128); -} - -ProfilingCommandQueue::ProfilingCommandQueue(ProfilingCommandQueue &&queue) - : CLCommandQueue(std::move(queue)), events_(std::move(queue.events_)), - current_label_(std::move(queue.current_label_)) -{ -} - -ProfilingCommandQueue &ProfilingCommandQueue::operator=(ProfilingCommandQueue &&queue) -{ - if (this != &queue) - { - events_ = std::move(queue.events_); - current_label_ = std::move(queue.current_label_); - CLCommandQueue::operator=(std::move(queue)); - } - return *this; -} - -void 
ProfilingCommandQueue::SetEventsLabel(const std::string &name) { current_label_ = name; } - -void ProfilingCommandQueue::ResetMeasurements() { events_.clear(); } - -absl::Status ProfilingCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count, - const int3 &work_group_size) -{ - events_.push_back(CLEvent()); - RETURN_IF_ERROR(CLCommandQueue::Dispatch(kernel, work_groups_count, work_group_size, - &events_[events_.size() - 1])); - events_.back().SetName(current_label_); - return absl::OkStatus(); -} - -absl::Status -ProfilingCommandQueue::GetBestWorkGroupIndex(const CLKernel &kernel, const DeviceInfo &device_info, - const std::vector<int3> &work_groups_count, - const std::vector<int3> &work_group_sizes, int *index) -{ - // Some Adreno 3xx can have wrong numbers for some events - const bool possible_bug_with_events = device_info.IsAdreno3xx(); - events_.resize(work_group_sizes.size()); - for (size_t i = 0; i < work_group_sizes.size(); ++i) - { - RETURN_IF_ERROR( - CLCommandQueue::Dispatch(kernel, work_groups_count[i], work_group_sizes[i], &events_[i])); - - // reducing the speed of memory leak on Mali for some kernels - if (device_info.IsMali() && i % 8 == 7) - { - events_[i - 7].Wait(); - } - if (possible_bug_with_events) - { - // We are trying to increase probability for correct result. - RETURN_IF_ERROR(WaitForCompletion()); - } - } - - RETURN_IF_ERROR(WaitForCompletion()); - - // To release memory of some kernel pool on Mali. 
- if (device_info.IsMali()) - { - RETURN_IF_ERROR(kernel.ReInit()); - } - - int minimum_index = 0; - double minimum_time = std::numeric_limits<double>::max(); - if (possible_bug_with_events) - { // we will try to cut out suspicious results - double average_time = 0.0; - int average_samples_count = 0; - for (size_t i = 0; i < work_group_sizes.size(); ++i) - { - if (events_[i].GetEventTimeMs() < 100 * 1000) - { // 100 sec - average_time += events_[i].GetEventTimeMs(); - average_samples_count++; - } - } - if (average_samples_count == 0) - { - throw std::runtime_error("It cannot be divided by zero"); - } - else - { - average_time /= average_samples_count; - } - - for (size_t i = 0; i < work_group_sizes.size(); ++i) - { - double time = events_[i].GetEventTimeMs(); - if (time < minimum_time && time >= 0.1 * average_time) - { - minimum_index = i; - minimum_time = time; - } - } - } - else - { - for (size_t i = 0; i < work_group_sizes.size(); ++i) - { - double time = events_[i].GetEventTimeMs(); - if (time < minimum_time) - { - minimum_index = i; - minimum_time = time; - } - } - } - - *index = minimum_index; - - return absl::OkStatus(); -} - -absl::Status CreateCLCommandQueue(const CLDevice &device, const CLContext &context, - CLCommandQueue *result) -{ - int error_code; - cl_command_queue queue = clCreateCommandQueue(context.context(), device.id(), 0, &error_code); - if (!queue) - { - return absl::UnknownError( - absl::StrCat("Failed to create a command queue - ", CLErrorCodeToString(error_code))); - } - *result = CLCommandQueue(queue, true); - return absl::OkStatus(); -} - -double ProfilingCommandQueue::GetQueueExecutionTimeMs() const -{ - const uint64_t start = events_.front().GetStartedTimeNs(); - const uint64_t end = events_.back().GetFinishedTimeNs(); - const uint64_t time_ns = (end - start); - - return static_cast<double>(time_ns) / 1000000.0; -} - -double ProfilingCommandQueue::GetSumOfEventsTimeMs() const -{ - double sum = 0.0; - for (uint32_t i = 0; i < 
events_.size(); ++i) - { - sum += events_[i].GetEventTimeMs(); - } - return sum; -} - -absl::Status CreateProfilingCommandQueue(const CLDevice &device, const CLContext &context, - ProfilingCommandQueue *result) -{ - int error_code; - cl_command_queue queue = - clCreateCommandQueue(context.context(), device.id(), CL_QUEUE_PROFILING_ENABLE, &error_code); - if (!queue) - { - return absl::UnknownError( - absl::StrCat("Failed to create a command queue - ", CLErrorCodeToString(error_code))); - } - - *result = ProfilingCommandQueue(queue); - return absl::OkStatus(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h b/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h deleted file mode 100644 index 81f93fd23..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__ - -#include <cstdint> -#include <string> -#include <vector> - -#include "absl/time/time.h" -#include "ClContext.h" -#include "ClDevice.h" -#include "ClEvent.h" -#include "ClKernel.h" -#include "OpenclWrapper.h" -#include "Types.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -struct ProfilingInfo -{ - struct DispatchInfo - { - std::string label; - absl::Duration duration; - }; - - std::vector<DispatchInfo> dispatches; - - absl::Duration GetTotalTime() const; - - // Returns report (string of lines delimited by \n) - // This method uses GPU counters and measure GPU time only. - // Report has next structure: - // Per kernel timing(K kernels): - // conv2d 3.2ms - // ... - // -------------------- - // Accumulated time per operation type: - // conv2d - 14.5ms - // .... - // -------------------- - // Ideal total time: 23.4ms // Total time for all kernels - std::string GetDetailedReport() const; -}; - -// A wrapper around opencl command queue -class CLCommandQueue -{ -public: - CLCommandQueue() {} - CLCommandQueue(cl_command_queue queue, bool has_ownership); - - // Move only - CLCommandQueue(CLCommandQueue &&queue); - CLCommandQueue &operator=(CLCommandQueue &&queue); - CLCommandQueue(const CLCommandQueue &) = delete; - CLCommandQueue &operator=(const CLCommandQueue &) = delete; - - virtual ~CLCommandQueue(); - - cl_command_queue queue() const { return queue_; } - - virtual absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count, - const int3 &work_group_size); - - absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count, - const int3 &work_group_size, CLEvent *event); - - absl::Status EnqueueEvent(CLEvent *event); - - absl::Status EnqueueWriteImage(cl_mem memory, int3 region, const void *data); - absl::Status EnqueueReadImage(cl_mem memory, int3 region, void *data); 
- - absl::Status EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes, const void *data); - absl::Status EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes, void *data); - - absl::Status WaitForCompletion(); - -protected: - void Release(); - - cl_command_queue queue_ = nullptr; - bool has_ownership_ = false; -}; - -class ProfilingCommandQueue : public CLCommandQueue -{ -public: - ProfilingCommandQueue() {} - explicit ProfilingCommandQueue(cl_command_queue queue); - - // Move only - ProfilingCommandQueue(ProfilingCommandQueue &&queue); - ProfilingCommandQueue &operator=(ProfilingCommandQueue &&queue); - ProfilingCommandQueue(const ProfilingCommandQueue &) = delete; - ProfilingCommandQueue &operator=(const ProfilingCommandQueue &) = delete; - - absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count, - const int3 &work_group_size) override; - - // will write index for fastest work_group among work_group_sizes - absl::Status GetBestWorkGroupIndex(const CLKernel &kernel, const DeviceInfo &device_info, - const std::vector<int3> &work_groups_count, - const std::vector<int3> &work_group_sizes, int *index); - - // call ResetMeasurements() to start new seriese of measurements - void ResetMeasurements(); - - double GetQueueExecutionTimeMs() const; - - // Difference from GetQueueExecutionTimeMs is that this number doesn't include - // time between kernels(kernels launches or preparing) on GPU. Usually, this - // time should be 5-10% better than GetQueueExecutionTimeMs, because 5-10% - // spend on something else(maybe kernels launches or preparing) - double GetSumOfEventsTimeMs() const; - - // This label will be used for all subsequent dispatches. 
- void SetEventsLabel(const std::string &name); - -private: - std::vector<CLEvent> events_; - std::string current_label_; -}; - -absl::Status CreateCLCommandQueue(const CLDevice &device, const CLContext &context, - CLCommandQueue *result); - -absl::Status CreateProfilingCommandQueue(const CLDevice &device, const CLContext &context, - ProfilingCommandQueue *result); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClContext.cc b/runtime/onert/backend/gpu_cl/open_cl/ClContext.cc deleted file mode 100644 index 3289ff914..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClContext.cc +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ClContext.h" - -#include "absl/strings/str_cat.h" -#include "ClImageFormat.h" -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::vector<cl_image_format> GetSupportedImage2DFormats(cl_context context, cl_mem_flags flags) -{ - cl_uint num_image_formats; - cl_int error = clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, - &num_image_formats); - if (error != CL_SUCCESS) - { - return {}; - } - - std::vector<cl_image_format> result(num_image_formats); - error = clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D, num_image_formats, - &result[0], nullptr); - if (error != CL_SUCCESS) - { - return {}; - } - return result; -} - -bool IsEqualToImageFormat(cl_image_format image_format, DataType data_type, int num_channels) -{ - return image_format.image_channel_data_type == ToImageChannelType(data_type) && - image_format.image_channel_order == ToChannelOrder(num_channels); -} - -void AddSupportedImageFormats(cl_context context, DeviceInfo *info) -{ - auto supported_formats = GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE); - for (auto format : supported_formats) - { - info->supports_r_f16_tex2d = - info->supports_r_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 1); - info->supports_rg_f16_tex2d = - info->supports_rg_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 2); - info->supports_rgb_f16_tex2d = - info->supports_rgb_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 3); - info->supports_rgba_f16_tex2d = - info->supports_rgba_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 4); - info->supports_r_f32_tex2d = - info->supports_r_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 1); - info->supports_rg_f32_tex2d = - info->supports_rg_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 2); - info->supports_rgb_f32_tex2d = - info->supports_rgb_f32_tex2d || 
IsEqualToImageFormat(format, DataType::FLOAT32, 3); - info->supports_rgba_f32_tex2d = - info->supports_rgba_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 4); - } -} - -absl::Status CreateCLContext(const CLDevice &device, cl_context_properties *properties, - CLContext *result) -{ - int error_code; - cl_device_id device_id = device.id(); - cl_context context = clCreateContext(properties, 1, &device_id, nullptr, nullptr, &error_code); - if (!context) - { - return absl::UnknownError( - absl::StrCat("Failed to create a compute context - ", CLErrorCodeToString(error_code))); - } - AddSupportedImageFormats(context, &device.info_); - - *result = CLContext(context, true); - return absl::OkStatus(); -} - -} // namespace - -CLContext::CLContext(cl_context context, bool has_ownership) - : context_(context), has_ownership_(has_ownership) -{ -} - -CLContext::CLContext(CLContext &&context) - : context_(context.context_), has_ownership_(context.has_ownership_) -{ - context.context_ = nullptr; -} - -CLContext &CLContext::operator=(CLContext &&context) -{ - if (this != &context) - { - Release(); - std::swap(context_, context.context_); - has_ownership_ = context.has_ownership_; - } - return *this; -} - -CLContext::~CLContext() { Release(); } - -void CLContext::Release() -{ - if (has_ownership_ && context_) - { - clReleaseContext(context_); - context_ = nullptr; - } -} - -bool CLContext::IsFloatTexture2DSupported(int num_channels, DataType data_type, - cl_mem_flags flags) const -{ - auto supported_formats = GetSupportedImage2DFormats(context_, flags); - for (auto format : supported_formats) - { - if (format.image_channel_data_type == ToImageChannelType(data_type) && - format.image_channel_order == ToChannelOrder(num_channels)) - { - return true; - } - } - - return false; -} - -absl::Status CreateCLContext(const CLDevice &device, CLContext *result) -{ - return CreateCLContext(device, nullptr, result); -} - -absl::Status CreateCLGLContext(const CLDevice &device, 
cl_context_properties egl_context, - cl_context_properties egl_display, CLContext *result) -{ - if (!device.SupportsExtension("cl_khr_gl_sharing")) - { - return absl::UnavailableError("Device doesn't support CL-GL sharing."); - } - cl_context_properties platform = reinterpret_cast<cl_context_properties>(device.platform()); - cl_context_properties props[] = {CL_GL_CONTEXT_KHR, - egl_context, - CL_EGL_DISPLAY_KHR, - egl_display, - CL_CONTEXT_PLATFORM, - platform, - 0}; - return CreateCLContext(device, props, result); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClContext.h b/runtime/onert/backend/gpu_cl/open_cl/ClContext.h deleted file mode 100644 index cf1d0d2d2..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClContext.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__ - -#include "ClDevice.h" -#include "OpenclWrapper.h" -#include "DataType.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// A RAII wrapper around opencl context -class CLContext -{ -public: - CLContext() {} - CLContext(cl_context context, bool has_ownership); - - // Move only - CLContext(CLContext &&context); - CLContext &operator=(CLContext &&context); - CLContext(const CLContext &) = delete; - CLContext &operator=(const CLContext &) = delete; - - ~CLContext(); - - cl_context context() const { return context_; } - - bool IsFloatTexture2DSupported(int num_channels, DataType data_type, - cl_mem_flags flags = CL_MEM_READ_WRITE) const; - -private: - void Release(); - - cl_context context_ = nullptr; - bool has_ownership_ = false; -}; - -absl::Status CreateCLContext(const CLDevice &device, CLContext *result); -absl::Status CreateCLGLContext(const CLDevice &device, cl_context_properties egl_context, - cl_context_properties egl_display, CLContext *result); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc b/runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc deleted file mode 100644 index 8dede139c..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc +++ /dev/null @@ -1,448 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ClDevice.h" - -#include <algorithm> -#include <string> -#include <vector> - -#include "Util.h" -#include "Status.h" - -#include "absl/strings/numbers.h" -#include "absl/strings/str_split.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <> std::string GetDeviceInfo<std::string>(cl_device_id id, cl_device_info info) -{ - size_t size; - cl_int error = clGetDeviceInfo(id, info, 0, nullptr, &size); - if (error != CL_SUCCESS) - { - return ""; - } - - std::string result(size - 1, 0); - error = clGetDeviceInfo(id, info, size, &result[0], nullptr); - if (error != CL_SUCCESS) - { - return ""; - } - return result; -} - -namespace -{ -template <typename T> T GetPlatformInfo(cl_platform_id id, cl_platform_info info) -{ - T result; - cl_int error = clGetPlatformInfo(id, info, sizeof(T), &result, nullptr); - if (error != CL_SUCCESS) - { - return -1; - } - return result; -} - -std::string GetPlatformInfo(cl_platform_id id, cl_platform_info info) -{ - size_t size; - cl_int error = clGetPlatformInfo(id, info, 0, nullptr, &size); - if (error != CL_SUCCESS) - { - return ""; - } - - std::string result(size - 1, 0); - error = clGetPlatformInfo(id, info, size, &result[0], nullptr); - if (error != CL_SUCCESS) - { - return ""; - } - return result; -} - -void GetDeviceWorkDimsSizes(cl_device_id id, int3 *result) -{ - int dims_count = GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS); - if (dims_count < 3) - { - return; - } - std::vector<size_t> limits(dims_count); - cl_int error = clGetDeviceInfo(id, 
CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * dims_count, - limits.data(), nullptr); - if (error != CL_SUCCESS) - { - return; - } - // dims_count must be at least 3 according to spec - result->x = limits[0]; - result->y = limits[1]; - result->z = limits[2]; -} - -OpenCLVersion ParseCLVersion(const std::string &version) -{ - const auto first_dot_pos = version.find_first_of('.'); - if (first_dot_pos == std::string::npos) - { - return OpenCLVersion::CL_1_0; - } - const int major = version[first_dot_pos - 1] - '0'; - const int minor = version[first_dot_pos + 1] - '0'; - - if (major == 1) - { - if (minor == 2) - { - return OpenCLVersion::CL_1_2; - } - else if (minor == 1) - { - return OpenCLVersion::CL_1_1; - } - else - { - return OpenCLVersion::CL_1_0; - } - } - else if (major == 2) - { - if (minor == 2) - { - return OpenCLVersion::CL_2_2; - } - else if (minor == 1) - { - return OpenCLVersion::CL_2_1; - } - else - { - return OpenCLVersion::CL_2_0; - } - } - else if (major == 3) - { - return OpenCLVersion::CL_3_0; - } - else - { - return OpenCLVersion::CL_1_0; - } -} - -Vendor ParseVendor(const std::string &device_name, const std::string &vendor_name) -{ - std::string d_name = device_name; - std::string v_name = vendor_name; - std::transform(d_name.begin(), d_name.end(), d_name.begin(), ::tolower); - std::transform(v_name.begin(), v_name.end(), v_name.begin(), ::tolower); - if (d_name.find("qualcomm") != std::string::npos || v_name.find("qualcomm") != std::string::npos) - { - return Vendor::kQualcomm; - } - else if (d_name.find("mali") != std::string::npos || v_name.find("mali") != std::string::npos) - { - return Vendor::kMali; - } - else if (d_name.find("power") != std::string::npos || v_name.find("power") != std::string::npos) - { - return Vendor::kPowerVR; - } - else if (d_name.find("nvidia") != std::string::npos || v_name.find("nvidia") != std::string::npos) - { - return Vendor::kNvidia; - } - else if (d_name.find("advanced micro devices") != std::string::npos || 
- v_name.find("advanced micro devices") != std::string::npos) - { - return Vendor::kAMD; - } - else if (d_name.find("intel") != std::string::npos || v_name.find("intel") != std::string::npos) - { - return Vendor::kIntel; - } - else - { - return Vendor::kUnknown; - } -} - -// check that gpu_version belong to range min_version-max_version -// min_version is included and max_version is excluded. -bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) -{ - return gpu_version >= min_version && gpu_version < max_version; -} -} // namespace - -DeviceInfo DeviceInfoFromDeviceID(cl_device_id id) -{ - DeviceInfo info; - const auto device_name = GetDeviceInfo<std::string>(id, CL_DEVICE_NAME); - const auto vendor_name = GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR); - const auto opencl_c_version = GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION); - info.vendor = ParseVendor(device_name, vendor_name); - if (info.vendor == Vendor::kQualcomm) - { - info.adreno_info = AdrenoInfo(opencl_c_version); - } - else if (info.vendor == Vendor::kMali) - { - info.mali_info = MaliInfo(device_name); - } - info.cl_version = ParseCLVersion(opencl_c_version); - info.extensions = absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' '); - - info.supports_fp16 = false; - info.supports_image3d_writes = false; - for (const auto &ext : info.extensions) - { - if (ext == "cl_khr_fp16") - { - info.supports_fp16 = true; - } - if (ext == "cl_khr_3d_image_writes") - { - info.supports_image3d_writes = true; - } - } - - cl_device_fp_config f32_config = - GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG); - info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST; - - if (info.supports_fp16) - { - cl_device_fp_config f16_config; - auto status = GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_HALF_FP_CONFIG, &f16_config); - // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty. 
- if (status.ok() && info.vendor != Vendor::kAMD) - { - info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST; - } - else - { // happens on PowerVR - f16_config = f32_config; - info.supports_fp16_rtn = info.supports_fp32_rtn; - } - } - else - { - info.supports_fp16_rtn = false; - } - - if (info.vendor == Vendor::kPowerVR && !info.supports_fp16) - { - // PowerVR doesn't have full support of fp16 and so doesn't list this - // extension. But it can support fp16 in MADs and as buffers/textures types, - // so we will use it. - info.supports_fp16 = true; - info.supports_fp16_rtn = info.supports_fp32_rtn; - } - - if (!info.supports_image3d_writes && - ((info.vendor == Vendor::kQualcomm && - IsGPUVersionInRange(info.adreno_info.gpu_version, 400, 500)) || - info.vendor == Vendor::kNvidia)) - { - // in local tests Adreno 430 can write in image 3d, at least on small sizes, - // but it doesn't have cl_khr_3d_image_writes in list of available - // extensions - // The same for NVidia - info.supports_image3d_writes = true; - } - info.compute_units_count = GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS); - info.image2d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH); - info.image2d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT); - info.buffer_max_size = GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE); - if (info.cl_version >= OpenCLVersion::CL_1_2) - { - info.image_buffer_max_size = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE); - info.image_array_max_layers = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE); - } - info.image3d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH); - info.image3d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT); - info.image3d_max_depth = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH); - int3 max_work_group_sizes; - GetDeviceWorkDimsSizes(id, &max_work_group_sizes); - info.max_work_group_size_x = max_work_group_sizes.x; 
- info.max_work_group_size_y = max_work_group_sizes.y; - info.max_work_group_size_z = max_work_group_sizes.z; - - if (info.IsIntel()) - { - if (info.SupportsExtension("cl_intel_required_subgroup_size")) - { - size_t sub_groups_count; - cl_int status = clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, 0, nullptr, - &sub_groups_count); - if (status == CL_SUCCESS) - { - std::vector<size_t> sub_group_sizes(sub_groups_count); - status = - clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, - sizeof(size_t) * sub_groups_count, sub_group_sizes.data(), nullptr); - if (status == CL_SUCCESS) - { - for (size_t i = 0; i < sub_groups_count; ++i) - { - info.supported_subgroup_sizes.push_back(sub_group_sizes[i]); - } - } - } - } - } - return info; -} - -CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id) - : info_(DeviceInfoFromDeviceID(id)), id_(id), platform_id_(platform_id) -{ -} - -CLDevice::CLDevice(const CLDevice &device) - : info_(device.info_), id_(device.id_), platform_id_(device.platform_id_) -{ -} - -CLDevice &CLDevice::operator=(const CLDevice &device) -{ - if (this != &device) - { - info_ = device.info_; - id_ = device.id_; - platform_id_ = device.platform_id_; - } - return *this; -} - -CLDevice::CLDevice(CLDevice &&device) - : info_(std::move(device.info_)), id_(device.id_), platform_id_(device.platform_id_) -{ - device.id_ = nullptr; - device.platform_id_ = nullptr; -} - -CLDevice &CLDevice::operator=(CLDevice &&device) -{ - if (this != &device) - { - id_ = nullptr; - platform_id_ = nullptr; - info_ = std::move(device.info_); - std::swap(id_, device.id_); - std::swap(platform_id_, device.platform_id_); - } - return *this; -} - -bool CLDevice::SupportsFP16() const { return info_.supports_fp16; } - -bool CLDevice::SupportsExtension(const std::string &extension) const -{ - return info_.SupportsExtension(extension); -} - -bool CLDevice::SupportsTextureArray() const { return info_.SupportsTextureArray(); } - -bool 
CLDevice::SupportsImageBuffer() const { return info_.SupportsImageBuffer(); } - -bool CLDevice::SupportsImage3D() const { return info_.SupportsImage3D(); } - -bool CLDevice::SupportsFP32RTN() const { return info_.supports_fp32_rtn; } - -bool CLDevice::SupportsFP16RTN() const { return info_.supports_fp16_rtn; } - -std::string CLDevice::GetPlatformVersion() const -{ - return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION); -} - -bool CLDevice::IsCL20OrHigher() const { return info_.IsCL20OrHigher(); } - -bool CLDevice::SupportsSubGroupWithSize(int sub_group_size) const -{ - return info_.SupportsSubGroupWithSize(sub_group_size); -} - -bool CLDevice::IsAdreno() const { return info_.IsAdreno(); } - -bool CLDevice::IsAdreno3xx() const { return info_.IsAdreno3xx(); } - -bool CLDevice::IsAdreno4xx() const { return info_.IsAdreno4xx(); } - -bool CLDevice::IsAdreno5xx() const { return info_.IsAdreno5xx(); } - -bool CLDevice::IsAdreno6xx() const { return info_.IsAdreno6xx(); } - -bool CLDevice::IsAdreno6xxOrHigher() const { return info_.IsAdreno6xxOrHigher(); } - -bool CLDevice::IsPowerVR() const { return info_.IsPowerVR(); } - -bool CLDevice::IsNvidia() const { return info_.IsNvidia(); } - -bool CLDevice::IsMali() const { return info_.IsMali(); } - -bool CLDevice::IsAMD() const { return info_.IsAMD(); } - -bool CLDevice::IsIntel() const { return info_.IsIntel(); } - -bool CLDevice::SupportsOneLayerTextureArray() const { return info_.SupportsOneLayerTextureArray(); } - -void CLDevice::DisableOneLayerTextureArray() -{ - info_.adreno_info.support_one_layer_texture_array = false; -} - -absl::Status CreateDefaultGPUDevice(CLDevice *result) -{ - cl_uint num_platforms; - clGetPlatformIDs(0, nullptr, &num_platforms); - if (num_platforms == 0) - { - return absl::UnknownError("No supported OpenCL platform."); - } - std::vector<cl_platform_id> platforms(num_platforms); - clGetPlatformIDs(num_platforms, platforms.data(), nullptr); - - cl_platform_id platform_id = platforms[0]; - 
cl_uint num_devices; - clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices); - if (num_devices == 0) - { - return absl::UnknownError("No GPU on current platform."); - } - - std::vector<cl_device_id> devices(num_devices); - clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, num_devices, devices.data(), nullptr); - - *result = CLDevice(devices[0], platform_id); - return absl::OkStatus(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.h b/runtime/onert/backend/gpu_cl/open_cl/ClDevice.h deleted file mode 100644 index 6e740fe97..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__ - -#include <string> -#include <vector> - -#include "DeviceInfo.h" -#include "OpenclWrapper.h" -#include "Util.h" -#include "Types.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// A wrapper around opencl device id -class CLDevice -{ -public: - CLDevice() = default; - CLDevice(cl_device_id id, cl_platform_id platform_id); - - CLDevice(CLDevice &&device); - CLDevice &operator=(CLDevice &&device); - CLDevice(const CLDevice &); - CLDevice &operator=(const CLDevice &); - - ~CLDevice() {} - - cl_device_id id() const { return id_; } - cl_platform_id platform() const { return platform_id_; } - std::string GetPlatformVersion() const; - - Vendor vendor() const { return info_.vendor; } - OpenCLVersion cl_version() const { return info_.cl_version; } - bool SupportsFP16() const; - bool SupportsTextureArray() const; - bool SupportsImageBuffer() const; - bool SupportsImage3D() const; - bool SupportsExtension(const std::string &extension) const; - bool SupportsFP32RTN() const; - bool SupportsFP16RTN() const; - bool IsCL20OrHigher() const; - bool SupportsSubGroupWithSize(int sub_group_size) const; - bool IsAdreno() const; - bool IsAdreno3xx() const; - bool IsAdreno4xx() const; - bool IsAdreno5xx() const; - bool IsAdreno6xx() const; - bool IsAdreno6xxOrHigher() const; - bool IsPowerVR() const; - bool IsNvidia() const; - bool IsMali() const; - bool IsAMD() const; - bool IsIntel() const; - - // To track bug on some Adreno. b/131099086 - bool SupportsOneLayerTextureArray() const; - void DisableOneLayerTextureArray(); - - const DeviceInfo &GetInfo() const { return info_; } - // We update device info during context creation, so as supported texture - // formats can be requested from context only. 
- mutable DeviceInfo info_; - -private: - cl_device_id id_ = nullptr; - cl_platform_id platform_id_ = nullptr; -}; - -absl::Status CreateDefaultGPUDevice(CLDevice *result); - -template <typename T> T GetDeviceInfo(cl_device_id id, cl_device_info info) -{ - T result; - cl_int error = clGetDeviceInfo(id, info, sizeof(T), &result, nullptr); - if (error != CL_SUCCESS) - { - return -1; - } - return result; -} - -template <typename T> absl::Status GetDeviceInfo(cl_device_id id, cl_device_info info, T *result) -{ - cl_int error = clGetDeviceInfo(id, info, sizeof(T), result, nullptr); - if (error != CL_SUCCESS) - { - return absl::InvalidArgumentError(CLErrorCodeToString(error)); - } - return absl::OkStatus(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClErrors.h b/runtime/onert/backend/gpu_cl/open_cl/ClErrors.h deleted file mode 100644 index 48cd2fb00..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClErrors.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__ - -#include <string> - -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// @return if error_code is success, then return OK status. Otherwise translates -// error code into a message. -inline absl::Status GetOpenCLError(cl_int error_code) -{ - if (error_code == CL_SUCCESS) - { - return absl::OkStatus(); - } - return absl::InternalError("OpenCL error: " + CLErrorCodeToString(error_code)); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc b/runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc deleted file mode 100644 index beb64a9a8..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ClEvent.h" - -#include "OpenclWrapper.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -CLEvent::CLEvent(cl_event event) : event_(event) {} - -CLEvent::CLEvent(CLEvent &&event) : event_(event.event_), name_(std::move(event.name_)) -{ - event.event_ = nullptr; -} - -CLEvent &CLEvent::operator=(CLEvent &&event) -{ - if (this != &event) - { - Release(); - std::swap(event_, event.event_); - name_ = std::move(event.name_); - } - return *this; -} - -uint64_t CLEvent::GetStartedTimeNs() const -{ - cl_ulong time_ns; - clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &time_ns, nullptr); - return time_ns; -} - -uint64_t CLEvent::GetFinishedTimeNs() const -{ - cl_ulong time_ns; - clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &time_ns, nullptr); - return time_ns; -} - -double CLEvent::GetEventTimeMs() const -{ - const uint64_t start = GetStartedTimeNs(); - const uint64_t end = GetFinishedTimeNs(); - const uint64_t time_ns = (end - start); - - return static_cast<double>(time_ns) * 1e-6; -} - -uint64_t CLEvent::GetEventTimeNs() const { return GetFinishedTimeNs() - GetStartedTimeNs(); } - -void CLEvent::SetName(const std::string &name) { name_ = name; } - -void CLEvent::Wait() const { clWaitForEvents(1, &event_); } - -CLEvent::~CLEvent() { Release(); } - -void CLEvent::Release() -{ - if (event_) - { - clReleaseEvent(event_); - event_ = nullptr; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.h b/runtime/onert/backend/gpu_cl/open_cl/ClEvent.h deleted file mode 100644 index 265409ffe..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__ - -#include <cstdint> -#include <string> - -#include "OpenclWrapper.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// A RAII wrapper around opencl event -class CLEvent -{ -public: - CLEvent() {} - explicit CLEvent(cl_event event); - - // Move only - CLEvent(CLEvent &&event); - CLEvent &operator=(CLEvent &&event); - CLEvent(const CLEvent &) = delete; - CLEvent &operator=(const CLEvent &) = delete; - - ~CLEvent(); - - uint64_t GetStartedTimeNs() const; - uint64_t GetFinishedTimeNs() const; - - double GetEventTimeMs() const; - uint64_t GetEventTimeNs() const; - - void Wait() const; - - cl_event event() const { return event_; } - - bool is_valid() const { return event_ != nullptr; } - - void SetName(const std::string &name); - std::string GetName() const { return name_; } - -private: - void Release(); - - cl_event event_ = nullptr; - - std::string name_; // optional, for profiling mostly -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc b/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc deleted file mode 100644 index 247a63d39..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc +++ /dev/null @@ -1,59 +0,0 @@ -/* 
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ClImageFormat.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -cl_channel_order ToChannelOrder(int num_channels) -{ - switch (num_channels) - { - case 1: - return CL_R; - case 2: - return CL_RG; - case 3: - return CL_RGB; - case 4: - return CL_RGBA; - default: - return -1; - } -} - -cl_channel_type ToImageChannelType(DataType data_type) -{ - switch (data_type) - { - case DataType::FLOAT32: - return CL_FLOAT; - case DataType::FLOAT16: - return CL_HALF_FLOAT; - default: - return -1; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h b/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h deleted file mode 100644 index a763746bd..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__ - -#include "OpenclWrapper.h" -#include "DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -cl_channel_order ToChannelOrder(int num_channels); - -cl_channel_type ToImageChannelType(DataType data_type); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc b/runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc deleted file mode 100644 index f7745b9ac..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ClKernel.h" - -#include "absl/strings/str_cat.h" -#include "ClProgram.h" -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -absl::Status GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id, int *result) -{ - size_t max_work_group_size; - cl_int error_code = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(size_t), &max_work_group_size, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to get info CL_KERNEL_WORK_GROUP_SIZE ", - CLErrorCodeToString(error_code))); - } - *result = static_cast<int>(max_work_group_size); - return absl::OkStatus(); -} - -absl::Status GetKernelPrivateMemorySize(cl_kernel kernel, cl_device_id device_id, int *result) -{ - cl_ulong private_mem_size; - cl_int error_code = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_PRIVATE_MEM_SIZE, - sizeof(cl_ulong), &private_mem_size, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to get info CL_KERNEL_PRIVATE_MEM_SIZE ", - CLErrorCodeToString(error_code))); - } - *result = static_cast<int>(private_mem_size); - return absl::OkStatus(); -} - -} // namespace - -CLKernel::CLKernel(CLKernel &&kernel) - : info_(kernel.info_), binding_counter_(kernel.binding_counter_), - function_name_(std::move(kernel.function_name_)), program_(kernel.program_), - kernel_(kernel.kernel_) -{ - kernel.kernel_ = nullptr; -} - -CLKernel &CLKernel::operator=(CLKernel &&kernel) -{ - if (this != &kernel) - { - Release(); - std::swap(info_, kernel.info_); - std::swap(binding_counter_, kernel.binding_counter_); - function_name_ = std::move(kernel.function_name_); - std::swap(program_, kernel.program_); - std::swap(kernel_, kernel.kernel_); - } - return *this; -} - -CLKernel::~CLKernel() { Release(); } - -absl::Status CLKernel::ReInit() const -{ - clReleaseKernel(kernel_); - cl_kernel *kern_ptr = 
const_cast<cl_kernel *>(&kernel_); - int error_code; - *kern_ptr = clCreateKernel(program_, function_name_.c_str(), &error_code); - if (!kernel_ || error_code != CL_SUCCESS) - { - *kern_ptr = nullptr; - return absl::UnknownError( - absl::StrCat("Failed to create ", function_name_, CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -void CLKernel::Release() -{ - if (kernel_) - { - clReleaseKernel(kernel_); - clReleaseProgram(program_); - kernel_ = nullptr; - } -} - -absl::Status CLKernel::CreateFromProgram(const CLProgram &program, const std::string &function_name) -{ - int error_code; - function_name_ = function_name; - kernel_ = clCreateKernel(program.program(), function_name.c_str(), &error_code); - if (!kernel_ || error_code != CL_SUCCESS) - { - kernel_ = nullptr; - return absl::UnknownError( - absl::StrCat("Failed to create ", function_name, CLErrorCodeToString(error_code))); - } - - program_ = program.program(); - clRetainProgram(program_); - - RETURN_IF_ERROR( - GetKernelPrivateMemorySize(kernel_, program.GetDeviceId(), &info_.private_memory_size)); - RETURN_IF_ERROR( - GetKernelMaxWorkGroupSize(kernel_, program.GetDeviceId(), &info_.max_work_group_size)); - return absl::OkStatus(); -} - -absl::Status CLKernel::SetMemory(int index, cl_mem memory) -{ - return SetBytes(index, &memory, sizeof(cl_mem)); -} - -absl::Status CLKernel::SetMemoryAuto(cl_mem memory) -{ - return SetBytesAuto(&memory, sizeof(cl_mem)); -} - -absl::Status CLKernel::SetBytes(int index, const void *ptr, int length) const -{ - const int error_code = clSetKernelArg(kernel_, index, length, ptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to set kernel arguments - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLKernel::SetBytesAuto(const void *ptr, int length) -{ - const int error_code = clSetKernelArg(kernel_, binding_counter_, length, ptr); - if (error_code != CL_SUCCESS) - { - return 
absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - binding_counter_, ")")); - } - binding_counter_++; - return absl::OkStatus(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.h b/runtime/onert/backend/gpu_cl/open_cl/ClKernel.h deleted file mode 100644 index 9575b7946..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__ - -#include <string> - -#include "ClContext.h" -#include "ClDevice.h" -#include "ClProgram.h" -#include "OpenclWrapper.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -struct KernelInfo -{ - int private_memory_size = 0; - int max_work_group_size = 0; -}; - -// Arguments binding to CLKernel can be manual or automatic -// In manual you specify binding index explicitly -// In automatic binding, index auto-incremented with every binding call -// Also, if you use automatic mode you must call ResetBindingCounter -// before parameters binding -class CLKernel -{ -public: - CLKernel() {} - - // Move only - CLKernel(CLKernel &&kernel); - CLKernel &operator=(CLKernel &&kernel); - CLKernel(const CLKernel &) = delete; - CLKernel &operator=(const CLKernel &) = delete; - - ~CLKernel(); - - cl_kernel kernel() const { return kernel_; } - - absl::Status CreateFromProgram(const CLProgram &program, const std::string &function_name); - - absl::Status SetMemory(int index, cl_mem memory); - absl::Status SetMemoryAuto(cl_mem memory); - template <typename T> absl::Status SetBytes(int index, const T &value) const - { - return SetBytes(index, static_cast<const void *>(&value), sizeof(T)); - } - template <typename T> absl::Status SetBytesAuto(const T &value) - { - return SetBytesAuto(static_cast<const void *>(&value), sizeof(T)); - } - - int GetBindingCounter() const { return binding_counter_; } - void ResetBindingCounter() { binding_counter_ = 0; } - - // Do not use this function - // workaround for Mali memory leak - absl::Status ReInit() const; - - KernelInfo info_; - -private: - void Release(); - absl::Status SetBytes(int index, const void *ptr, int length) const; - absl::Status SetBytesAuto(const void *ptr, int length); - - int binding_counter_ = -1; - - std::string function_name_ = ""; - // reference to program from which kernel was 
created - cl_program program_ = nullptr; - cl_kernel kernel_ = nullptr; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc b/runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc deleted file mode 100644 index fd3bc5579..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ClMemory.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -cl_mem_flags ToClMemFlags(AccessType access_type) -{ - switch (access_type) - { - case AccessType::READ: - return CL_MEM_READ_ONLY; - case AccessType::WRITE: - return CL_MEM_WRITE_ONLY; - case AccessType::READ_WRITE: - return CL_MEM_READ_WRITE; - default: - throw std::runtime_error("Invalid AccessType"); - } - - return CL_MEM_READ_ONLY; // unreachable -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.h b/runtime/onert/backend/gpu_cl/open_cl/ClMemory.h deleted file mode 100644 index c704ec71f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__ - -#include <algorithm> - -#include "OpenclWrapper.h" -#include "AccessType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// RAII wrapper for OpenCL memory object. -// -// Image is moveable but not copyable. -class CLMemory -{ -public: - // Creates invalid object. - CLMemory() : CLMemory(nullptr, false) {} - - CLMemory(cl_mem memory, bool has_ownership) : memory_(memory), has_ownership_(has_ownership) {} - - // Move-only - CLMemory(const CLMemory &) = delete; - CLMemory &operator=(const CLMemory &) = delete; - CLMemory(CLMemory &&image) : memory_(image.memory_), has_ownership_(image.has_ownership_) - { - image.memory_ = nullptr; - } - - ~CLMemory() { Invalidate(); } - - CLMemory &operator=(CLMemory &&image) - { - if (this != &image) - { - Invalidate(); - std::swap(memory_, image.memory_); - has_ownership_ = image.has_ownership_; - } - return *this; - } - - cl_mem memory() const { return memory_; } - - bool is_valid() const { return memory_ != nullptr; } - - // @return true if this object actually owns corresponding CL memory - // and manages it's lifetime. 
- bool has_ownership() const { return has_ownership_; } - - cl_mem Release() - { - cl_mem to_return = memory_; - memory_ = nullptr; - return to_return; - } - -private: - void Invalidate() - { - if (memory_ && has_ownership_) - { - clReleaseMemObject(memory_); - } - memory_ = nullptr; - } - - cl_mem memory_ = nullptr; - bool has_ownership_ = false; -}; - -cl_mem_flags ToClMemFlags(AccessType access_type); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc b/runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc deleted file mode 100644 index c72b01a73..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ClProgram.h" - -#include <cstdint> -#include <cstring> -#include <vector> - -#include "absl/strings/str_cat.h" -#include "absl/types/span.h" -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::string GetProgramBuildInfo(cl_program program, cl_device_id id, cl_program_build_info info) -{ - size_t size; - cl_int error_code = clGetProgramBuildInfo(program, id, info, 0, nullptr, &size); - if (error_code != CL_SUCCESS) - { - return absl::StrCat("Failed to GetProgramBuildInfo - ", CLErrorCodeToString(error_code)); - } - - std::string result(size - 1, 0); - error_code = clGetProgramBuildInfo(program, id, info, size, &result[0], nullptr); - if (error_code != CL_SUCCESS) - { - return absl::StrCat("Failed to GetProgramBuildInfo - ", CLErrorCodeToString(error_code)); - } - return result; -} - -absl::Status GetBinarySize(cl_program program, size_t *binary_size) -{ - cl_int error_code = - clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), binary_size, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to get program binary size - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status BuildProgram(cl_program program, const CLDevice &device, - const std::string &compiler_options) -{ - const int error_code = - clBuildProgram(program, 0, nullptr, compiler_options.c_str(), nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to build program executable - ", CLErrorCodeToString(error_code), - GetProgramBuildInfo(program, device.id(), CL_PROGRAM_BUILD_LOG))); - } - - return absl::OkStatus(); -} - -std::string CompilerOptionToString(const CLDevice &device, CompilerOptions option) -{ - switch (option) - { - case CompilerOptions::ADRENO_FULL_SIMD_LINE: - if (device.info_.adreno_info.gpu_version < 500) - { - return "-qcom-accelerate-16-bit"; - } - 
else - { - return "-qcom-accelerate-16-bit=true"; - } - case CompilerOptions::ADRENO_MORE_WAVES: - if (device.info_.adreno_info.gpu_version >= 500) - { - return "-qcom-accelerate-16-bit=false"; - } - else - { - return ""; - } - case CompilerOptions::POWERVR_FP16: - return "-cl-fast-relaxed-math"; - case CompilerOptions::CL_OPT_DISABLE: - return "-cl-opt-disable"; - case CompilerOptions::CL_2_0: - return "-cl-std=CL2.0"; - case CompilerOptions::CL_3_0: - return "-cl-std=CL3.0"; - } - return ""; -} - -} // namespace - -std::string CompilerOptionsToString(const CLDevice &device, - const std::vector<CompilerOptions> &compiler_options) -{ - std::string result; - for (auto option : compiler_options) - { - absl::StrAppend(&result, CompilerOptionToString(device, option), " "); - } - return result; -} - -CLProgram::CLProgram(cl_program program, cl_device_id device_id) - : program_(program), device_id_(device_id) -{ -} - -CLProgram::CLProgram(CLProgram &&program) - : program_(program.program_), device_id_(program.device_id_) -{ - program.program_ = nullptr; -} - -CLProgram &CLProgram::operator=(CLProgram &&program) -{ - if (this != &program) - { - Release(); - std::swap(program_, program.program_); - std::swap(device_id_, program.device_id_); - } - return *this; -} - -CLProgram::~CLProgram() { Release(); } - -void CLProgram::Release() -{ - if (program_) - { - clReleaseProgram(program_); - program_ = nullptr; - } -} - -absl::Status CLProgram::GetBinary(std::vector<uint8_t> *result) const -{ - size_t binary_size; - RETURN_IF_ERROR(GetBinarySize(program_, &binary_size)); - result->resize(result->size() + binary_size); - uint8_t *binary_ptr = result->data() + result->size() - binary_size; - cl_int error_code = - clGetProgramInfo(program_, CL_PROGRAM_BINARIES, binary_size, &binary_ptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to get program binary - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - 
-absl::Status CreateCLProgram(const std::string &code, const std::string &compiler_options, - const CLContext &context, const CLDevice &device, CLProgram *result) -{ - int error_code; - const char *source = code.c_str(); - - cl_program program = - clCreateProgramWithSource(context.context(), 1, &source, nullptr, &error_code); - if (!program || error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to create compute program - ", CLErrorCodeToString(error_code))); - } - - *result = CLProgram(program, device.id()); - RETURN_IF_ERROR(BuildProgram(program, device, compiler_options)); - return absl::OkStatus(); -} - -absl::Status CreateCLProgramFromBinary(const CLContext &context, const CLDevice &device, - absl::Span<const uint8_t> binary, CLProgram *result) -{ - cl_int binary_status; - cl_int error_code; - cl_device_id devices_list[] = {device.id()}; - size_t binary_size = binary.size(); - const uint8_t *binary_pointer = binary.data(); - cl_program program = clCreateProgramWithBinary(context.context(), 1, devices_list, &binary_size, - &binary_pointer, &binary_status, &error_code); - if (binary_status != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat( - "Something wrong with binary after clCreateProgramWithBinary - ", binary_status)); - } - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to create program - ", CLErrorCodeToString(error_code))); - } - *result = CLProgram(program, device.id()); - return BuildProgram(program, device, ""); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.h b/runtime/onert/backend/gpu_cl/open_cl/ClProgram.h deleted file mode 100644 index d039ff698..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__ - -#include <cstdint> -#include <vector> - -#include "ClContext.h" -#include "ClDevice.h" -#include "OpenclWrapper.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class CompilerOptions -{ - // ADRENO_FULL_SIMD_LINE: - // Adreno can have 2 sizes for SIMD size. - // On Adreno 4xx/5xx it is 32/64, on Adreno6xx it is 64/128. - // Some our algorithms actually rely on exact size, for example on full - // SIMD size, so we need this define. - // This define is actually -qcom-accelerate-16-bit, but it controls SIMD - // size. - ADRENO_FULL_SIMD_LINE, - ADRENO_MORE_WAVES, - POWERVR_FP16, - CL_OPT_DISABLE, - CL_2_0, - CL_3_0, -}; - -std::string CompilerOptionsToString(const CLDevice &device, - const std::vector<CompilerOptions> &compiler_options); - -class CLProgram -{ -public: - CLProgram() {} - CLProgram(cl_program program, cl_device_id device_id); - - // Move only - CLProgram(CLProgram &&program); - CLProgram &operator=(CLProgram &&program); - CLProgram(const CLProgram &) = delete; - CLProgram &operator=(const CLProgram &) = delete; - - ~CLProgram(); - - cl_program program() const { return program_; } - - // Return the cl_device_id associated with the program object. 
- // This can be the device associated with context on which the program object - // has been created or can be device that was specified when a program object - // was created using clCreateProgramWithBinary. - cl_device_id GetDeviceId() const { return device_id_; } - - absl::Status GetBinary(std::vector<uint8_t> *result) const; - -private: - void Release(); - - cl_program program_ = nullptr; - - // reference - cl_device_id device_id_ = nullptr; -}; - -absl::Status CreateCLProgram(const std::string &code, const std::string &compiler_options, - const CLContext &context, const CLDevice &device, CLProgram *result); - -absl::Status CreateCLProgramFromBinary(const CLContext &context, const CLDevice &device, - absl::Span<const uint8_t> binary, CLProgram *result); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/DataType.cc b/runtime/onert/backend/gpu_cl/open_cl/DataType.cc deleted file mode 100644 index ce2aa8298..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/DataType.cc +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "DataType.h" - -#include <stddef.h> -#include <string> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -size_t SizeOf(DataType data_type) -{ - switch (data_type) - { - case DataType::UINT8: - case DataType::INT8: - return 1; - case DataType::FLOAT16: - case DataType::INT16: - case DataType::UINT16: - return 2; - case DataType::FLOAT32: - case DataType::INT32: - case DataType::UINT32: - return 4; - case DataType::FLOAT64: - case DataType::INT64: - case DataType::UINT64: - return 8; - case DataType::UNKNOWN: - return 0; - } - return 0; -} - -std::string ToString(DataType data_type) -{ - switch (data_type) - { - case DataType::FLOAT16: - return "float16"; - case DataType::FLOAT32: - return "float32"; - case DataType::FLOAT64: - return "float64"; - case DataType::INT16: - return "int16"; - case DataType::INT32: - return "int32"; - case DataType::INT64: - return "int64"; - case DataType::INT8: - return "int8"; - case DataType::UINT16: - return "uint16"; - case DataType::UINT32: - return "uint32"; - case DataType::UINT64: - return "uint64"; - case DataType::UINT8: - return "uint8"; - case DataType::UNKNOWN: - return "unknown"; - } - return "undefined"; -} - -std::string ToCLDataType(DataType data_type, int vec_size) -{ - const std::string postfix = vec_size == 1 ? 
"" : std::to_string(vec_size); - switch (data_type) - { - case DataType::FLOAT16: - return "half" + postfix; - case DataType::FLOAT32: - return "float" + postfix; - case DataType::FLOAT64: - return "double" + postfix; - case DataType::INT16: - return "short" + postfix; - case DataType::INT32: - return "int" + postfix; - case DataType::INT64: - return "long" + postfix; - case DataType::INT8: - return "char" + postfix; - case DataType::UINT16: - return "ushort" + postfix; - case DataType::UINT32: - return "uint" + postfix; - case DataType::UINT64: - return "ulong" + postfix; - case DataType::UINT8: - return "uchar" + postfix; - case DataType::UNKNOWN: - return "unknown"; - } - return "undefined"; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/DataType.h b/runtime/onert/backend/gpu_cl/open_cl/DataType.h deleted file mode 100644 index 2a5afd551..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/DataType.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__ - -#include <stddef.h> -#include <string> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class DataType -{ - UNKNOWN = 0, - FLOAT16 = 1, - FLOAT32 = 2, - FLOAT64 = 3, - UINT8 = 4, - INT8 = 5, - UINT16 = 6, - INT16 = 7, - UINT32 = 8, - INT32 = 9, - UINT64 = 10, - INT64 = 11, -}; - -size_t SizeOf(DataType type); - -std::string ToString(DataType t); - -std::string ToCLDataType(DataType data_type, int vec_size = 1); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc b/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc deleted file mode 100644 index 2966fad75..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "DeviceInfo.h" - -#include <algorithm> -#include <map> -#include <string> -#include <vector> - -#include "absl/strings/numbers.h" -#include "absl/strings/str_split.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -namespace -{ -// check that gpu_version belong to range min_version-max_version -// min_version is included and max_version is excluded. -bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) -{ - return gpu_version >= min_version && gpu_version < max_version; -} - -MaliGPU GetMaliGPUVersion(const std::string &device_name) -{ - const std::map<std::string, MaliGPU> kMapping = { - {"T604", MaliGPU::T604}, {"T622", MaliGPU::T622}, {"T624", MaliGPU::T624}, - {"T628", MaliGPU::T628}, {"T658", MaliGPU::T658}, {"T678", MaliGPU::T678}, - {"T720", MaliGPU::T720}, {"T760", MaliGPU::T760}, {"T820", MaliGPU::T820}, - {"T830", MaliGPU::T830}, {"T860", MaliGPU::T860}, {"T880", MaliGPU::T880}, - {"G31", MaliGPU::G31}, {"G51", MaliGPU::G51}, {"G71", MaliGPU::G71}, - {"G52", MaliGPU::G52}, {"G72", MaliGPU::G72}, {"G76", MaliGPU::G76}, - {"G57", MaliGPU::G57}, {"G77", MaliGPU::G77}, {"G68", MaliGPU::G68}, - {"G78", MaliGPU::G78}, - }; - for (const auto &v : kMapping) - { - if (device_name.find(v.first) != std::string::npos) - { - return v.second; - } - } - return MaliGPU::UNKNOWN; -} - -} // namespace - -// There is no rule for gpu version encoding, but we found these samples: -// Version: OpenCL C 2.0 Adreno(TM) 540 // Pixel 2 -// Version: OpenCL C 2.0 Adreno(TM) 630 // Sony Compact XZ2 -// Version: OpenCL C 2.0 Adreno(TM) 630 // Pixel 3 -// Version: OpenCL C 2.0 Adreno(TM) 540 // Samsung S8 -// Version: OpenCL C 1.2 Adreno(TM) 430 // HTC One M9 -// Version: OpenCL C 2.0 Adreno(TM) 530 // Samsung S7 Edge -// Version: OpenCL C 1.2 Adreno(TM) 405 // Motorola Moto G(4) -// After the number string ends. 
-// It is assumed that the <vendor-specific information> for Adreno GPUs has -// the following format: -// <text?><space?>Adreno(TM)<space><text?><version> -// Returns -1 if vendor-specific information cannot be parsed -int GetAdrenoGPUVersion(const std::string &gpu_version) -{ - const std::string gpu = absl::AsciiStrToLower(gpu_version); - const std::vector<absl::string_view> words = absl::StrSplit(gpu, ' '); - size_t i = 0; - for (; i < words.size(); ++i) - { - if (words[i].find("adreno") != words[i].npos) - { - break; - } - } - i += 1; - for (; i < words.size(); ++i) - { - int number; - bool is_number = absl::SimpleAtoi(words[i], &number); - // Adreno GPUs starts from 2xx, but opencl support should be only from 3xx - if (is_number && number >= 300) - { - return number; - } - } - return -1; -} - -std::string VendorToString(Vendor v) -{ - switch (v) - { - case Vendor::kQualcomm: - return "Qualcomm"; - case Vendor::kMali: - return "Mali"; - case Vendor::kPowerVR: - return "PowerVR"; - case Vendor::kNvidia: - return "NVIDIA"; - case Vendor::kAMD: - return "AMD"; - case Vendor::kIntel: - return "Intel"; - case Vendor::kUnknown: - return "unknown vendor"; - default: - return "Error"; - } -} - -std::string OpenCLVersionToString(OpenCLVersion version) -{ - switch (version) - { - case OpenCLVersion::CL_1_0: - return "1.0"; - case OpenCLVersion::CL_1_1: - return "1.1"; - case OpenCLVersion::CL_1_2: - return "1.2"; - case OpenCLVersion::CL_2_0: - return "2.0"; - case OpenCLVersion::CL_2_1: - return "2.1"; - case OpenCLVersion::CL_2_2: - return "2.2"; - case OpenCLVersion::CL_3_0: - return "3.0"; - default: - return "Error"; - } -} - -AdrenoInfo::AdrenoInfo(const std::string &device_version) - : gpu_version(GetAdrenoGPUVersion(device_version)) -{ -} - -int AdrenoInfo::GetMaximumWavesCount() const -{ - if (gpu_version < 400) - { - return -1; // Adreno 3xx does not support it currently - } - else if (gpu_version >= 400 && gpu_version < 500) - { - return -1; // Adreno 4xx does 
not support it currently - } - else if (gpu_version >= 500 && gpu_version < 600) - { - return -1; // Adreno 5xx does not support it currently - } - else if (gpu_version >= 600 && gpu_version < 700) - { - return gpu_version == 640 ? 30 : 16; - } - else - { - return -1; // Adreno 7xx and higher does not exist yet - } -} - -int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const -{ - if (gpu_version < 400) - { - return -1; // Adreno 3xx does not support it currently - } - else if (gpu_version >= 400 && gpu_version < 500) - { - return -1; // Adreno 4xx does not support it currently - } - else if (gpu_version >= 500 && gpu_version < 600) - { - return -1; // Adreno 5xx does not support it currently - } - else if (gpu_version >= 600 && gpu_version < 700) - { - return gpu_version == 640 ? 128 * 144 * 16 : 128 * 96 * 16; - } - else - { - return -1; // Adreno 7xx and higher does not exist yet - } -} - -int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread, bool full_wave) const -{ - const int register_usage_per_wave = GetWaveSize(full_wave) * register_footprint_per_tread; - const int possible_waves_count = GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave; - return std::min(possible_waves_count, GetMaximumWavesCount()); -} - -int AdrenoInfo::GetWaveSize(bool full_wave) const -{ - if (gpu_version < 400) - { - return -1; // Adreno 3xx does not support it currently - } - else if (gpu_version < 600) - { - return full_wave ? 64 : 32; - } - else - { - return full_wave ? 
128 : 64; - } -} - -MaliInfo::MaliInfo(const std::string &device_name) : gpu_version(GetMaliGPUVersion(device_name)) {} - -bool MaliInfo::IsMaliT6xx() const -{ - return gpu_version == MaliGPU::T604 || gpu_version == MaliGPU::T622 || - gpu_version == MaliGPU::T624 || gpu_version == MaliGPU::T628 || - gpu_version == MaliGPU::T658 || gpu_version == MaliGPU::T678; -} - -bool MaliInfo::IsMaliT7xx() const -{ - return gpu_version == MaliGPU::T720 || gpu_version == MaliGPU::T760; -} - -bool MaliInfo::IsMaliT8xx() const -{ - return gpu_version == MaliGPU::T820 || gpu_version == MaliGPU::T830 || - gpu_version == MaliGPU::T860 || gpu_version == MaliGPU::T880; -} - -bool MaliInfo::IsMidgard() const { return IsMaliT6xx() || IsMaliT7xx() || IsMaliT8xx(); } - -bool MaliInfo::IsBifrostGen1() const -{ - return gpu_version == MaliGPU::G31 || gpu_version == MaliGPU::G51 || gpu_version == MaliGPU::G71; -} - -bool MaliInfo::IsBifrostGen2() const -{ - return gpu_version == MaliGPU::G52 || gpu_version == MaliGPU::G72; -} - -bool MaliInfo::IsBifrostGen3() const { return gpu_version == MaliGPU::G76; } - -bool MaliInfo::IsBifrost() const { return IsBifrostGen1() || IsBifrostGen2() || IsBifrostGen3(); } - -bool MaliInfo::IsValhall() const -{ - return gpu_version == MaliGPU::G57 || gpu_version == MaliGPU::G77 || - gpu_version == MaliGPU::G68 || gpu_version == MaliGPU::G78; -} - -bool DeviceInfo::SupportsTextureArray() const { return cl_version >= OpenCLVersion::CL_1_2; } - -bool DeviceInfo::SupportsImageBuffer() const { return cl_version >= OpenCLVersion::CL_1_2; } - -bool DeviceInfo::SupportsImage3D() const -{ - if (vendor == Vendor::kMali) - { - // On Mali T880 read_imageh doesn't compile with image3d_t - return false; - } - return supports_image3d_writes; -} - -bool DeviceInfo::SupportsFloatImage2D(DataType data_type, int channels) const -{ - if (channels == 1) - { - return data_type == DataType::FLOAT32 ? 
supports_r_f32_tex2d : supports_r_f16_tex2d; - } - else if (channels == 2) - { - return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d : supports_rg_f16_tex2d; - } - else if (channels == 3) - { - return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d : supports_rgb_f16_tex2d; - } - else if (channels == 4) - { - return data_type == DataType::FLOAT32 ? supports_rgba_f32_tex2d : supports_rgba_f16_tex2d; - } - else - { - return false; - } -} - -bool DeviceInfo::SupportsOneLayerTextureArray() const -{ - return !IsAdreno() || adreno_info.support_one_layer_texture_array; -} - -bool DeviceInfo::SupportsExtension(const std::string &extension) const -{ - for (const auto &ext : extensions) - { - if (ext == extension) - { - return true; - } - } - return false; -} - -bool DeviceInfo::IsCL20OrHigher() const -{ - return cl_version != OpenCLVersion::CL_1_0 && cl_version != OpenCLVersion::CL_1_1 && - cl_version != OpenCLVersion::CL_1_2; -} - -bool DeviceInfo::SupportsSubGroupWithSize(int sub_group_size) const -{ - for (auto subgroup_size : supported_subgroup_sizes) - { - if (sub_group_size == subgroup_size) - { - return true; - } - } - return false; -} - -bool DeviceInfo::IsAdreno() const { return vendor == Vendor::kQualcomm; } - -bool DeviceInfo::IsAdreno3xx() const -{ - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 300, 400); -} - -bool DeviceInfo::IsAdreno4xx() const -{ - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 400, 500); -} - -bool DeviceInfo::IsAdreno5xx() const -{ - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 500, 600); -} - -bool DeviceInfo::IsAdreno6xx() const -{ - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 600, 700); -} - -bool DeviceInfo::IsAdreno6xxOrHigher() const -{ - return IsAdreno() && adreno_info.gpu_version >= 600; -} - -bool DeviceInfo::IsPowerVR() const { return vendor == Vendor::kPowerVR; } - -bool DeviceInfo::IsNvidia() const { return vendor == 
Vendor::kNvidia; } - -bool DeviceInfo::IsMali() const { return vendor == Vendor::kMali; } - -bool DeviceInfo::IsAMD() const { return vendor == Vendor::kAMD; } - -bool DeviceInfo::IsIntel() const { return vendor == Vendor::kIntel; } - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h b/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h deleted file mode 100644 index 85d7d4c80..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__ - -#include <string> -#include <vector> - -#include "DataType.h" - -// for use only in device_info.cc, but keep here to make tests -int GetAdrenoGPUVersion(const std::string &gpu_version); - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class Vendor -{ - kQualcomm, - kMali, - kPowerVR, - kNvidia, - kAMD, - kIntel, - kUnknown -}; -std::string VendorToString(Vendor v); - -enum class OpenCLVersion -{ - UNKNOWN, - CL_1_0, - CL_1_1, - CL_1_2, - CL_2_0, - CL_2_1, - CL_2_2, - CL_3_0 -}; -std::string OpenCLVersionToString(OpenCLVersion version); - -struct AdrenoInfo -{ - AdrenoInfo() = default; - explicit AdrenoInfo(const std::string &device_version); - int gpu_version = -1; // can be, for example, 405/430/540/530/630 etc. - - // This function returns some not very documented physical parameter of - // Adreno6xx GPU. - // We obtained it using Snapdragon Profiler. - int GetMaximumWavesCount() const; - - // returns amount of register memory per CU(Compute Unit) in bytes. - int GetRegisterMemorySizePerComputeUnit() const; - - // returns maximum possible amount of waves based on register usage. - int GetMaximumWavesCount(int register_footprint_per_tread, bool full_wave = true) const; - - int GetWaveSize(bool full_wave) const; - - // Not supported on some Adreno devices with specific driver version. 
- // b/131099086 - bool support_one_layer_texture_array = true; -}; - -enum class MaliGPU -{ - T604, - T622, - T624, - T628, - T658, - T678, - T720, - T760, - T820, - T830, - T860, - T880, - G31, - G51, - G71, - G52, - G72, - G76, - G57, - G77, - G68, - G78, - UNKNOWN -}; - -struct MaliInfo -{ - MaliInfo() = default; - explicit MaliInfo(const std::string &device_name); - MaliGPU gpu_version = MaliGPU::UNKNOWN; - - bool IsMaliT6xx() const; - bool IsMaliT7xx() const; - bool IsMaliT8xx() const; - bool IsMidgard() const; - bool IsBifrostGen1() const; - bool IsBifrostGen2() const; - bool IsBifrostGen3() const; - bool IsBifrost() const; - bool IsValhall() const; -}; - -struct DeviceInfo -{ - DeviceInfo() = default; - - bool IsAdreno() const; - bool IsAdreno3xx() const; - bool IsAdreno4xx() const; - bool IsAdreno5xx() const; - bool IsAdreno6xx() const; - bool IsAdreno6xxOrHigher() const; - bool IsPowerVR() const; - bool IsNvidia() const; - bool IsMali() const; - bool IsAMD() const; - bool IsIntel() const; - - bool SupportsTextureArray() const; - bool SupportsImageBuffer() const; - bool SupportsImage3D() const; - - bool SupportsFloatImage2D(DataType data_type, int channels) const; - - // To track bug on some Adreno. 
b/131099086 - bool SupportsOneLayerTextureArray() const; - - bool SupportsExtension(const std::string &extension) const; - bool IsCL20OrHigher() const; - bool SupportsSubGroupWithSize(int sub_group_size) const; - - std::vector<std::string> extensions; - bool supports_fp16 = false; - bool supports_image3d_writes = false; - Vendor vendor = Vendor::kUnknown; - OpenCLVersion cl_version = OpenCLVersion::UNKNOWN; - int compute_units_count = 0; - uint64_t buffer_max_size = 0; - uint64_t image2d_max_width = 0; - uint64_t image2d_max_height = 0; - uint64_t image_buffer_max_size = 0; - uint64_t image_array_max_layers = 0; - uint64_t image3d_max_width = 0; - uint64_t image3d_max_height = 0; - uint64_t image3d_max_depth = 0; - int max_work_group_size_x = 0; - int max_work_group_size_y = 0; - int max_work_group_size_z = 0; - std::vector<int> supported_subgroup_sizes; - - // rtn is ROUND_TO_NEAREST - // with rtn precision is much better then with rtz (ROUND_TO_ZERO) - // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn - // Mali from T6xx supports rtn - // PowerVR supports only rtz - bool supports_fp32_rtn = false; - bool supports_fp16_rtn = false; - - bool supports_r_f16_tex2d = false; - bool supports_rg_f16_tex2d = false; - bool supports_rgb_f16_tex2d = false; - bool supports_rgba_f16_tex2d = false; - - bool supports_r_f32_tex2d = false; - bool supports_rg_f32_tex2d = false; - bool supports_rgb_f32_tex2d = false; - bool supports_rgba_f32_tex2d = false; - - AdrenoInfo adreno_info; - MaliInfo mali_info; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Environment.cc b/runtime/onert/backend/gpu_cl/open_cl/Environment.cc deleted file mode 100644 index b558f0377..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Environment.cc +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Environment.h" - -#include <string> -#include <vector> - -#include "Util.h" -#include "Shape.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -Environment::Environment(CLDevice &&device, CLContext &&context, CLCommandQueue &&queue, - ProfilingCommandQueue &&profiling_queue) - : device_(std::move(device)), context_(std::move(context)), queue_(std::move(queue)), - profiling_queue_(std::move(profiling_queue)) -{ -} - -Environment::Environment(Environment &&environment) - : device_(std::move(environment.device_)), context_(std::move(environment.context_)), - queue_(std::move(environment.queue_)), - profiling_queue_(std::move(environment.profiling_queue_)), - program_cache_(std::move(environment.program_cache_)) -{ -} - -Environment &Environment::operator=(Environment &&environment) -{ - if (this != &environment) - { - device_ = std::move(environment.device_); - context_ = std::move(environment.context_); - queue_ = std::move(environment.queue_); - profiling_queue_ = std::move(environment.profiling_queue_); - program_cache_ = std::move(environment.program_cache_); - } - return *this; -} - -absl::Status Environment::Init() -{ - if (device().IsAdreno() && device().SupportsTextureArray()) - { - // Some Adreno < 600 have bug with one layer texture array. 
b/131099086 - // If we have one layer texture array and will write smt from kernel to this - // texture, we will get zeroes instead of actual values. - // The same kernel will work, if we use texture array with more than one - // layer. - if (device().info_.adreno_info.gpu_version < 600) - { - GetDevicePtr()->DisableOneLayerTextureArray(); - } - } - return absl::OkStatus(); -} - -void Environment::SetHighPerformance() const -{ - // TODO(sorokin) use cl_perf_hint if available -} - -void Environment::SetDefaultPerformance() const -{ - // TODO(sorokin) use cl_perf_hint if available -} - -void Environment::SetLowPerformance() const -{ - // TODO(sorokin) use cl_perf_hint if available -} - -std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const -{ - std::vector<CalculationsPrecision> precisions; - for (CalculationsPrecision precision : - {CalculationsPrecision::F32, CalculationsPrecision::F32_F16, CalculationsPrecision::F16}) - { - if (IsSupported(precision)) - { - precisions.push_back(precision); - } - } - return precisions; -} - -bool Environment::IsSupported(CalculationsPrecision precision) const -{ - switch (precision) - { - case CalculationsPrecision::F32_F16: - case CalculationsPrecision::F16: - return device_.SupportsFP16(); - case CalculationsPrecision::F32: - return true; - } - return false; -} - -std::vector<TensorStorageType> Environment::GetSupportedStorages() const -{ - std::vector<TensorStorageType> storage_types; - for (auto storage_type : - {TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER, TensorStorageType::TEXTURE_ARRAY, - TensorStorageType::IMAGE_BUFFER, TensorStorageType::TEXTURE_3D}) - { - if (IsSupported(storage_type)) - { - storage_types.push_back(storage_type); - } - } - return storage_types; -} - -std::vector<TensorStorageType> Environment::GetSupportedStoragesWithHWZeroClampSupport() const -{ - std::vector<TensorStorageType> storage_types; - for (auto storage_type : {TensorStorageType::TEXTURE_2D, 
TensorStorageType::TEXTURE_ARRAY, - TensorStorageType::TEXTURE_3D}) - { - if (IsSupported(storage_type)) - { - storage_types.push_back(storage_type); - } - } - return storage_types; -} - -bool Environment::IsSupported(TensorStorageType storage_type) const -{ - switch (storage_type) - { - case TensorStorageType::TEXTURE_2D: - return !device_.IsAMD(); - case TensorStorageType::BUFFER: - return true; - case TensorStorageType::TEXTURE_ARRAY: - return !device_.IsAMD() && device_.SupportsTextureArray(); - case TensorStorageType::IMAGE_BUFFER: - return (device_.IsAdreno() || device_.IsAMD() || device_.IsNvidia()) && - device_.SupportsImageBuffer(); - case TensorStorageType::TEXTURE_3D: - return !device_.IsAMD() && device_.SupportsImage3D(); - case TensorStorageType::SINGLE_TEXTURE_2D: - return false; - case TensorStorageType::UNKNOWN: - return false; - } - return false; -} - -TensorStorageType GetFastestStorageType(const DeviceInfo &gpu_info) -{ - if (gpu_info.IsAdreno()) - { - if (gpu_info.IsAdreno6xxOrHigher()) - { - return TensorStorageType::TEXTURE_ARRAY; - } - else - { - return TensorStorageType::TEXTURE_2D; - } - } - else if (gpu_info.IsPowerVR()) - { - return TensorStorageType::TEXTURE_2D; - } - else if (gpu_info.IsMali()) - { - const MaliInfo mali_info = gpu_info.mali_info; - if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() || mali_info.IsValhall()) - { - return TensorStorageType::TEXTURE_2D; - } - else - { - return TensorStorageType::BUFFER; - } - } - else if (gpu_info.IsNvidia()) - { - return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER - : TensorStorageType::BUFFER; - } - else if (gpu_info.IsAMD()) - { - return gpu_info.SupportsImageBuffer() ? 
TensorStorageType::IMAGE_BUFFER - : TensorStorageType::BUFFER; - } - else if (gpu_info.IsIntel()) - { - return TensorStorageType::BUFFER; - } - return TensorStorageType::BUFFER; -} - -TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(const DeviceInfo &gpu_info) -{ - if (gpu_info.IsAdreno()) - { - if (gpu_info.IsAdreno3xx() || gpu_info.IsAdreno4xx()) - { - return TensorStorageType::BUFFER; - } - else - { - return TensorStorageType::IMAGE_BUFFER; - } - } - else if (gpu_info.IsPowerVR()) - { - return TensorStorageType::BUFFER; - } - else if (gpu_info.IsMali()) - { - return TensorStorageType::BUFFER; - } - else if (gpu_info.IsNvidia()) - { - return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER - : TensorStorageType::BUFFER; - } - else if (gpu_info.IsAMD()) - { - return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER - : TensorStorageType::BUFFER; - } - else if (gpu_info.IsIntel()) - { - return TensorStorageType::BUFFER; - } - return TensorStorageType::BUFFER; -} - -absl::Status CreateEnvironment(Environment *result) -{ - CLDevice gpu; - RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu)); - - CLContext context; - RETURN_IF_ERROR(CreateCLContext(gpu, &context)); - CLCommandQueue queue; - RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue)); - ProfilingCommandQueue profiling_queue; - RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue)); - - *result = - Environment(std::move(gpu), std::move(context), std::move(queue), std::move(profiling_queue)); - return result->Init(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Environment.h b/runtime/onert/backend/gpu_cl/open_cl/Environment.h deleted file mode 100644 index 47866b563..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Environment.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__ - -#include "ClCommandQueue.h" -#include "ClContext.h" -#include "ClDevice.h" -#include "DeviceInfo.h" -#include "Precision.h" -#include "TensorType.h" -#include "DataType.h" -#include "ProgramCache.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class Environment -{ -public: - Environment() = default; - explicit Environment(CLDevice &&device, CLContext &&context, CLCommandQueue &&queue, - ProfilingCommandQueue &&profiling_queue); - // Move only - Environment(Environment &&environment); - Environment &operator=(Environment &&environment); - Environment(const Environment &) = delete; - Environment &operator=(const Environment &) = delete; - - const CLDevice &device() const { return device_; } - CLDevice *GetDevicePtr() { return &device_; } - const CLDevice *GetDevicePtr() const { return &device_; } - CLContext &context() { return context_; } - CLCommandQueue *queue() { return &queue_; } - ProfilingCommandQueue *profiling_queue() { return &profiling_queue_; } - ProgramCache *program_cache() { return &program_cache_; } - const ProgramCache *program_cache() const { return &program_cache_; } - - std::vector<CalculationsPrecision> GetSupportedPrecisions() const; - bool 
IsSupported(CalculationsPrecision precision) const; - std::vector<TensorStorageType> GetSupportedStorages() const; - // returns storage types that support zero clamping when reading OOB in HW - // (Height/Width) dimensions. - std::vector<TensorStorageType> GetSupportedStoragesWithHWZeroClampSupport() const; - bool IsSupported(TensorStorageType storage_type) const; - - absl::Status Init(); - - void SetHighPerformance() const; - void SetDefaultPerformance() const; - void SetLowPerformance() const; // for energy saving - -private: - CLDevice device_; - CLContext context_; - CLCommandQueue queue_; - ProfilingCommandQueue profiling_queue_; - ProgramCache program_cache_; -}; - -TensorStorageType GetFastestStorageType(const DeviceInfo &gpu_info); -TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(const DeviceInfo &gpu_info); - -absl::Status CreateEnvironment(Environment *result); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.h b/runtime/onert/backend/gpu_cl/open_cl/GpuObject.h deleted file mode 100644 index a31630235..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.h +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__ - -#include <map> -#include <memory> -#include <string> -#include <vector> - -#include "ClContext.h" -#include "OpenclWrapper.h" -#include "AccessType.h" -#include "DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -struct GPUImage2DDescriptor -{ - DataType data_type = DataType::UNKNOWN; - AccessType access_type = AccessType::UNKNOWN; - cl_mem memory = nullptr; -}; - -struct GPUImage3DDescriptor -{ - DataType data_type = DataType::UNKNOWN; - AccessType access_type = AccessType::UNKNOWN; - cl_mem memory = nullptr; -}; - -struct GPUImage2DArrayDescriptor -{ - DataType data_type = DataType::UNKNOWN; - AccessType access_type = AccessType::UNKNOWN; - cl_mem memory = nullptr; -}; - -struct GPUImageBufferDescriptor -{ - DataType data_type = DataType::UNKNOWN; - AccessType access_type = AccessType::UNKNOWN; - cl_mem memory = nullptr; -}; - -struct GPUCustomMemoryDescriptor -{ - std::string type_name = ""; - cl_mem memory = nullptr; -}; - -enum class MemoryType -{ - GLOBAL, - CONSTANT, - LOCAL -}; - -std::string MemoryTypeToCLType(MemoryType type); - -struct GPUBufferDescriptor -{ - DataType data_type = DataType::UNKNOWN; - AccessType access_type = AccessType::UNKNOWN; - int element_size = 0; - MemoryType memory_type = MemoryType::GLOBAL; - std::vector<std::string> attributes; - cl_mem memory = nullptr; -}; - -struct GPUResources -{ - std::vector<std::string> ints; - std::vector<std::string> floats; - std::vector<std::pair<std::string, GPUBufferDescriptor>> buffers; - std::vector<std::pair<std::string, GPUImage2DDescriptor>> images2d; - std::vector<std::pair<std::string, GPUImage2DArrayDescriptor>> image2d_arrays; - std::vector<std::pair<std::string, GPUImage3DDescriptor>> images3d; - std::vector<std::pair<std::string, GPUImageBufferDescriptor>> image_buffers; - std::vector<std::pair<std::string, GPUCustomMemoryDescriptor>> 
custom_memories; - - std::vector<std::string> GetNames() const - { - std::vector<std::string> names = ints; - names.insert(names.end(), floats.begin(), floats.end()); - for (const auto &obj : buffers) - { - names.push_back(obj.first); - } - for (const auto &obj : images2d) - { - names.push_back(obj.first); - } - for (const auto &obj : image2d_arrays) - { - names.push_back(obj.first); - } - for (const auto &obj : images3d) - { - names.push_back(obj.first); - } - for (const auto &obj : image_buffers) - { - names.push_back(obj.first); - } - for (const auto &obj : custom_memories) - { - names.push_back(obj.first); - } - return names; - } -}; - -struct GPUResourcesWithValue -{ - std::vector<std::pair<std::string, int>> ints; - std::vector<std::pair<std::string, float>> floats; - std::vector<std::pair<std::string, cl_mem>> buffers; - std::vector<std::pair<std::string, cl_mem>> images2d; - std::vector<std::pair<std::string, cl_mem>> image2d_arrays; - std::vector<std::pair<std::string, cl_mem>> images3d; - std::vector<std::pair<std::string, cl_mem>> image_buffers; - std::vector<std::pair<std::string, cl_mem>> custom_memories; -}; - -class GPUObject; - -class GPUObjectDescriptor -{ -public: - GPUObjectDescriptor() = default; - GPUObjectDescriptor(const GPUObjectDescriptor &) = default; - GPUObjectDescriptor &operator=(const GPUObjectDescriptor &) = default; - GPUObjectDescriptor(GPUObjectDescriptor &&obj_desc) : state_vars_(std::move(obj_desc.state_vars_)) - { - } - GPUObjectDescriptor &operator=(GPUObjectDescriptor &&obj_desc) - { - if (this != &obj_desc) - { - state_vars_ = std::move(obj_desc.state_vars_); - } - return *this; - } - virtual ~GPUObjectDescriptor() = default; - - void SetStateVar(const std::string &key, const std::string &value) const - { - state_vars_[key] = value; - } - - virtual std::string PerformConstExpr(const std::string &) const { return ""; } - - virtual absl::Status PerformSelector(const std::string &, const std::vector<std::string> &, - const 
std::vector<std::string> &, std::string *result) const - { - *result = ""; - return absl::OkStatus(); - } - virtual GPUResources GetGPUResources() const { return GPUResources(); } - - virtual absl::Status CreateGPUObject(CLContext *, std::unique_ptr<GPUObject> *) const - { - return absl::OkStatus(); - } - virtual void Release() {} - - void SetAccess(AccessType access_type) { access_type_ = access_type; } - AccessType GetAccess() const { return access_type_; } - -protected: - // friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode( - // const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder); - // friend void Decode(const data::GPUObjectDescriptor* fb_obj, - // GPUObjectDescriptor* obj); - mutable std::map<std::string, std::string> state_vars_; - AccessType access_type_ = AccessType::UNKNOWN; -}; - -using GPUObjectDescriptorPtr = std::unique_ptr<GPUObjectDescriptor>; - -class GPUObject -{ -public: - GPUObject() = default; - // Move only - GPUObject(GPUObject &&obj_desc) = default; - GPUObject &operator=(GPUObject &&obj_desc) = default; - GPUObject(const GPUObject &) = delete; - GPUObject &operator=(const GPUObject &) = delete; - virtual ~GPUObject() = default; - virtual absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const = 0; -}; - -using GPUObjectPtr = std::unique_ptr<GPUObject>; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc b/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc deleted file mode 100644 index afb7e2950..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "InferenceContext.h" - -#include <algorithm> -#include <cmath> -#include <cstdint> -#include <map> -#include <memory> -#include <string> -#include <vector> -#include <unordered_map> - -#include "Buffer.h" -#include "ClDevice.h" - -#include "kernels/GpuOperation.h" -#include "ModelHints.h" -#include "Precision.h" -#include "StorageTypeUtil.h" -#include "TensorType.h" -#include "DataType.h" -#include "Model.h" -#include "Operations.h" -#include "Shape.h" -#include "Types.h" -#include "Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -CLNode::CLNode(CLNode &&node) - : operation(std::move(node.operation)), inputs(std::move(node.inputs)), - outputs(std::move(node.outputs)), name(std::move(node.name)) -{ -} - -CLNode &CLNode::operator=(CLNode &&node) -{ - if (this != &node) - { - operation = std::move(node.operation); - inputs = std::move(node.inputs); - outputs = std::move(node.outputs); - name = std::move(node.name); - } - return *this; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h b/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h deleted file mode 100644 index ebe2c5313..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__ - -#include <cstdint> -#include <functional> -#include <map> -#include <memory> -#include <vector> -#include <unordered_map> - -#include "Buffer.h" -#include "ClCommandQueue.h" -#include "Environment.h" -#include "GpuObject.h" -#include "kernels/GpuOperation.h" -#include "ModelHints.h" -#include "OpenclWrapper.h" -#include "Precision.h" -#include "TensorType.h" -#include "Model.h" -#include "InternalTensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -struct CLNode -{ - std::unique_ptr<GPUOperation> operation; - std::vector<ValueId> inputs; - std::vector<ValueId> outputs; - - // Mostly for debug purposes. 
- std::string name; - - CLNode() = default; - - CLNode(CLNode &&node); - CLNode &operator=(CLNode &&node); - CLNode(const CLNode &) = delete; - CLNode &operator=(const CLNode &) = delete; -}; - -class InferenceContext -{ -public: - struct CreateInferenceInfo - { - CalculationsPrecision precision; - TensorStorageType storage_type; - ModelHints hints; - }; - - struct DummyTensor - { - BHWC shape; - TensorDescriptor descriptor; - - bool operator==(const DummyTensor &b) const - { - return shape == b.shape && descriptor == b.descriptor; - } - }; - - class TensorReserver - { - public: - ValueId Add(const std::shared_ptr<DummyTensor> dummy) - { - reservations_[next_] = std::move(dummy); - return next_++; - } - void Add(ValueId id, const std::shared_ptr<DummyTensor> dummy) - { - reservations_[id] = std::move(dummy); - } - void SetNext(ValueId id) { next_ = id; } - bool HaveTensor(ValueId id) { return reservations_.find(id) != reservations_.end(); } - std::shared_ptr<DummyTensor> Get(ValueId id) { return reservations_[id]; } - - std::vector<std::pair<ValueId, TensorDescriptor>> GetTensorDescs() const - { - std::vector<std::pair<ValueId, TensorDescriptor>> result; - for (auto &v : reservations_) - { - TensorDescriptor desc = v.second->descriptor; - desc.shape.b = v.second->shape.b; - desc.shape.h = v.second->shape.h; - desc.shape.w = v.second->shape.w; - desc.shape.d = 1; - desc.shape.c = v.second->shape.c; - result.push_back({v.first, desc}); - } - return result; - } - - void Add(const std::vector<std::pair<ValueId, TensorDescriptor>> &tensors) - { - for (auto &v : tensors) - { - auto dummy = std::make_shared<DummyTensor>(); - dummy->descriptor = v.second; - dummy->shape.b = v.second.shape.b; - dummy->shape.h = v.second.shape.h; - dummy->shape.w = v.second.shape.w; - dummy->shape.c = v.second.shape.c; - Add(v.first, dummy); - } - } - - private: - std::unordered_map<ValueId, std::shared_ptr<DummyTensor>> reservations_; - ValueId next_ = 0; - }; - -private: -}; - -} // 
namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h b/runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h deleted file mode 100644 index f0423db86..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__ - -#include <stdint.h> - -#include <vector> - -#include "DataType.h" -#include "Shape.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace internal_tensor -{ - -// Meta function given element type returns a type for Tensor data container. 
-template <DataType Type> struct StorageType; - -template <> struct StorageType<DataType::FLOAT32> -{ - using value = std::vector<float>; -}; - -template <> struct StorageType<DataType::INT32> -{ - using value = std::vector<int32_t>; -}; - -} // namespace internal_tensor - -template <typename ShapeT, DataType Type> struct InternalTensor -{ - using ShapeType = ShapeT; - - constexpr static DataType kType = Type; - - using TensorStorageType = typename internal_tensor::StorageType<Type>::value; - - // Opaque id of a tensor. - int64_t id = -1; - - ShapeType shape; - - TensorStorageType data; -}; - -// TensorRef is a reference to another tensor. If an object should never hold -// tensor data, then TensorRef should be used instead. -template <typename ShapeT> struct TensorRef -{ - using ShapeType = ShapeT; - - DataType type = DataType::UNKNOWN; - - ShapeT shape; - - // Opaque reference to a tensor. Upstream component is responsible for - // resolving this reference into an actual tensor. - int64_t ref = -1; - - // Specifies if the tensor should be a variable input tensor that must be an - // output as well as an input to the graph. 
- bool is_variable_input = false; -}; - -template <typename ShapeT, DataType Type> constexpr DataType InternalTensor<ShapeT, Type>::kType; - -template <typename ShapeT, DataType Type> -InternalTensor<ShapeT, Type> MakeZeroTensor(const ShapeT &shape) -{ - InternalTensor<ShapeT, Type> tensor; - tensor.shape = shape; - tensor.data = - typename InternalTensor<ShapeT, Type>::TensorStorageType(shape.DimensionsProduct(), 0); - return tensor; -} - -using TensorFloat32 = InternalTensor<BHWC, DataType::FLOAT32>; -using Tensor5DFloat32 = InternalTensor<BHWDC, DataType::FLOAT32>; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc b/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc deleted file mode 100644 index 3889d4369..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "LinearStorage.h" - -#include "absl/strings/str_cat.h" -#include "DataType.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -TensorLinearDescriptor::TensorLinearDescriptor(TensorLinearDescriptor &&desc) - : GPUObjectDescriptor(std::move(desc)), storage_type(desc.storage_type), - element_type(desc.element_type), memory_type(desc.memory_type), size(desc.size), - data(std::move(desc.data)) -{ -} - -TensorLinearDescriptor &TensorLinearDescriptor::operator=(TensorLinearDescriptor &&desc) -{ - if (this != &desc) - { - std::swap(storage_type, desc.storage_type); - std::swap(element_type, desc.element_type); - std::swap(memory_type, desc.memory_type); - std::swap(size, desc.size); - data = std::move(desc.data); - GPUObjectDescriptor::operator=(std::move(desc)); - } - return *this; -} - -void TensorLinearDescriptor::Release() { data.clear(); } - -GPUResources TensorLinearDescriptor::GetGPUResources() const -{ - GPUResources resources; - resources.ints.push_back("length"); - if (storage_type == LinearStorageType::BUFFER) - { - GPUBufferDescriptor desc; - desc.data_type = element_type; - desc.access_type = access_type_; - desc.element_size = 4; - desc.memory_type = memory_type; - resources.buffers.push_back({"buffer", desc}); - } - else - { - GPUImage2DDescriptor desc; - desc.data_type = element_type; - desc.access_type = access_type_; - resources.images2d.push_back({"tex2d", desc}); - } - return resources; -} - -absl::Status TensorLinearDescriptor::PerformSelector(const std::string &selector, - const std::vector<std::string> &args, - const std::vector<std::string> &, - std::string *result) const -{ - if (selector == "Length") - { - *result = "length"; - return absl::OkStatus(); - } - else if (selector == "Read") - { - return PerformReadSelector(args, result); - } - else if (selector == "GetPtr") - { - if (storage_type != LinearStorageType::BUFFER) - { - return absl::InvalidArgumentError( - "GetPtr selector supported 
for LinearStorageType::BUFFER only."); - } - *result = "buffer"; - return absl::OkStatus(); - } - else - { - return absl::NotFoundError( - absl::StrCat("TensorLinearDescriptor don't have selector with name - ", selector)); - } -} - -absl::Status TensorLinearDescriptor::PerformReadSelector(const std::vector<std::string> &args, - std::string *result) const -{ - if (args.size() != 1) - { - return absl::NotFoundError(absl::StrCat( - "TensorLinearDescriptor Read require one argument, but ", args.size(), " was passed")); - } - if (storage_type == LinearStorageType::BUFFER) - { - *result = absl::StrCat("buffer[", args[0], "]"); - return absl::OkStatus(); - } - else - { - const std::string read = element_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef"; - *result = absl::StrCat(read, "(tex2d, smp_none, (int2)(", args[0], ", 0))"); - return absl::OkStatus(); - } -} - -absl::Status TensorLinearDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const -{ - LinearStorage gpu_storage; - RETURN_IF_ERROR(gpu_storage.CreateFromTensorLinearDescriptor(*this, context)); - *result = absl::make_unique<LinearStorage>(std::move(gpu_storage)); - return absl::OkStatus(); -} - -void TensorLinearDescriptor::UploadLinearData(const InternalTensor<Linear, DataType::FLOAT32> &src, - int aligned_size) -{ - size = aligned_size == 0 ? DivideRoundUp(src.shape.v, 4) : aligned_size; - if (element_type == DataType::FLOAT32) - { - data.resize(size * sizeof(float) * 4); - float *gpu_data = reinterpret_cast<float *>(data.data()); - for (int i = 0; i < size * 4; ++i) - { - if (i < src.shape.v) - { - gpu_data[i] = src.data[i]; - } - else - { - gpu_data[i] = 0.0f; - } - } - } - // TODO - // It doesn't support F16 yet. I will try to add it later. 
- // - // else { - // data.resize(size * sizeof(half) * 4); - // half* gpu_data = reinterpret_cast<half*>(data.data()); - // for (int i = 0; i < size * 4; ++i) { - // if (i < src.shape.v) { - // gpu_data[i] = src.data[i]; - // } else { - // gpu_data[i] = 0.0f; - // } - // } - // } -} - -void LinearStorage::Release() -{ - if (memory_) - { - clReleaseMemObject(memory_); - memory_ = nullptr; - } -} - -LinearStorage::LinearStorage(LinearStorage &&storage) - : GPUObject(std::move(storage)), memory_(storage.memory_), depth_(storage.depth_), - storage_type_(storage.storage_type_) -{ - storage.memory_ = nullptr; -} - -LinearStorage &LinearStorage::operator=(LinearStorage &&storage) -{ - if (this != &storage) - { - Release(); - std::swap(memory_, storage.memory_); - std::swap(depth_, storage.depth_); - std::swap(storage_type_, storage.storage_type_); - GPUObject::operator=(std::move(storage)); - } - return *this; -} - -absl::Status LinearStorage::GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const -{ - const auto *linear_desc = dynamic_cast<const TensorLinearDescriptor *>(obj_ptr); - if (!linear_desc) - { - return absl::InvalidArgumentError("Expected TensorLinearDescriptor on input."); - } - - resources->ints.push_back({"length", depth_}); - - if (storage_type_ == LinearStorageType::BUFFER) - { - resources->buffers.push_back({"buffer", memory_}); - } - else - { - resources->images2d.push_back({"tex2d", memory_}); - } - - return absl::OkStatus(); -} - -absl::Status LinearStorage::CreateFromTensorLinearDescriptor(const TensorLinearDescriptor &desc, - CLContext *context) -{ - storage_type_ = desc.storage_type; - depth_ = desc.size; - uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data()); - if (storage_type_ == LinearStorageType::BUFFER) - { - bool read_only = desc.memory_type == MemoryType::CONSTANT; - uint8_t *data_ptr = desc.data.empty() ? 
nullptr : const_cast<unsigned char *>(desc.data.data()); - // TODO - // It doesn't support F16 yet. I will try to add it later. - // - // const int float4_size = desc.element_type == DataType::FLOAT32 - // ? sizeof(float) * 4 - // : sizeof(half) * 4; - const int float4_size = sizeof(float) * 4; - return CreateCLBuffer(context->context(), depth_ * float4_size, read_only, data_ptr, &memory_); - } - else - { - return CreateRGBAImage2D(context->context(), depth_, 1, - DataTypeToChannelType(desc.element_type), data_ptr, &memory_); - } -} - -LinearStorageType DeduceLinearStorageType(TensorStorageType tensor_storage_type) -{ - if (tensor_storage_type == TensorStorageType::BUFFER) - { - return LinearStorageType::BUFFER; - } - else - { - return LinearStorageType::TEXTURE_2D; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h b/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h deleted file mode 100644 index f6c3ac82f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__ - -#include <string> -#include <utility> - -#include "absl/strings/str_cat.h" -#include "absl/types/span.h" -#include "GpuObject.h" -#include "OpenclWrapper.h" -#include "TensorType.h" -#include "Util.h" -#include "DataType.h" -#include "Status.h" -#include "Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class LinearStorageType -{ - BUFFER, - TEXTURE_2D -}; - -struct TensorLinearDescriptor : public GPUObjectDescriptor -{ - LinearStorageType storage_type; - DataType element_type; // FLOAT32 or FLOAT16 - MemoryType memory_type = MemoryType::GLOBAL; // applicable for BUFFER - - // optional - int size = 0; - std::vector<uint8_t> data; - - TensorLinearDescriptor() = default; - TensorLinearDescriptor(const TensorLinearDescriptor &) = default; - TensorLinearDescriptor &operator=(const TensorLinearDescriptor &) = default; - TensorLinearDescriptor(TensorLinearDescriptor &&desc); - TensorLinearDescriptor &operator=(TensorLinearDescriptor &&desc); - - void UploadLinearData(const InternalTensor<Linear, DataType::FLOAT32> &src, int aligned_size = 0); - - absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const override; - - GPUResources GetGPUResources() const override; - absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const; - - absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override; - void Release() override; -}; - -LinearStorageType DeduceLinearStorageType(TensorStorageType tensor_storage_type); - -// Represent GPU 1D-array of FLT4(float4/half4) values -// Can use inside texture2d or buffer -class LinearStorage : public GPUObject -{ -public: - LinearStorage() {} - ~LinearStorage() override { Release(); } - - // Move only - 
LinearStorage(LinearStorage &&storage); - LinearStorage &operator=(LinearStorage &&storage); - LinearStorage(const LinearStorage &) = delete; - LinearStorage &operator=(const LinearStorage &) = delete; - - absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const override; - - absl::Status CreateFromTensorLinearDescriptor(const TensorLinearDescriptor &desc, - CLContext *context); - -private: - void Release(); - - cl_mem memory_ = nullptr; - int depth_; - LinearStorageType storage_type_; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Model.h b/runtime/onert/backend/gpu_cl/open_cl/Model.h deleted file mode 100644 index f434bb22f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Model.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__ - -#include <string> - -#include "absl/types/any.h" -#include "InternalTensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// There is yet another representation of CNN graph. 
The primary purpose of this -// representation is to simplify graph manipulation. - -using ValueId = uint32_t; - -// Used to emulate quantized behavior. -struct QuantizationParams -{ - float min = 0; - float max = 0; - float scale = 0; -}; - -struct Operation -{ - std::string type; - absl::any attributes; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ModelHints.h b/runtime/onert/backend/gpu_cl/open_cl/ModelHints.h deleted file mode 100644 index 474c56b2a..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ModelHints.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__ - -#include <cstdint> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -struct ModelHints -{ - using ModelHint = uint64_t; - - // By default we want the fastest inference. - static constexpr ModelHint kFastestInference = 0x00000000; - // Can improve compilation time, but inference can be slower. - static constexpr ModelHint kReduceKernelsCount = 0x00000001; - // Can improve tuning time, but inference can be slower. 
- static constexpr ModelHint kFastTuning = 0x00000002; - - // Experimental. - // Can improve performance and memory consumption, but slow down - // initialization a lot and create more kernels. - static constexpr ModelHint kAllowSpecialKernels = 0x00000004; - - void Add(ModelHint hint) - { - if (hint == kFastestInference) - { - hints = kFastestInference; - } - else - { - hints |= hint; - } - } - - bool Check(ModelHint hint) const { return hints & hint; } - - uint64_t hints = kFastestInference; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc b/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc deleted file mode 100644 index dbaf6faf6..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#if defined(_WIN32) -#define __WINDOWS__ -#endif - -#include "OpenclWrapper.h" - -#ifdef __WINDOWS__ -#include <windows.h> -#else -#include <dlfcn.h> -#endif - -#include <string> - -#include "absl/strings/str_cat.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -#ifdef __ANDROID__ -#define LoadFunction(function) \ - if (use_wrapper) \ - { \ - function = reinterpret_cast<PFN_##function>(loadOpenCLPointer(#function)); \ - } \ - else \ - { \ - function = reinterpret_cast<PFN_##function>(dlsym(*libopencl, #function)); \ - } -#elif defined(__WINDOWS__) -#define LoadFunction(function) \ - function = reinterpret_cast<PFN_##function>(GetProcAddress(libopencl, #function)); -#else -#define LoadFunction(function) \ - function = reinterpret_cast<PFN_##function>(dlsym(*libopencl, #function)); -#endif - -#ifdef __WINDOWS__ -void LoadOpenCLFunctions(HMODULE libopencl); -#else -void LoadOpenCLFunctions(void **libopencl, bool use_wrapper); -#endif - -absl::Status LoadOpenCL(void **libopencl) -{ -#ifdef __WINDOWS__ - HMODULE libopencl = LoadLibraryA("OpenCL.dll"); - if (libopencl) - { - LoadOpenCLFunctions(libopencl); - return absl::OkStatus(); - } - else - { - DWORD error_code = GetLastError(); - return absl::UnknownError( - absl::StrCat("Can not open OpenCL library on this device, error code - ", error_code)); - } -#else - *libopencl = dlopen("libOpenCL.so", RTLD_NOW | RTLD_LOCAL); - if (*libopencl) - { - LoadOpenCLFunctions(libopencl, false); - return absl::OkStatus(); - } - // record error - std::string error(dlerror()); -#ifdef __ANDROID__ - // Pixel phone or auto? 
- *libopencl = dlopen("libOpenCL-pixel.so", RTLD_NOW | RTLD_LOCAL); - if (!*libopencl) - { - *libopencl = dlopen("libOpenCL-car.so", RTLD_NOW | RTLD_LOCAL); - } - if (*libopencl) - { - typedef void (*enableOpenCL_t)(); - enableOpenCL_t enableOpenCL = - reinterpret_cast<enableOpenCL_t>(dlsym(*libopencl, "enableOpenCL")); - enableOpenCL(); - LoadOpenCLFunctions(libopencl, true); - return absl::OkStatus(); - } -#endif - return absl::UnknownError(absl::StrCat("Can not open OpenCL library on this device - ", error)); -#endif -} - -void UnloadOpenCL(void *libopencl) -{ - if (libopencl) - { - dlclose(libopencl); - } -} - -#ifdef __WINDOWS__ -void LoadOpenCLFunctions(HMODULE libopencl) -{ -#else -#ifdef __ANDROID__ -void LoadOpenCLFunctions(void **libopencl, bool use_wrapper) -{ - typedef void *(*loadOpenCLPointer_t)(const char *name); - loadOpenCLPointer_t loadOpenCLPointer; - if (use_wrapper) - { - loadOpenCLPointer = - reinterpret_cast<loadOpenCLPointer_t>(dlsym(*libopencl, "loadOpenCLPointer")); - } -#else -void LoadOpenCLFunctions(void **libopencl, bool) -{ -#endif // __ANDROID__ -#endif // __WINDOWS__ - - LoadFunction(clGetPlatformIDs); - LoadFunction(clGetPlatformInfo); - LoadFunction(clGetDeviceIDs); - LoadFunction(clGetDeviceInfo); - LoadFunction(clCreateSubDevices); - LoadFunction(clRetainDevice); - LoadFunction(clReleaseDevice); - LoadFunction(clCreateContext); - LoadFunction(clCreateContextFromType); - LoadFunction(clRetainContext); - LoadFunction(clReleaseContext); - LoadFunction(clGetContextInfo); - LoadFunction(clCreateCommandQueueWithProperties); - LoadFunction(clRetainCommandQueue); - LoadFunction(clReleaseCommandQueue); - LoadFunction(clGetCommandQueueInfo); - LoadFunction(clCreateBuffer); - LoadFunction(clCreateSubBuffer); - LoadFunction(clCreateImage); - LoadFunction(clCreatePipe); - LoadFunction(clRetainMemObject); - LoadFunction(clReleaseMemObject); - LoadFunction(clGetSupportedImageFormats); - LoadFunction(clGetMemObjectInfo); - 
LoadFunction(clGetImageInfo); - LoadFunction(clGetPipeInfo); - LoadFunction(clSetMemObjectDestructorCallback); - LoadFunction(clSVMAlloc); - LoadFunction(clSVMFree); - LoadFunction(clCreateSamplerWithProperties); - LoadFunction(clRetainSampler); - LoadFunction(clReleaseSampler); - LoadFunction(clGetSamplerInfo); - LoadFunction(clCreateProgramWithSource); - LoadFunction(clCreateProgramWithBinary); - LoadFunction(clCreateProgramWithBuiltInKernels); - LoadFunction(clRetainProgram); - LoadFunction(clReleaseProgram); - LoadFunction(clBuildProgram); - LoadFunction(clCompileProgram); - LoadFunction(clLinkProgram); - LoadFunction(clUnloadPlatformCompiler); - LoadFunction(clGetProgramInfo); - LoadFunction(clGetProgramBuildInfo); - LoadFunction(clCreateKernel); - LoadFunction(clCreateKernelsInProgram); - LoadFunction(clRetainKernel); - LoadFunction(clReleaseKernel); - LoadFunction(clSetKernelArg); - LoadFunction(clSetKernelArgSVMPointer); - LoadFunction(clSetKernelExecInfo); - LoadFunction(clGetKernelInfo); - LoadFunction(clGetKernelArgInfo); - LoadFunction(clGetKernelWorkGroupInfo); - LoadFunction(clWaitForEvents); - LoadFunction(clGetEventInfo); - LoadFunction(clCreateUserEvent); - LoadFunction(clRetainEvent); - LoadFunction(clReleaseEvent); - LoadFunction(clSetUserEventStatus); - LoadFunction(clSetEventCallback); - LoadFunction(clGetEventProfilingInfo); - LoadFunction(clFlush); - LoadFunction(clFinish); - LoadFunction(clEnqueueReadBuffer); - LoadFunction(clEnqueueReadBufferRect); - LoadFunction(clEnqueueWriteBuffer); - LoadFunction(clEnqueueWriteBufferRect); - LoadFunction(clEnqueueFillBuffer); - LoadFunction(clEnqueueCopyBuffer); - LoadFunction(clEnqueueCopyBufferRect); - LoadFunction(clEnqueueReadImage); - LoadFunction(clEnqueueWriteImage); - LoadFunction(clEnqueueFillImage); - LoadFunction(clEnqueueCopyImage); - LoadFunction(clEnqueueCopyImageToBuffer); - LoadFunction(clEnqueueCopyBufferToImage); - LoadFunction(clEnqueueMapBuffer); - LoadFunction(clEnqueueMapImage); - 
LoadFunction(clEnqueueUnmapMemObject); - LoadFunction(clEnqueueMigrateMemObjects); - LoadFunction(clEnqueueNDRangeKernel); - LoadFunction(clEnqueueNativeKernel); - LoadFunction(clEnqueueMarkerWithWaitList); - LoadFunction(clEnqueueBarrierWithWaitList); - LoadFunction(clEnqueueSVMFree); - LoadFunction(clEnqueueSVMMemcpy); - LoadFunction(clEnqueueSVMMemFill); - LoadFunction(clEnqueueSVMMap); - LoadFunction(clEnqueueSVMUnmap); - LoadFunction(clGetExtensionFunctionAddressForPlatform); - LoadFunction(clCreateImage2D); - LoadFunction(clCreateImage3D); - LoadFunction(clEnqueueMarker); - LoadFunction(clEnqueueWaitForEvents); - LoadFunction(clEnqueueBarrier); - LoadFunction(clUnloadCompiler); - LoadFunction(clGetExtensionFunctionAddress); - LoadFunction(clCreateCommandQueue); - LoadFunction(clCreateSampler); - LoadFunction(clEnqueueTask); - - // OpenGL sharing - LoadFunction(clCreateFromGLBuffer); - LoadFunction(clCreateFromGLTexture); - LoadFunction(clEnqueueAcquireGLObjects); - LoadFunction(clEnqueueReleaseGLObjects); - - // cl_khr_egl_event extension - LoadFunction(clCreateEventFromEGLSyncKHR); - - // EGL sharing - LoadFunction(clCreateFromEGLImageKHR); - LoadFunction(clEnqueueAcquireEGLObjectsKHR); - LoadFunction(clEnqueueReleaseEGLObjectsKHR); -} // namespace gpu_cl - -// No OpenCL support, do not set function addresses -PFN_clGetPlatformIDs clGetPlatformIDs; -PFN_clGetPlatformInfo clGetPlatformInfo; -PFN_clGetDeviceIDs clGetDeviceIDs; -PFN_clGetDeviceInfo clGetDeviceInfo; -PFN_clCreateSubDevices clCreateSubDevices; -PFN_clRetainDevice clRetainDevice; -PFN_clReleaseDevice clReleaseDevice; -PFN_clCreateContext clCreateContext; -PFN_clCreateContextFromType clCreateContextFromType; -PFN_clRetainContext clRetainContext; -PFN_clReleaseContext clReleaseContext; -PFN_clGetContextInfo clGetContextInfo; -PFN_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; -PFN_clRetainCommandQueue clRetainCommandQueue; -PFN_clReleaseCommandQueue clReleaseCommandQueue; 
-PFN_clGetCommandQueueInfo clGetCommandQueueInfo; -PFN_clCreateBuffer clCreateBuffer; -PFN_clCreateSubBuffer clCreateSubBuffer; -PFN_clCreateImage clCreateImage; -PFN_clCreatePipe clCreatePipe; -PFN_clRetainMemObject clRetainMemObject; -PFN_clReleaseMemObject clReleaseMemObject; -PFN_clGetSupportedImageFormats clGetSupportedImageFormats; -PFN_clGetMemObjectInfo clGetMemObjectInfo; -PFN_clGetImageInfo clGetImageInfo; -PFN_clGetPipeInfo clGetPipeInfo; -PFN_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; -PFN_clSVMAlloc clSVMAlloc; -PFN_clSVMFree clSVMFree; -PFN_clCreateSamplerWithProperties clCreateSamplerWithProperties; -PFN_clRetainSampler clRetainSampler; -PFN_clReleaseSampler clReleaseSampler; -PFN_clGetSamplerInfo clGetSamplerInfo; -PFN_clCreateProgramWithSource clCreateProgramWithSource; -PFN_clCreateProgramWithBinary clCreateProgramWithBinary; -PFN_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; -PFN_clRetainProgram clRetainProgram; -PFN_clReleaseProgram clReleaseProgram; -PFN_clBuildProgram clBuildProgram; -PFN_clCompileProgram clCompileProgram; -PFN_clLinkProgram clLinkProgram; -PFN_clUnloadPlatformCompiler clUnloadPlatformCompiler; -PFN_clGetProgramInfo clGetProgramInfo; -PFN_clGetProgramBuildInfo clGetProgramBuildInfo; -PFN_clCreateKernel clCreateKernel; -PFN_clCreateKernelsInProgram clCreateKernelsInProgram; -PFN_clRetainKernel clRetainKernel; -PFN_clReleaseKernel clReleaseKernel; -PFN_clSetKernelArg clSetKernelArg; -PFN_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; -PFN_clSetKernelExecInfo clSetKernelExecInfo; -PFN_clGetKernelInfo clGetKernelInfo; -PFN_clGetKernelArgInfo clGetKernelArgInfo; -PFN_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; -PFN_clWaitForEvents clWaitForEvents; -PFN_clGetEventInfo clGetEventInfo; -PFN_clCreateUserEvent clCreateUserEvent; -PFN_clRetainEvent clRetainEvent; -PFN_clReleaseEvent clReleaseEvent; -PFN_clSetUserEventStatus clSetUserEventStatus; -PFN_clSetEventCallback 
clSetEventCallback; -PFN_clGetEventProfilingInfo clGetEventProfilingInfo; -PFN_clFlush clFlush; -PFN_clFinish clFinish; -PFN_clEnqueueReadBuffer clEnqueueReadBuffer; -PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect; -PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer; -PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; -PFN_clEnqueueFillBuffer clEnqueueFillBuffer; -PFN_clEnqueueCopyBuffer clEnqueueCopyBuffer; -PFN_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; -PFN_clEnqueueReadImage clEnqueueReadImage; -PFN_clEnqueueWriteImage clEnqueueWriteImage; -PFN_clEnqueueFillImage clEnqueueFillImage; -PFN_clEnqueueCopyImage clEnqueueCopyImage; -PFN_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; -PFN_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; -PFN_clEnqueueMapBuffer clEnqueueMapBuffer; -PFN_clEnqueueMapImage clEnqueueMapImage; -PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; -PFN_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; -PFN_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; -PFN_clEnqueueNativeKernel clEnqueueNativeKernel; -PFN_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; -PFN_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; -PFN_clEnqueueSVMFree clEnqueueSVMFree; -PFN_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; -PFN_clEnqueueSVMMemFill clEnqueueSVMMemFill; -PFN_clEnqueueSVMMap clEnqueueSVMMap; -PFN_clEnqueueSVMUnmap clEnqueueSVMUnmap; -PFN_clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform; -PFN_clCreateImage2D clCreateImage2D; -PFN_clCreateImage3D clCreateImage3D; -PFN_clEnqueueMarker clEnqueueMarker; -PFN_clEnqueueWaitForEvents clEnqueueWaitForEvents; -PFN_clEnqueueBarrier clEnqueueBarrier; -PFN_clUnloadCompiler clUnloadCompiler; -PFN_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; -PFN_clCreateCommandQueue clCreateCommandQueue; -PFN_clCreateSampler clCreateSampler; -PFN_clEnqueueTask clEnqueueTask; - -// OpenGL sharing -PFN_clCreateFromGLBuffer clCreateFromGLBuffer; 
-PFN_clCreateFromGLTexture clCreateFromGLTexture; -PFN_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; -PFN_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; - -// cl_khr_egl_event extension -PFN_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; - -// EGL sharing -PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; -PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; -PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; - -cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags, - const cl_image_format *image_format, const cl_image_desc *image_desc, - void *host_ptr, cl_int *errcode_ret) -{ - if (clCreateImage) - { // clCreateImage available since OpenCL 1.2 - return clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret); - } - else - { - return clCreateImage2D(context, flags, image_format, image_desc->image_width, - image_desc->image_height, image_desc->image_row_pitch, host_ptr, - errcode_ret); - } -} - -cl_mem CreateImage3DLegacy(cl_context context, cl_mem_flags flags, - const cl_image_format *image_format, const cl_image_desc *image_desc, - void *host_ptr, cl_int *errcode_ret) -{ - if (clCreateImage) - { // clCreateImage available since OpenCL 1.2 - return clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret); - } - else - { - return clCreateImage3D(context, flags, image_format, image_desc->image_width, - image_desc->image_height, image_desc->image_depth, - image_desc->image_row_pitch, image_desc->image_slice_pitch, host_ptr, - errcode_ret); - } -} -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h b/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h deleted file mode 100644 index 021f8735a..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h +++ /dev/null @@ -1,560 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPERE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPERE_H__ - -#include "CL/cl.h" -#include "CL/cl_egl.h" -#include "CL/cl_ext.h" -#include "CL/cl_gl.h" -#include "CL/cl_platform.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -absl::Status LoadOpenCL(void **libopencl); -void UnloadOpenCL(void *libopencl); - -typedef cl_int(CL_API_CALL *PFN_clGetPlatformIDs)( - cl_uint /* num_entries */, cl_platform_id * /* platforms */, - cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetPlatformInfo)( - cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetDeviceIDs)( - cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, - cl_device_id * /* devices */, cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetDeviceInfo)( - cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clCreateSubDevices)( 
- cl_device_id /* in_device */, const cl_device_partition_property * /* properties */, - cl_uint /* num_devices */, cl_device_id * /* out_devices */, - cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clRetainDevice)(cl_device_id /* device */) - CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clReleaseDevice)(cl_device_id /* device */) - CL_API_SUFFIX__VERSION_1_2; -typedef cl_context(CL_API_CALL *PFN_clCreateContext)( - const cl_context_properties * /* properties */, cl_uint /* num_devices */, - const cl_device_id * /* devices */, - void(CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), - void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_context(CL_API_CALL *PFN_clCreateContextFromType)( - const cl_context_properties * /* properties */, cl_device_type /* device_type */, - void(CL_CALLBACK * /* pfn_notify*/)(const char *, const void *, size_t, void *), - void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clRetainContext)(cl_context /* context */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseContext)(cl_context /* context */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetContextInfo)( - cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_command_queue(CL_API_CALL *PFN_clCreateCommandQueueWithProperties)( - cl_context /* context */, cl_device_id /* device */, const cl_queue_properties * /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clRetainCommandQueue)(cl_command_queue /* command_queue */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseCommandQueue)(cl_command_queue /* command_queue */) - 
CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetCommandQueueInfo)( - cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, - size_t /* param_value_size */, void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_mem(CL_API_CALL *PFN_clCreateBuffer)( - cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_mem(CL_API_CALL *PFN_clCreateSubBuffer)( - cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, - const void * /* buffer_create_info */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_mem(CL_API_CALL *PFN_clCreateImage)( - cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, - const cl_image_desc * /* image_desc */, void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_mem(CL_API_CALL *PFN_clCreatePipe)( - cl_context /* context */, cl_mem_flags /* flags */, cl_uint /* pipe_packet_size */, - cl_uint /* pipe_max_packets */, const cl_pipe_properties * /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clRetainMemObject)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseMemObject)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetSupportedImageFormats)( - cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, - cl_uint /* num_entries */, cl_image_format * /* image_formats */, - cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetMemObjectInfo)( - cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) 
CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetImageInfo)( - cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetPipeInfo)( - cl_mem /* pipe */, cl_pipe_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clSetMemObjectDestructorCallback)( - cl_mem /* memobj */, - void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, void * /*user_data*/), - void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; -typedef void *(CL_API_CALL *PFN_clSVMAlloc)(cl_context /* context */, cl_svm_mem_flags /* flags */, - size_t /* size */, - cl_uint /* alignment */)CL_API_SUFFIX__VERSION_2_0; -typedef void(CL_API_CALL *PFN_clSVMFree)(cl_context /* context */, - void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_sampler(CL_API_CALL *PFN_clCreateSamplerWithProperties)( - cl_context /* context */, const cl_sampler_properties * /* normalized_coords */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clRetainSampler)(cl_sampler /* sampler */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseSampler)(cl_sampler /* sampler */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetSamplerInfo)( - cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithSource)( - cl_context /* context */, cl_uint /* count */, const char ** /* strings */, - const size_t * /* lengths */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithBinary)( - cl_context /* context */, cl_uint 
/* num_devices */, const cl_device_id * /* device_list */, - const size_t * /* lengths */, const unsigned char ** /* binaries */, cl_int * /* binary_status */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithBuiltInKernels)( - cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, - const char * /* kernel_names */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clRetainProgram)(cl_program /* program */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseProgram)(cl_program /* program */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clBuildProgram)( - cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, - const char * /* options */, - void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clCompileProgram)( - cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, - const char * /* options */, cl_uint /* num_input_headers */, - const cl_program * /* input_headers */, const char ** /* header_include_names */, - void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_program(CL_API_CALL *PFN_clLinkProgram)( - cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, - const char * /* options */, cl_uint /* num_input_programs */, - const cl_program * /* input_programs */, - void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), - void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clUnloadPlatformCompiler)(cl_platform_id /* platform */) - CL_API_SUFFIX__VERSION_1_2; 
-typedef cl_int(CL_API_CALL *PFN_clGetProgramInfo)( - cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetProgramBuildInfo)( - cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, - size_t /* param_value_size */, void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_kernel(CL_API_CALL *PFN_clCreateKernel)( - cl_program /* program */, const char * /* kernel_name */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clCreateKernelsInProgram)( - cl_program /* program */, cl_uint /* num_kernels */, cl_kernel * /* kernels */, - cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clRetainKernel)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseKernel)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clSetKernelArg)(cl_kernel /* kernel */, cl_uint /* arg_index */, - size_t /* arg_size */, const void * /* arg_value */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clSetKernelArgSVMPointer)( - cl_kernel /* kernel */, cl_uint /* arg_index */, - const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clSetKernelExecInfo)( - cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, size_t /* param_value_size */, - const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clGetKernelInfo)( - cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetKernelArgInfo)( - cl_kernel /* kernel */, cl_uint /* arg_indx */, 
cl_kernel_arg_info /* param_name */, - size_t /* param_value_size */, void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clGetKernelWorkGroupInfo)( - cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, - size_t /* param_value_size */, void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clWaitForEvents)( - cl_uint /* num_events */, const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetEventInfo)( - cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_event(CL_API_CALL *PFN_clCreateUserEvent)( - cl_context /* context */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL *PFN_clRetainEvent)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseEvent)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clSetUserEventStatus)( - cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL *PFN_clSetEventCallback)( - cl_event /* event */, cl_int /* command_exec_callback_type */, - void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL *PFN_clGetEventProfilingInfo)( - cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clFlush)(cl_command_queue /* command_queue */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clFinish)(cl_command_queue /* command_queue */) - CL_API_SUFFIX__VERSION_1_0; 
-typedef cl_int(CL_API_CALL *PFN_clEnqueueReadBuffer)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, - size_t /* offset */, size_t /* size */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueReadBufferRect)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, - const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */, - size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, - size_t /* host_slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL *PFN_clEnqueueWriteBuffer)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, - size_t /* offset */, size_t /* size */, const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueWriteBufferRect)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, - const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */, - size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, - size_t /* host_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL *PFN_clEnqueueFillBuffer)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, const void * /* pattern */, - size_t /* pattern_size */, size_t /* offset */, size_t /* size */, - cl_uint /* 
num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBuffer)( - cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, - size_t /* src_offset */, size_t /* dst_offset */, size_t /* size */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBufferRect)( - cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, - const size_t * /* src_origin */, const size_t * /* dst_origin */, const size_t * /* region */, - size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */, - size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL *PFN_clEnqueueReadImage)( - cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */, - const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* row_pitch */, - size_t /* slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueWriteImage)( - cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */, - const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* input_row_pitch */, - size_t /* input_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueFillImage)( - cl_command_queue /* command_queue */, cl_mem /* image */, const void * /* fill_color */, - const size_t * /* origin[3] */, 
const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyImage)( - cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */, - const size_t * /* src_origin[3] */, const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyImageToBuffer)( - cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */, - const size_t * /* src_origin[3] */, const size_t * /* region[3] */, size_t /* dst_offset */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBufferToImage)( - cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */, - size_t /* src_offset */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef void *(CL_API_CALL *PFN_clEnqueueMapBuffer)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, size_t /* offset */, size_t /* size */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */, cl_int * /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; -typedef void *(CL_API_CALL *PFN_clEnqueueMapImage)( - cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, const size_t * /* origin[3] */, const size_t * /* region[3] */, - size_t * /* image_row_pitch */, size_t * /* image_slice_pitch */, - 
cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */, cl_int * /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueUnmapMemObject)( - cl_command_queue /* command_queue */, cl_mem /* memobj */, void * /* mapped_ptr */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueMigrateMemObjects)( - cl_command_queue /* command_queue */, cl_uint /* num_mem_objects */, - const cl_mem * /* mem_objects */, cl_mem_migration_flags /* flags */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueNDRangeKernel)( - cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */, - const size_t * /* global_work_offset */, const size_t * /* global_work_size */, - const size_t * /* local_work_size */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueNativeKernel)( - cl_command_queue /* command_queue */, void(CL_CALLBACK * /*user_func*/)(void *), - void * /* args */, size_t /* cb_args */, cl_uint /* num_mem_objects */, - const cl_mem * /* mem_list */, const void ** /* args_mem_loc */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueMarkerWithWaitList)( - cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueBarrierWithWaitList)( - cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* 
event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMFree)( - cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, - void *[] /* svm_pointers[] */, - void(CL_CALLBACK * /*pfn_free_func*/)(cl_command_queue /* queue */, - cl_uint /* num_svm_pointers */, - void *[] /* svm_pointers[] */, void * /* user_data */), - void * /* user_data */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMemcpy)( - cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, void * /* dst_ptr */, - const void * /* src_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMemFill)( - cl_command_queue /* command_queue */, void * /* svm_ptr */, const void * /* pattern */, - size_t /* pattern_size */, size_t /* size */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMap)( - cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* flags */, - void * /* svm_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMUnmap)( - cl_command_queue /* command_queue */, void * /* svm_ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef void *(CL_API_CALL *PFN_clGetExtensionFunctionAddressForPlatform)( - cl_platform_id /* platform */, const char * /* func_name */)CL_API_SUFFIX__VERSION_1_2; -typedef cl_mem(CL_API_CALL *PFN_clCreateImage2D)(cl_context 
/* context */, cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_row_pitch */, - void * /* host_ptr */, cl_int * /* errcode_ret */); -typedef cl_mem(CL_API_CALL *PFN_clCreateImage3D)( - cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, - size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */, - size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void * /* host_ptr */, - cl_int * /* errcode_ret */); -typedef cl_int(CL_API_CALL *PFN_clEnqueueMarker)(cl_command_queue /* command_queue */, - cl_event * /* event */); -typedef cl_int(CL_API_CALL *PFN_clEnqueueWaitForEvents)(cl_command_queue /* command_queue */, - cl_uint /* num_events */, - const cl_event * /* event_list */); -typedef cl_int(CL_API_CALL *PFN_clEnqueueBarrier)(cl_command_queue /* command_queue */); -typedef cl_int(CL_API_CALL *PFN_clUnloadCompiler)(); -typedef void *(CL_API_CALL *PFN_clGetExtensionFunctionAddress)(const char * /* func_name */); -typedef cl_command_queue(CL_API_CALL *PFN_clCreateCommandQueue)( - cl_context /* context */, cl_device_id /* device */, cl_command_queue_properties /* properties */, - cl_int * /* errcode_ret */); -typedef cl_sampler(CL_API_CALL *PFN_clCreateSampler)(cl_context /* context */, - cl_bool /* normalized_coords */, - cl_addressing_mode /* addressing_mode */, - cl_filter_mode /* filter_mode */, - cl_int * /* errcode_ret */); -typedef cl_int(CL_API_CALL *PFN_clEnqueueTask)(cl_command_queue /* command_queue */, - cl_kernel /* kernel */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -// OpenGL sharing -typedef cl_mem(CL_API_CALL *PFN_clCreateFromGLBuffer)(cl_context, cl_mem_flags, cl_GLuint, int *); -typedef cl_mem(CL_API_CALL *PFN_clCreateFromGLTexture)( - cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, - 
cl_GLint /* miplevel */, cl_GLuint /* texture */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueAcquireGLObjects)(cl_command_queue /* command_queue */, - cl_uint /* num_objects */, - const cl_mem * /* mem_objects */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); -typedef cl_int(CL_API_CALL *PFN_clEnqueueReleaseGLObjects)( - cl_command_queue /* command_queue */, cl_uint /* num_objects */, const cl_mem * /* mem_objects */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -// cl_khr_egl_event extension - -// CLeglDisplayKHR is an opaque handle to an EGLDisplay -typedef void *CLeglDisplayKHR; - -// CLeglSyncKHR is an opaque handle to an EGLSync object -typedef void *CLeglSyncKHR; - -typedef cl_event(CL_API_CALL *PFN_clCreateEventFromEGLSyncKHR)(cl_context /* context */, - CLeglSyncKHR /* sync */, - CLeglDisplayKHR /* display */, - cl_int * /* errcode_ret */); - -// EGL sharing -typedef cl_mem(CL_API_CALL *PFN_clCreateFromEGLImageKHR)( - cl_context /*context*/, CLeglDisplayKHR /*display*/, CLeglImageKHR /*image*/, - cl_mem_flags /*flags*/, const cl_egl_image_properties_khr * /*properties*/, - cl_int * /*errcode_ret*/); -typedef cl_int(CL_API_CALL *PFN_clEnqueueAcquireEGLObjectsKHR)( - cl_command_queue /*command_queue*/, cl_uint /*num_objects*/, const cl_mem * /*mem_objects*/, - cl_uint /*num_events_in_wait_list*/, const cl_event * /*event_wait_list*/, cl_event * /*event*/); -typedef cl_int(CL_API_CALL *PFN_clEnqueueReleaseEGLObjectsKHR)( - cl_command_queue /*command_queue*/, cl_uint /*num_objects*/, const cl_mem * /*mem_objects*/, - cl_uint /*num_events_in_wait_list*/, const cl_event * /*event_wait_list*/, cl_event * /*event*/); - -extern PFN_clGetPlatformIDs clGetPlatformIDs; -extern PFN_clGetPlatformInfo clGetPlatformInfo; -extern PFN_clGetDeviceIDs clGetDeviceIDs; 
-extern PFN_clGetDeviceInfo clGetDeviceInfo; -extern PFN_clCreateSubDevices clCreateSubDevices; -extern PFN_clRetainDevice clRetainDevice; -extern PFN_clReleaseDevice clReleaseDevice; -extern PFN_clCreateContext clCreateContext; -extern PFN_clCreateContextFromType clCreateContextFromType; -extern PFN_clRetainContext clRetainContext; -extern PFN_clReleaseContext clReleaseContext; -extern PFN_clGetContextInfo clGetContextInfo; -extern PFN_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; -extern PFN_clRetainCommandQueue clRetainCommandQueue; -extern PFN_clReleaseCommandQueue clReleaseCommandQueue; -extern PFN_clGetCommandQueueInfo clGetCommandQueueInfo; -extern PFN_clCreateBuffer clCreateBuffer; -extern PFN_clCreateSubBuffer clCreateSubBuffer; -extern PFN_clCreateImage clCreateImage; -extern PFN_clCreatePipe clCreatePipe; -extern PFN_clRetainMemObject clRetainMemObject; -extern PFN_clReleaseMemObject clReleaseMemObject; -extern PFN_clGetSupportedImageFormats clGetSupportedImageFormats; -extern PFN_clGetMemObjectInfo clGetMemObjectInfo; -extern PFN_clGetImageInfo clGetImageInfo; -extern PFN_clGetPipeInfo clGetPipeInfo; -extern PFN_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; -extern PFN_clSVMAlloc clSVMAlloc; -extern PFN_clSVMFree clSVMFree; -extern PFN_clCreateSamplerWithProperties clCreateSamplerWithProperties; -extern PFN_clRetainSampler clRetainSampler; -extern PFN_clReleaseSampler clReleaseSampler; -extern PFN_clGetSamplerInfo clGetSamplerInfo; -extern PFN_clCreateProgramWithSource clCreateProgramWithSource; -extern PFN_clCreateProgramWithBinary clCreateProgramWithBinary; -extern PFN_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; -extern PFN_clRetainProgram clRetainProgram; -extern PFN_clReleaseProgram clReleaseProgram; -extern PFN_clBuildProgram clBuildProgram; -extern PFN_clCompileProgram clCompileProgram; -extern PFN_clLinkProgram clLinkProgram; -extern PFN_clUnloadPlatformCompiler 
clUnloadPlatformCompiler; -extern PFN_clGetProgramInfo clGetProgramInfo; -extern PFN_clGetProgramBuildInfo clGetProgramBuildInfo; -extern PFN_clCreateKernel clCreateKernel; -extern PFN_clCreateKernelsInProgram clCreateKernelsInProgram; -extern PFN_clRetainKernel clRetainKernel; -extern PFN_clReleaseKernel clReleaseKernel; -extern PFN_clSetKernelArg clSetKernelArg; -extern PFN_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; -extern PFN_clSetKernelExecInfo clSetKernelExecInfo; -extern PFN_clGetKernelInfo clGetKernelInfo; -extern PFN_clGetKernelArgInfo clGetKernelArgInfo; -extern PFN_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; -extern PFN_clWaitForEvents clWaitForEvents; -extern PFN_clGetEventInfo clGetEventInfo; -extern PFN_clCreateUserEvent clCreateUserEvent; -extern PFN_clRetainEvent clRetainEvent; -extern PFN_clReleaseEvent clReleaseEvent; -extern PFN_clSetUserEventStatus clSetUserEventStatus; -extern PFN_clSetEventCallback clSetEventCallback; -extern PFN_clGetEventProfilingInfo clGetEventProfilingInfo; -extern PFN_clFlush clFlush; -extern PFN_clFinish clFinish; -extern PFN_clEnqueueReadBuffer clEnqueueReadBuffer; -extern PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect; -extern PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer; -extern PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; -extern PFN_clEnqueueFillBuffer clEnqueueFillBuffer; -extern PFN_clEnqueueCopyBuffer clEnqueueCopyBuffer; -extern PFN_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; -extern PFN_clEnqueueReadImage clEnqueueReadImage; -extern PFN_clEnqueueWriteImage clEnqueueWriteImage; -extern PFN_clEnqueueFillImage clEnqueueFillImage; -extern PFN_clEnqueueCopyImage clEnqueueCopyImage; -extern PFN_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; -extern PFN_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; -extern PFN_clEnqueueMapBuffer clEnqueueMapBuffer; -extern PFN_clEnqueueMapImage clEnqueueMapImage; -extern PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; -extern 
PFN_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; -extern PFN_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; -extern PFN_clEnqueueNativeKernel clEnqueueNativeKernel; -extern PFN_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; -extern PFN_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; -extern PFN_clEnqueueSVMFree clEnqueueSVMFree; -extern PFN_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; -extern PFN_clEnqueueSVMMemFill clEnqueueSVMMemFill; -extern PFN_clEnqueueSVMMap clEnqueueSVMMap; -extern PFN_clEnqueueSVMUnmap clEnqueueSVMUnmap; -extern PFN_clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform; -extern PFN_clCreateImage2D clCreateImage2D; -extern PFN_clCreateImage3D clCreateImage3D; -extern PFN_clEnqueueMarker clEnqueueMarker; -extern PFN_clEnqueueWaitForEvents clEnqueueWaitForEvents; -extern PFN_clEnqueueBarrier clEnqueueBarrier; -extern PFN_clUnloadCompiler clUnloadCompiler; -extern PFN_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; -extern PFN_clCreateCommandQueue clCreateCommandQueue; -extern PFN_clCreateSampler clCreateSampler; -extern PFN_clEnqueueTask clEnqueueTask; - -// OpenGL sharing -extern PFN_clCreateFromGLBuffer clCreateFromGLBuffer; -extern PFN_clCreateFromGLTexture clCreateFromGLTexture; -extern PFN_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; -extern PFN_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; - -// cl_khr_egl_event extension -extern PFN_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; - -// EGL sharing -extern PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; -extern PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; -extern PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; - -// For convenient image creation -// It uses clCreateImage if it available (clCreateImage available since cl 1.2) -// otherwise it will use legacy clCreateImage2D -cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags, - const 
cl_image_format *image_format, const cl_image_desc *image_desc, - void *host_ptr, cl_int *errcode_ret); - -// It uses clCreateImage if it available (clCreateImage available since cl 1.2) -// otherwise it will use legacy clCreateImage3D -cl_mem CreateImage3DLegacy(cl_context context, cl_mem_flags flags, - const cl_image_format *image_format, const cl_image_desc *image_desc, - void *host_ptr, cl_int *errcode_ret); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPERE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Operations.cc b/runtime/onert/backend/gpu_cl/open_cl/Operations.cc deleted file mode 100644 index 2608b5364..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Operations.cc +++ /dev/null @@ -1,704 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Operations.h" -#include "open_cl/Operations.h" - -#include <algorithm> -#include <cstdint> -#include <set> -#include <string> -#include <utility> -#include <vector> -#include <unordered_map> - -#include "absl/container/flat_hash_map.h" - -#include "Shape.h" -#include "Status.h" -#include "InternalTensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -Padding2D &Padding2D::operator=(const Padding2D &value) -{ - prepended = value.prepended; - appended = value.appended; - return *this; -} - -bool Padding2D::operator==(const Padding2D &value) -{ - return this->prepended == value.prepended && this->appended == value.appended; -} - -bool Padding2D::operator!=(const Padding2D &value) { return !(*this == value); } - -Padding2D &Padding2D::operator-(const Padding2D &value) -{ - prepended.h -= value.prepended.h; - prepended.w -= value.prepended.w; - appended.h -= value.appended.h; - appended.w -= value.appended.w; - return *this; -} - -Padding3D &Padding3D::operator=(const Padding3D &value) -{ - prepended = value.prepended; - appended = value.appended; - return *this; -} - -bool Padding3D::operator==(const Padding3D &value) -{ - return this->prepended == value.prepended && this->appended == value.appended; -} - -bool Padding3D::operator!=(const Padding3D &value) { return !(*this == value); } - -Padding3D &Padding3D::operator-(const Padding3D &value) -{ - prepended.h -= value.prepended.h; - prepended.w -= value.prepended.w; - prepended.d -= value.prepended.d; - appended.h -= value.appended.h; - appended.w -= value.appended.w; - appended.d -= value.appended.d; - return *this; -} - -std::string ToString(enum OperationType op) -{ - switch (op) - { - // case OperationType::ABS: - // return "abs"; - case OperationType::ADD: - return "add"; - // case OperationType::CONCAT: - // return "concat"; - // case OperationType::COS: - // return "cos"; - // case OperationType::EXP: - // return "exp"; - // case OperationType::LOG: - // return "log"; - 
// case OperationType::NEG: - // return "neg"; - // case OperationType::POOLING_2D: - // return "pooling_2d"; - // case OperationType::REDUCE_MAXIMUM: - // return "reduce_maximum"; - // case OperationType::REDUCE_MINIMUM: - // return "reduce_minimum"; - // case OperationType::REDUCE_PRODUCT: - // return "reduce_product"; - // case OperationType::REDUCE_SUM: - // return "reduce_sum"; - // case OperationType::RESIZE: - // return "resize"; - // case OperationType::RELU: - // return "relu"; - // case OperationType::RSQRT: - // return "rsqrt"; - // case OperationType::SQRT: - // return "sqrt"; - // case OperationType::SQUARE: - // return "square"; - case OperationType::UNKNOWN: - return "unknown_operation"; - } - return ""; -} - -OperationType OperationTypeFromString(const std::string &name) -{ - static const auto operations = new std::unordered_map<std::string, OperationType>({ - // {"abs", OperationType::ABS}, - {"add", OperationType::ADD}, - // {"concat", OperationType::CONCAT}, - // {"cos", OperationType::COS}, - // {"exp", OperationType::EXP}, - // {"log", OperationType::LOG}, - // {"neg", OperationType::NEG}, - // {"pooling_2d", OperationType::POOLING_2D}, - // {"reduce_maximum", OperationType::REDUCE_MAXIMUM}, - // {"reduce_minimum", OperationType::REDUCE_MINIMUM}, - // {"reduce_product", OperationType::REDUCE_PRODUCT}, - // {"reduce_sum", OperationType::REDUCE_SUM}, - // {"relu", OperationType::RELU}, - // {"resize", OperationType::RESIZE}, - // {"rsqrt", OperationType::RSQRT}, - // {"sqrt", OperationType::SQRT}, - // {"square", OperationType::SQUARE}, - }); - auto op = operations->find(name); - return op == operations->end() ? 
OperationType::UNKNOWN : op->second; -} - -namespace -{ - -template <typename T> T DivideRoundUp(T n, T divisor) { return (n - 1) / divisor + 1; } - -int32_t CalculateOutputSizeBeforeStrides(int32_t input, int32_t kernel, int32_t padding, - int32_t dilation) -{ - const int32_t dilated_kernel = (kernel - 1) * dilation + 1; - return input + padding - dilated_kernel + 1; -} - -template <Axis T> -int32_t CalculateOutputWithoutStrides(const BHWC &input, const Convolution2DAttributes &attr) -{ - return CalculateOutputSizeBeforeStrides( - input.get<T>(), attr.weights.shape.get<T>(), - attr.padding.prepended.get<T>() + attr.padding.appended.get<T>(), attr.dilations.get<T>()); -} - -template <Axis T> -int32_t CalculateOutputWithoutStrides(const BHWDC &input, const Convolution3DAttributes &attr) -{ - return CalculateOutputSizeBeforeStrides( - input.get<T>(), attr.weights.shape.get<T>(), - attr.padding.prepended.get<T>() + attr.padding.appended.get<T>(), attr.dilations.get<T>()); -} - -template <Axis T> -int32_t CalculateOutputWithoutStrides(const BHWC &input, const Pooling2DAttributes &attr) -{ - return CalculateOutputSizeBeforeStrides(input.get<T>(), attr.kernel.get<T>(), - attr.padding.prepended.get<T>() + - attr.padding.appended.get<T>(), - /*dilation=*/1); -} - -template <Axis T> -int32_t CalculateOutputWithoutStrides(const BHWDC &input, const Pooling3DAttributes &attr) -{ - return CalculateOutputSizeBeforeStrides(input.get<T>(), attr.kernel.get<T>(), - attr.padding.prepended.get<T>() + - attr.padding.appended.get<T>(), - /*dilation=*/1); -} - -template <Axis T> -int32_t CalculateOutput(const BHWC &input, const ConvolutionTransposedAttributes &attr) -{ - return (input.get<T>() - 1) * attr.stride.get<T>() - - (attr.padding.prepended.get<T>() + attr.padding.appended.get<T>()) + - attr.weights.shape.get<T>() + attr.adjacent.get<T>(); -} - -template <Axis T> -int32_t CalculateOutput(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr) -{ - return 
(input.get<T>() - 1) * attr.stride.get<T>() - - (attr.padding.prepended.get<T>() + attr.padding.appended.get<T>()) + - attr.weights.shape.get<T>(); -} - -inline int32_t StridedSize(int32_t size, int32_t stride) -{ - return stride == 0 ? -1 : DivideRoundUp(size, stride); -} - -template <Axis AxisT, typename AttrT> int32_t CalculateOutput(const BHWC &input, const AttrT &attr) -{ - return StridedSize(CalculateOutputWithoutStrides<AxisT>(input, attr), - attr.strides.template get<AxisT>()); -} - -template <Axis AxisT, typename AttrT> int32_t CalculateOutput(const BHWDC &input, const AttrT &attr) -{ - return StridedSize(CalculateOutputWithoutStrides<AxisT>(input, attr), - attr.strides.template get<AxisT>()); -} - -int32_t CalculateSamePadding(int32_t input, int32_t kernel, int32_t dilation, int32_t stride) -{ - const int32_t dilated_kernel = (kernel - 1) * dilation + 1; - return std::max(0, dilated_kernel - (input - 1) % stride - 1); -} - -// Returns a padding that should be present to make sure image size stays -// the same. -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(), - attr.dilations.get<AxisT>(), attr.strides.get<AxisT>()); -} - -// Returns a padding that should be present to make sure image size stays -// the same. 
-template <Axis AxisT> -int32_t CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(), - attr.dilations.get<AxisT>(), attr.strides.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(), - /*dilation=*/1, attr.stride.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(), - /*dilation=*/1, attr.stride.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(), - /*dilation=*/1, attr.strides.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(), - /*dilation=*/1, attr.strides.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(), - /*dilation=*/1, attr.strides.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(), - /*dilation=*/1, attr.strides.get<AxisT>()); -} - -Padding2D MakeSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr) -{ - int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr); - int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr); - Padding2D padding; - 
padding.prepended = HW(padding_height / 2, padding_width / 2); - padding.appended = HW(padding_height - padding_height / 2, padding_width - padding_width / 2); - return padding; -} - -Padding3D MakeSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr) -{ - int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr); - int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr); - int32_t padding_depth = CalculateSamePadding<Axis::DEPTH>(input, attr); - Padding3D padding; - padding.prepended = HWD(padding_height / 2, padding_width / 2, padding_depth / 2); - padding.appended = HWD(padding_height - padding_height / 2, padding_width - padding_width / 2, - padding_depth - padding_depth / 2); - return padding; -} - -// If padding depends on input, convert it into fixed padding. -template <class AttrT> Padding2D MakeSamePadding(const BHWC &input, const AttrT &attr) -{ - int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr); - int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr); - Padding2D padding; - padding.prepended = HW(padding_height / 2, padding_width / 2); - padding.appended = HW(padding_height - padding_height / 2, padding_width - padding_width / 2); - return padding; -} - -// If padding depends on input, convert it into fixed padding. 
-template <class AttrT> Padding3D MakeSamePadding(const BHWDC &input, const AttrT &attr) -{ - int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr); - int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr); - int32_t padding_depth = CalculateSamePadding<Axis::DEPTH>(input, attr); - Padding3D padding; - padding.prepended = HWD(padding_height / 2, padding_width / 2, padding_depth / 2); - padding.appended = HWD(padding_height - padding_height / 2, padding_width - padding_width / 2, - padding_depth - padding_depth / 2); - return padding; -} - -} // namespace - -BHWC CalculateOutputShape(const BHWC &input, const MaxUnpooling2DAttributes &attr) -{ - return BHWC( - input.b, input.h * attr.strides.h - attr.padding.prepended.h - attr.padding.appended.h, - input.w * attr.strides.w - attr.padding.prepended.w - attr.padding.appended.w, input.c); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const MaxUnpooling3DAttributes &attr) -{ - return BHWDC( - input.b, input.h * attr.strides.h - attr.padding.prepended.h - attr.padding.appended.h, - input.w * attr.strides.w - attr.padding.prepended.w - attr.padding.appended.w, - input.d * attr.strides.d - attr.padding.prepended.d - attr.padding.appended.d, input.c); -} - -BHWC CalculateOutputShape(const BHWC &input, const Pooling2DAttributes &attr) -{ - return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), input.c); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const Pooling3DAttributes &attr) -{ - return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr), - input.c); -} - -BHWC CalculateOutputShape(const BHWC &input, const Convolution2DAttributes &attr) -{ - return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>()); -} - -BHWDC 
CalculateOutputShape(const BHWDC &input, const Convolution3DAttributes &attr) -{ - return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>()); -} - -BHWC CalculateOutputShape(const BHWC &input, const ConvolutionTransposedAttributes &attr) -{ - return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>()); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr) -{ - return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>()); -} - -BHWC CalculateOutputShape(const BHWC &input, const DepthwiseConvolution2DAttributes &attr) -{ - return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>() * - attr.weights.shape.get<Axis::INPUT_CHANNELS>()); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr) -{ - return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>() * - attr.weights.shape.get<Axis::INPUT_CHANNELS>()); -} - -BHWC CalculateOutputShape(const BHWC &input, const SliceAttributes &attr) -{ - (void)input; - return BHWC(StridedSize(attr.ends.b - attr.starts.b, attr.strides.b), - StridedSize(attr.ends.h - attr.starts.h, attr.strides.h), - StridedSize(attr.ends.w - attr.starts.w, attr.strides.w), - StridedSize(attr.ends.c - attr.starts.c, attr.strides.c)); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const Slice3DAttributes &attr) -{ - (void)input; - 
return BHWDC(StridedSize(attr.ends.b - attr.starts.b, attr.strides.b), - StridedSize(attr.ends.h - attr.starts.h, attr.strides.h), - StridedSize(attr.ends.w - attr.starts.w, attr.strides.w), - StridedSize(attr.ends.d - attr.starts.d, attr.strides.d), - StridedSize(attr.ends.c - attr.starts.c, attr.strides.c)); -} - -BHWC CalculateOutputShape(const BHWC &input, const PadAttributes &attr) -{ - return BHWC( - attr.appended.b + attr.prepended.b + input.b, attr.appended.h + attr.prepended.h + input.h, - attr.appended.w + attr.prepended.w + input.w, attr.appended.c + attr.prepended.c + input.c); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const Pad3DAttributes &attr) -{ - return BHWDC( - attr.appended.b + attr.prepended.b + input.b, attr.appended.h + attr.prepended.h + input.h, - attr.appended.w + attr.prepended.w + input.w, attr.appended.d + attr.prepended.d + input.d, - attr.appended.c + attr.prepended.c + input.c); -} - -BHWC CalculateOutputShape(const BHWC &input, const FullyConnectedAttributes &attr) -{ - return BHWC(input.b, 1, 1, attr.weights.shape.o); -} - -BHWC CalculateOutputShape(const BHWC &input, const MeanAttributes &attr) -{ - const int b = attr.dims.find(Axis::BATCH) == attr.dims.end() ? input.b : 1; - const int h = attr.dims.find(Axis::HEIGHT) == attr.dims.end() ? input.h : 1; - const int w = attr.dims.find(Axis::WIDTH) == attr.dims.end() ? input.w : 1; - const int c = attr.dims.find(Axis::CHANNELS) == attr.dims.end() ? 
input.c : 1; - return BHWC(b, h, w, c); -} - -absl::Status CalculateOutputShape(const std::vector<BHWC> &input, const ConcatAttributes &attr, - BHWC *output_shape) -{ - BHWC new_shape = input[0]; - switch (attr.axis) - { - case Axis::CHANNELS: - for (size_t i = 1; i < input.size(); i++) - { - if (input[i].h != new_shape.h || input[i].w != new_shape.w || input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Height, Width and Batch must be the same when concatenating " - "by channels axis"); - } - new_shape.c += input[i].c; - } - break; - case Axis::HEIGHT: - for (size_t i = 1; i < input.size(); i++) - { - if (input[i].w != new_shape.w || input[i].c != new_shape.c || input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Channels, Width and Batch must be the same when concatenating " - "by height axis"); - } - new_shape.h += input[i].h; - } - break; - case Axis::WIDTH: - for (size_t i = 1; i < input.size(); i++) - { - if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Height, Channels and Batch must be the same when concatenating " - "by width axis"); - } - new_shape.w += input[i].w; - } - break; - case Axis::BATCH: - for (size_t i = 1; i < input.size(); i++) - { - if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].w != new_shape.w) - { - return absl::InvalidArgumentError( - "Width, Height and Channels must be the same when concatenating " - "by batch axis"); - } - new_shape.b += input[i].b; - } - break; - default: - return absl::InvalidArgumentError("Invalid axis"); - break; - } - *output_shape = new_shape; - return absl::OkStatus(); -} - -absl::Status CalculateOutputShape(const std::vector<BHWDC> &input, const ConcatAttributes &attr, - BHWDC *output_shape) -{ - BHWDC new_shape = input[0]; - switch (attr.axis) - { - case Axis::CHANNELS: - for (size_t i = 1; i < input.size(); ++i) - { - if (input[i].h != new_shape.h || input[i].w != 
new_shape.w || input[i].d != new_shape.d || - input[i].b != new_shape.b) - { - return absl::InvalidArgumentError("Height, Width, Batch and Depth must be the same when " - "concatenating " - "by channels axis"); - } - new_shape.c += input[i].c; - } - break; - case Axis::HEIGHT: - for (size_t i = 1; i < input.size(); ++i) - { - if (input[i].w != new_shape.w || input[i].c != new_shape.c || input[i].d != new_shape.d || - input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Width, Depth, Batch and Channels must be the same when " - "concatenating " - "by height axis"); - } - new_shape.h += input[i].h; - } - break; - case Axis::WIDTH: - for (size_t i = 1; i < input.size(); ++i) - { - if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].d != new_shape.d || - input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Height, Depth, Batch and Channels must be the same when " - "concatenating " - "by width axis"); - } - new_shape.w += input[i].w; - } - break; - case Axis::DEPTH: - for (size_t i = 1; i < input.size(); ++i) - { - if (input[i].w != new_shape.w || input[i].h != new_shape.h || input[i].c != new_shape.c || - input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Width, Height, Batch and Channels must be the same when " - "concatenating " - "by depth axis"); - } - new_shape.d += input[i].d; - } - break; - case Axis::BATCH: - for (size_t i = 1; i < input.size(); ++i) - { - if (input[i].w != new_shape.w || input[i].h != new_shape.h || input[i].c != new_shape.c || - input[i].d != new_shape.d) - { - return absl::InvalidArgumentError( - "Width, Height, Depth and Channels must be the same when " - "concatenating " - "by batch axis"); - } - new_shape.b += input[i].b; - } - break; - default: - return absl::InvalidArgumentError("Invalid axis"); - } - *output_shape = new_shape; - return absl::OkStatus(); -} - -Padding2D CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr) -{ - return 
MakeSamePadding(input, attr); -} - -Padding3D CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding2D CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding3D CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding2D CalculateSamePadding(const BHWC &input, const DepthwiseConvolution2DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding3D CalculateSamePadding(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding2D CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding3D CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding2D CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding3D CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize2DAttributes &attr) -{ - return attr.align_corners && input_size > 1 && output_size > 1 - ? static_cast<float>(input_size - 1) / (output_size - 1) - : static_cast<float>(input_size) / output_size; -} - -float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize3DAttributes &attr) -{ - return attr.align_corners && input_size > 1 && output_size > 1 - ? 
static_cast<float>(input_size - 1) / (output_size - 1) - : static_cast<float>(input_size) / output_size; -} - -BHWC CalculateOutputShape(const BHWC &input, const Resize2DAttributes &attr) -{ - return BHWC(input.b, attr.new_shape.h, attr.new_shape.w, input.c); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const Resize3DAttributes &attr) -{ - return BHWDC(input.b, attr.new_shape.h, attr.new_shape.w, attr.new_shape.d, input.c); -} - -BHWC CalculateOutputShape(const BHWC &input, const TransposeAttributes &attr) -{ - return BHWC(input.get(attr.perm.b), input.get(attr.perm.h), input.get(attr.perm.w), - input.get(attr.perm.c)); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const Transpose3DAttributes &attr) -{ - return BHWDC(input.get(attr.perm.b), input.get(attr.perm.h), input.get(attr.perm.w), - input.get(attr.perm.d), input.get(attr.perm.c)); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Operations.h b/runtime/onert/backend/gpu_cl/open_cl/Operations.h deleted file mode 100644 index 825eb90a4..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Operations.h +++ /dev/null @@ -1,586 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__ - -#include <cstdint> -#include <set> -#include <string> -#include <vector> - -#include "absl/types/variant.h" - -#include "DataType.h" -#include "Shape.h" -#include "Status.h" -#include "InternalTensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class OperationType -{ - UNKNOWN = 0, - // ABS, - ADD, - // BATCH_TO_SPACE, - // BATCH_NORMALIZATION, - // BATCHED_MATMUL, - // CONCAT, - // CONST, - // CONVOLUTION_2D, - // CONVOLUTION_TRANSPOSED, - // COPY, - // COS, - // DEPTHWISE_CONVOLUTION, - // DIV, - // ELU, - // EQUAL, - // EXP, - // FULLY_CONNECTED, - // GREATER, - // GREATER_EQUAL, - // HARD_SWISH, - // LESS, - // LESS_EQUAL, - // LOG, - // LSTM, - // MAXIMUM, - // MAX_UNPOOLING_2D, - // MEAN, - // MEAN_STDDEV_NORMALIZATION, - // MINIMUM, - // MUL, - // NEG, - // NOT_EQUAL, - // PAD, - // POOLING_2D, - // POW, - // PRELU, - // Used to accurately run inference on quantized models. 
- // QUANTIZE_AND_DEQUANTIZE, - // REDUCE_MAXIMUM, - // REDUCE_MINIMUM, - // REDUCE_PRODUCT, - // REDUCE_SUM, - // RELU, - // RESHAPE, - // RESIZE, - // RSQRT, - // SIGMOID, - // SIN, - // SLICE, - // SOFTMAX, - // SPACE_TO_BATCH, - // SPACE_TO_DEPTH, - // SQRT, - // SQUARE, - // SQUARED_DIFF, - // SUB, - // TANH, - // TRANSPOSE, -}; - -std::string ToString(enum OperationType op); - -OperationType OperationTypeFromString(const std::string &name); - -typedef absl::variant<absl::monostate, InternalTensor<HWC, DataType::FLOAT32>, - InternalTensor<Linear, DataType::FLOAT32>, float> - TensorOrScalar; - -struct Padding2D -{ - Padding2D() = default; - Padding2D(const Padding2D &); - Padding2D &operator=(const Padding2D &value); - bool operator==(const Padding2D &value); - bool operator!=(const Padding2D &value); - Padding2D &operator-(const Padding2D &value); - - // Padding values for every axis (if needed), where 'prepended' defines - // padding for the beginning of each axis and 'appended' represents end part - // of the corresponding axis. - HW prepended = HW(-1, -1); - HW appended = HW(-1, -1); -}; - -struct Padding3D -{ - Padding3D() = default; - Padding3D(const Padding3D &); - Padding3D &operator=(const Padding3D &value); - bool operator==(const Padding3D &value); - bool operator!=(const Padding3D &value); - Padding3D &operator-(const Padding3D &value); - // Padding values for every axis (if needed), where 'prepended' defines - // padding for the beginning of each axis and 'appended' represents end part - // of the corresponding axis. 
- HWD prepended = HWD(0, 0, 0); - HWD appended = HWD(0, 0, 0); -}; - -struct Crop2D : public Padding2D -{ -}; - -struct SpaceToBatchAttributes -{ - HW block; - Padding2D padding; -}; - -struct BatchToSpaceAttributes -{ - HW block; - Crop2D crop; -}; - -enum class PoolingType -{ - UNDEFINED = 0, - - // average pooling - AVERAGE = 1, - - // max pooling - MAX = 2, -}; - -struct Pooling2DAttributes -{ - PoolingType type = PoolingType::UNDEFINED; - // Strides for every axis. - HW strides = HW(-1, -1); - HW kernel = HW(-1, -1); - Padding2D padding; - // NOTE(akulik): technically the number of outputs from Pooling node indicates - // whether indices are needed or not, but I decided to keep it inside - // attributes to simplify processing. - bool output_indices = false; -}; - -struct Pooling3DAttributes -{ - PoolingType type = PoolingType::UNDEFINED; - // Strides for every axis. - HWD strides = HWD(0, 0, 0); - HWD kernel = HWD(0, 0, 0); - Padding3D padding; - // NOTE(akulik): technically the number of outputs from Pooling node indicates - // whether indices are needed or not, but I decided to keep it inside - // attributes to simplify processing. - bool output_indices = false; -}; - -struct MaxUnpooling2DAttributes -{ - // Strides for every axis. - HW strides = HW(-1, -1); - HW kernel = HW(-1, -1); - Padding2D padding; -}; - -struct MaxUnpooling3DAttributes -{ - // Strides for every axis. - HWD strides = HWD(0, 0, 0); - HWD kernel = HWD(0, 0, 0); - Padding3D padding; -}; - -struct MeanAttributes -{ - // The vector of dimensions to calculate mean along. - std::set<Axis> dims; -}; - -struct ConcatAttributes -{ - // Defines axis by which to concat on. - Axis axis = Axis::UNKNOWN; -}; - -// @return shape of a tensor after MaxUnpooling2D operation is applied to -// the given input. -BHWC CalculateOutputShape(const BHWC &input, const MaxUnpooling2DAttributes &attr); - -// @return shape of a tensor after MaxUnpooling3D operation is applied to -// the given input. 
-BHWDC CalculateOutputShape(const BHWDC &input, const MaxUnpooling3DAttributes &attr); - -// @return shape of a tensor after Pooling2D operation is applied to the given -// input. -BHWC CalculateOutputShape(const BHWC &input, const Pooling2DAttributes &attr); - -// @return shape of a tensor after Pooling3D operation is applied to the given -// input. -BHWDC CalculateOutputShape(const BHWDC &input, const Pooling3DAttributes &attr); - -// @return shape of a tensor after Concat operation is applied to the given -// input. -absl::Status CalculateOutputShape(const std::vector<BHWC> &input, const ConcatAttributes &attr, - BHWC *output_shape); - -// @return shape of a tensor after Concat operation is applied to the given -// input. -absl::Status CalculateOutputShape(const std::vector<BHWDC> &input, const ConcatAttributes &attr, - BHWDC *output_shape); - -// @return padding for pooling operation to make sure output keep the same shape -// as the given input. -Padding2D CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr); - -// @return padding for pooling operation to make sure output keep the same shape -// as the given input. -Padding3D CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr); - -// @return padding for max unpooling operation to make sure output keep the same -// shape as the given input. -Padding2D CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr); - -// @return padding for max unpooling operation to make sure output keep the same -// shape as the given input. -Padding3D CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr); - -struct Convolution2DAttributes -{ - HW strides = HW(1, 1); // Along each axis. - HW dilations = HW(1, 1); // Along each axis. 
- Padding2D padding; - - InternalTensor<OHWI, DataType::FLOAT32> weights; - InternalTensor<Linear, DataType::FLOAT32> bias; // optional -}; - -struct Convolution3DAttributes -{ - HWD strides = HWD(0, 0, 0); // Along each axis. - HWD dilations = HWD(0, 0, 0); // Along each axis. - Padding3D padding; - - InternalTensor<OHWDI, DataType::FLOAT32> weights; - InternalTensor<Linear, DataType::FLOAT32> bias; // optional -}; - -// @return shape of a tensor after Convolution2D operation is applied to -// the given input. -BHWC CalculateOutputShape(const BHWC &input, const Convolution2DAttributes &attr); - -// @return shape of a tensor after Convolution3D operation is applied to -// the given input. -BHWDC CalculateOutputShape(const BHWDC &input, const Convolution3DAttributes &attr); - -// @return padding for convolution operation to make sure output keep the same -// shape as the given input. -Padding2D CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr); - -// @return padding for convolution operation to make sure output keep the same -// shape as the given input. -Padding3D CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr); - -struct ConvolutionTransposedAttributes -{ - HW stride = HW(1, 1); // Along each axis. - HW adjacent; // TODO(sorokin): No op on Flow. - Padding2D padding; - - InternalTensor<OHWI, DataType::FLOAT32> weights; - InternalTensor<Linear, DataType::FLOAT32> bias; // optional -}; - -struct ConvolutionTransposed3DAttributes -{ - HWD stride = HWD(0, 0, 0); // Along each axis. 
- Padding3D padding; - - InternalTensor<OHWDI, DataType::FLOAT32> weights; - InternalTensor<Linear, DataType::FLOAT32> bias; // optional -}; - -Padding2D CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr); - -Padding3D CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr); - -// @return shape of a tensor after ConvolutionTransposed operation is applied to -// the given input. -BHWC CalculateOutputShape(const BHWC &input, const ConvolutionTransposedAttributes &attr); - -// @return shape of a tensor after ConvolutionTransposed3D operation is applied -// to -// the given input. -BHWDC CalculateOutputShape(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr); - -struct DepthwiseConvolution2DAttributes : public Convolution2DAttributes -{ -}; -struct DepthwiseConvolution3DAttributes : public Convolution3DAttributes -{ -}; - -// @return shape of a tensor after DepthwiseConvolution2D operation is applied -// to the given input. -BHWC CalculateOutputShape(const BHWC &input, const DepthwiseConvolution2DAttributes &attr); - -// @return shape of a tensor after DepthwiseConvolution3D operation is applied -// to the given input. -BHWDC CalculateOutputShape(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr); - -// @return padding for depthwise convolution operation to make sure output keep -// the same shape as the given input. -Padding2D CalculateSamePadding(const BHWC &input, const DepthwiseConvolution2DAttributes &attr); - -// @return padding for depthwise convolution operation to make sure output keep -// the same shape as the given input. 
-Padding3D CalculateSamePadding(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr); - -// f(x):= { -// if x < 0 : x -> alpha * x -// if x >= 0 : x -> min(clip, x) -// } -// -// Examples: -// - ReLU: clip = 0, alpha = 0 -// - ReLU6: clip = 6, alpha = 0 -// - Leaky ReLU: clip = 0, alpha = a -struct ReLUAttributes -{ - // clip <= 0 mean it is not set. - float clip = 0; - - float alpha = 0; -}; - -struct PReLUAttributes -{ - // clip <= 0 mean it is not set. - float clip = 0; - - // If alpha is linear, then it is sharded across CHANNELS axis, otherwise - // full shape alpha is required. - absl::variant<InternalTensor<Linear, DataType::FLOAT32>, InternalTensor<HWC, DataType::FLOAT32>> - alpha; -}; - -struct ReduceAttributes -{ - Axis axis = Axis::UNKNOWN; -}; - -struct SoftmaxAttributes -{ - Axis axis = Axis::UNKNOWN; -}; - -enum LstmKernelType -{ - FULL = 0, - BASIC = 1, // Currently, only basic is supported. -}; - -struct LstmAttributes -{ - LstmKernelType kernel_type = LstmKernelType::BASIC; -}; - -enum class SamplingType -{ - UNKNOWN = 0, - NEAREST = 1, - BILINEAR = 2, -}; - -struct Resize2DAttributes -{ - HW new_shape; - - SamplingType type = SamplingType::UNKNOWN; - - // If true, the centers of the 4 corner pixels of the input and output tensors - // are aligned, preserving the values at the corner pixels. Defaults to false. - bool align_corners = false; - - bool half_pixel_centers = false; -}; - -// TODO(b/147771327): rename to Resize3D -struct Resize3DAttributes -{ - HWD new_shape; - - SamplingType type = SamplingType::NEAREST; - - // If true, the centers of the 8 corner pixels of the input and output tensors - // are aligned, preserving the values at the corner pixels. Defaults to false. 
- bool align_corners = false; - - bool half_pixel_centers = false; -}; - -float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize2DAttributes &attr); - -float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize3DAttributes &attr); - -// @return shape of a tensor after scale operation is applied to the given -// input. -BHWC CalculateOutputShape(const BHWC &input, const Resize2DAttributes &attr); - -// @return shape of a tensor after scale operation is applied to the given -// input. -BHWDC CalculateOutputShape(const BHWDC &input, const Resize3DAttributes &attr); - -enum class PaddingContentType -{ - ZEROS = 0, - REFLECT = 1, - EDGE = 2, -}; - -struct PadAttributes -{ - PaddingContentType type = PaddingContentType::ZEROS; - - BHWC prepended; - BHWC appended; -}; - -// @return shape of a tensor after Pad operation is applied to the given input. -BHWC CalculateOutputShape(const BHWC &input, const PadAttributes &attr); - -struct Pad3DAttributes -{ - PaddingContentType type = PaddingContentType::ZEROS; - - BHWDC prepended; - BHWDC appended; -}; - -// @return shape of a tensor after Pad3D operation is applied to the given -// input. -BHWDC CalculateOutputShape(const BHWDC &input, const Pad3DAttributes &attr); - -struct ConstTensorAttributes -{ - InternalTensor<BHWC, DataType::FLOAT32> tensor; -}; - -// Simple slicing without advanced support for shrinking, reverse slicing etc. -struct SliceAttributes -{ - // Specifies start and end dimensions for slicing. - BHWC starts; - BHWC ends; - - // Stride should be >= 1. - BHWC strides; -}; - -// @return shape of a tensor after Slice2D operation is applied to the given -// input. -BHWC CalculateOutputShape(const BHWC &input, const SliceAttributes &attr); - -// Simple slicing without advanced support for shrinking, reverse slicing etc. -struct Slice3DAttributes -{ - // Specifies start and end dimensions for slicing. - BHWDC starts; - BHWDC ends; - - // Stride should be >= 1. 
- BHWDC strides; -}; - -// @return shape of a tensor after Slice3D operation is applied to the given -// input. -BHWDC CalculateOutputShape(const BHWDC &input, const Slice3DAttributes &attr); - -struct FullyConnectedAttributes -{ - InternalTensor<OHWI, DataType::FLOAT32> weights; - InternalTensor<Linear, DataType::FLOAT32> bias; -}; - -// @return shape of a tensor after FullyConnected operation is applied to -// the given input. -BHWC CalculateOutputShape(const BHWC &input, const FullyConnectedAttributes &attr); - -// @return shape of a tensor after Mean operation is applied to the given input. -BHWC CalculateOutputShape(const BHWC &input, const MeanAttributes &attr); - -struct ElementwiseAttributes -{ - TensorOrScalar param; - // For elementwise operation with 2 inputs op(A, B), runtime_tensor_is_second - // true when runtime tensor is B(on second position). this is important for - // ops that non commutative, for example substract. - bool runtime_tensor_is_second = false; -}; - -struct ReshapeAttributes -{ - BHWC new_shape; -}; - -struct Reshape3DAttributes -{ - BHWDC new_shape; -}; - -struct TransposeAttributes -{ - // A permutation of the dimensions of input tensor - BHWC perm; -}; - -// @return shape of a tensor after Transpose operation is applied to -// the given input. -BHWC CalculateOutputShape(const BHWC &input, const TransposeAttributes &attr); - -struct Transpose3DAttributes -{ - // A permutation of the dimensions of input tensor - BHWDC perm; -}; - -// @return shape of a tensor after Transpose3D operation is applied to -// the given input. -BHWDC CalculateOutputShape(const BHWDC &input, const Transpose3DAttributes &attr); - -struct SpaceToDepthAttributes -{ - int block_size; -}; - -// These help perform a combination of Quantize & Dequantize to adjust float -// values like quantized inference would. 
-struct QuantizeAndDequantizeAttributes -{ - float min = 0; - float max = 0; - float scale = 0; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Precision.cc b/runtime/onert/backend/gpu_cl/open_cl/Precision.cc deleted file mode 100644 index bd908bd43..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Precision.cc +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Precision.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -std::string ToString(CalculationsPrecision precision) -{ - switch (precision) - { - case CalculationsPrecision::F32_F16: - return "CalculationsPrecision::F32_F16"; - case CalculationsPrecision::F32: - return "CalculationsPrecision::F32"; - case CalculationsPrecision::F16: - return "CalculationsPrecision::F16"; - } - return " "; -} - -DataType DeduceDataTypeFromPrecision(CalculationsPrecision precision) -{ - if (precision == CalculationsPrecision::F32) - { - return DataType::FLOAT32; - } - else - { - return DataType::FLOAT16; - } - return DataType::UNKNOWN; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Precision.h b/runtime/onert/backend/gpu_cl/open_cl/Precision.h deleted file mode 100644 index cb910c783..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Precision.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__ - -#include <string> - -#include "DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class CalculationsPrecision -{ - F32, - F32_F16, - F16 -}; -// F32 - all data and all math ops in F32 -// F16 - all data and all math ops in F16 -// F32_F16 - as F16, but some operations (Convolution, -// DepthwiseConvolution, FullyConnected, ConvolutionTransposed) -// have accumulator in F32 and usually it calculates 4 mads in F16, sum them, -// than converts this partial sum to F32 and add to accumulator. - -DataType DeduceDataTypeFromPrecision(CalculationsPrecision precision); - -std::string ToString(CalculationsPrecision precision); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc b/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc deleted file mode 100644 index 350d7a1c5..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ProgramCache.h" - -#include <cstdint> -#include <string> - -#include "ClProgram.h" -#include "Status.h" -#include "Util.h" -#include "farmhash.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -ProgramCache::ProgramDescriptor::ProgramDescriptor(const std::string &code_text, - const std::string &options, - bool use_fingerprints) - : code(code_text), compiler_options(options), use_fingerprint(use_fingerprints) -{ - const uint64_t code_fingerprint = ::util::Fingerprint64(code); - const uint64_t options_fingerprint = ::util::Fingerprint64(compiler_options); - fingerprint = code_fingerprint + options_fingerprint; -} - -ProgramCache::ProgramDescriptor::ProgramDescriptor(uint64_t fingerprints) - : fingerprint(fingerprints), use_fingerprint(true) -{ -} - -ProgramCache::ProgramCache(ProgramCache &&program_cache) - : use_fingerprints_(program_cache.use_fingerprints_), - programs_(std::move(program_cache.programs_)) -{ -} - -ProgramCache &ProgramCache::operator=(ProgramCache &&program_cache) -{ - if (this != &program_cache) - { - use_fingerprints_ = program_cache.use_fingerprints_; - programs_ = std::move(program_cache.programs_); - } - return *this; -} - -absl::Status ProgramCache::GetOrCreateCLKernel(const std::string &code, - const std::string &function_name, - const std::vector<CompilerOptions> &compiler_options, - const CLContext &context, const CLDevice &device, - CLKernel *result) -{ - const std::string options = CompilerOptionsToString(device, compiler_options); - ProgramDescriptor desc{code, options, use_fingerprints_}; - auto it = programs_.find(desc); - if (it != programs_.end()) - { - return result->CreateFromProgram(it->second, function_name); - } - - CLProgram program; - RETURN_IF_ERROR(CreateCLProgram(code, options, context, device, &program)); - RETURN_IF_ERROR(result->CreateFromProgram(program, function_name)); - programs_.insert(std::make_pair(std::move(desc), std::move(program))); - return absl::OkStatus(); -} - 
-absl::Status ProgramCache::GetOrCreateCLKernel(const std::string &code, - const std::string &function_name, - const CLContext &context, const CLDevice &device, - CLKernel *result) -{ - return GetOrCreateCLKernel(code, function_name, {}, context, device, result); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h b/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h deleted file mode 100644 index 3f5ee0215..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__ - -#include <cstdint> -#include <string> -#include <vector> - -#include "absl/container/flat_hash_map.h" -#include "absl/types/span.h" -#include "ClContext.h" -#include "ClDevice.h" -#include "ClKernel.h" -#include "ClProgram.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class ProgramCache -{ -public: - ProgramCache() = default; - - ProgramCache(ProgramCache &&program_cache); - ProgramCache &operator=(ProgramCache &&program_cache); - ProgramCache(const ProgramCache &) = delete; - ProgramCache &operator=(const ProgramCache &) = delete; - - absl::Status GetOrCreateCLKernel(const std::string &code, const std::string &function_name, - const std::vector<CompilerOptions> &compiler_options, - const CLContext &context, const CLDevice &device, - CLKernel *result); - - absl::Status GetOrCreateCLKernel(const std::string &code, const std::string &function_name, - const CLContext &context, const CLDevice &device, - CLKernel *result); - -private: - struct ProgramDescriptor - { - ProgramDescriptor() = default; - ProgramDescriptor(const std::string &code_text, const std::string &options, - bool use_fingerprint); - explicit ProgramDescriptor(uint64_t fingerprint); - - std::string code; - std::string compiler_options; - uint64_t fingerprint; - bool use_fingerprint; - }; - struct ProgramDescriptorHasher - { - std::size_t operator()(const ProgramDescriptor &k) const - { - if (k.use_fingerprint) - { - return std::hash<uint64_t>()(k.fingerprint); - } - else - { - return std::hash<std::string>()(k.code) + std::hash<std::string>()(k.compiler_options); - } - } - }; - struct ProgramDescriptorEqual - { - bool operator()(const ProgramDescriptor &a, const ProgramDescriptor &b) const - { - if (a.use_fingerprint && b.use_fingerprint) - { - return a.fingerprint == b.fingerprint; - } - else - { - return a.compiler_options == 
b.compiler_options && a.code == b.code; - } - } - }; - - // There is a low probability of a hash collision when cache is deserialized - // because only fingerprints are serialized instead of full source code. - bool use_fingerprints_ = false; - absl::flat_hash_map<ProgramDescriptor, CLProgram, ProgramDescriptorHasher, ProgramDescriptorEqual> - programs_; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Shape.cc b/runtime/onert/backend/gpu_cl/open_cl/Shape.cc deleted file mode 100644 index 5a2374516..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Shape.cc +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Shape.h" - -#include <stdint.h> - -#include <string> -#include <vector> - -#include "absl/strings/str_cat.h" -#include "absl/strings/str_join.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -struct GetAxisByIndexFunc -{ - template <Layout T> Axis operator()() const { return GetAxis<T>(index); } - int32_t index; -}; - -struct GetIndexByAxisFunc -{ - template <Layout T> int operator()() const { return GetAxisIndex<T>(axis); } - Axis axis; -}; - -struct NumAxisFunc -{ - template <Layout T> int operator()() const { return Size<T>(); } -}; - -} // namespace - -std::string ToString(Axis axis) -{ - switch (axis) - { - case Axis::BATCH: - return "batch"; - case Axis::CHANNELS: - return "channels"; - case Axis::INPUT_CHANNELS: - return "input_channels"; - case Axis::OUTPUT_CHANNELS: - return "output_channels"; - case Axis::HEIGHT: - return "height"; - case Axis::WIDTH: - return "width"; - case Axis::VALUE: - return "value"; - case Axis::DEPTH: - return "depth"; - case Axis::UNKNOWN: - return "unknown"; - } - return "undefined"; -} - -std::string ToString(Layout layout) -{ - switch (layout) - { - case Layout::SCALAR: - return "scalar"; - case Layout::LINEAR: - return "linear"; - case Layout::HW: - return "hw"; - case Layout::HWD: - return "hwd"; - case Layout::CHW: - return "chw"; - case Layout::HWC: - return "hwc"; - case Layout::HWDC: - return "hwdc"; - case Layout::OHWI: - return "ohwi"; - case Layout::IHWO: - return "ihwo"; - case Layout::OIHW: - return "oihw"; - case Layout::IOHW: - return "iohw"; - case Layout::BHWC: - return "bhwc"; - case Layout::BHWDC: - return "bhwdc"; - case Layout::OHWDI: - return "ohwi"; - case Layout::UNKNOWN: - return "unknown"; - } - return "undefined"; -} - -Axis GetAxis(Layout layout, int32_t index) -{ - return DispatchByLayout(layout, GetAxisByIndexFunc{index}); -} - -int GetAxisIndex(Layout layout, Axis axis) -{ - return DispatchByLayout(layout, GetIndexByAxisFunc{axis}); -} - -bool 
HasAxis(Layout layout, Axis axis) { return GetAxisIndex(layout, axis) >= 0; } - -int Size(Layout layout) { return DispatchByLayout(layout, NumAxisFunc()); } - -std::string ToString(const Shape &s) -{ - return absl::StrCat("{", ToString(s.layout), ", {", absl::StrJoin(s.dimensions, ", "), "}}"); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Shape.h b/runtime/onert/backend/gpu_cl/open_cl/Shape.h deleted file mode 100644 index 3767e106f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Shape.h +++ /dev/null @@ -1,668 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__ - -#include <stddef.h> -#include <stdint.h> - -#include <array> -#include <functional> -#include <numeric> -#include <string> -#include <utility> -#include <vector> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class Axis -{ - UNKNOWN = 0, - CHANNELS = 1, - INPUT_CHANNELS = 2, - OUTPUT_CHANNELS = 3, - HEIGHT = 4, - WIDTH = 5, - BATCH = 6, - VALUE = 7, - DEPTH = 8, -}; - -std::string ToString(Axis t); - -// Layout represents axis order. 
-enum class Layout -{ - UNKNOWN = 0, - SCALAR = 1, - LINEAR = 2, - HW = 3, - CHW = 4, - HWC = 5, - OIHW = 6, - OHWI = 7, - IHWO = 8, - IOHW = 9, - BHWC = 10, - HWDC = 11, - BHWDC = 12, - HWD = 13, - OHWDI = 14, -}; - -std::string ToString(Layout l); - -// Returns number of axis for the fixed layout. -template <Layout T> constexpr int Size(); - -// Returns number of axis for the given layout. -int Size(Layout layout); - -// Returns Axis for the given index and fixed layout. -template <Layout T> constexpr Axis GetAxis(int index); - -// Returns axis for the given layout and index. -Axis GetAxis(Layout layout, int32_t index); - -// Returns axis index for the given axis and fixed layout. -template <Layout T> constexpr int GetAxisIndex(Axis axis); - -// Returns axis index for the given layout and axis. -int GetAxisIndex(Layout layout, Axis axis); - -// Checks if fixed layout has given axis -template <Layout T> constexpr bool HasAxis(Axis axis); - -// Checks if given layout has given axis -bool HasAxis(Layout layout, Axis axis); - -// Stores Layout(axis set and order) and value for dimensions. -struct Shape -{ - Shape() : layout(Layout::UNKNOWN), dimensions() {} - - explicit Shape(Layout t) : layout(t), dimensions(Size(t)) {} - - Shape(Layout t, std::vector<int32_t> d) : layout(t), dimensions(std::move(d)) {} - - bool operator==(const Shape &other) const - { - return (layout == other.layout) && (dimensions == other.dimensions); - } - - bool operator!=(const Shape &other) const { return !operator==(other); } - - // All methods below are matching same methods defined in StrongShape to - // make sure generic algorithms work both ways. - - // Returns back a dimension or -1 if it is not found. 
- template <Axis D> int32_t get() const; - int32_t get(Axis axis) const; - - template <Axis D> bool set(int32_t t); - bool set(Axis axis, int32_t t); - - Axis axis(int index) const { return GetAxis(layout, index); } - - int index(Axis axis) const { return GetAxisIndex(layout, axis); } - - bool has(Axis axis) const { return HasAxis(layout, axis); } - - int64_t DimensionsProduct() const - { - return std::accumulate(dimensions.begin(), dimensions.end(), 1ll, std::multiplies<int64_t>()); - } - - Layout layout = Layout::UNKNOWN; - - std::vector<int32_t> dimensions; -}; - -std::string ToString(const Shape &s); - -// StrongShape provides convenient explicit access to dimensions stored in -// shape, e.g. StrongShape<Layout::HW> s; provides s.h and s.w accessors. -// -// There is a conversion possible both ways between Shape and StrongShape. -// -// OIHW oihw; // specific shape -// Shape l = oihw.ToShape(); -// -// OHWI other; // notice not the same but compatible shape. -// if (!other.Adopt(l)) { -// // error handling -// } -// -// StrongShape supports the following set of operations: -// -// // Returns number of axis in the shape class. -// static constexpr int size(); -// -// // Returns Axis for the given index or Axis::UNKNOWN if index -// // falls outside of the defined range in this shape. -// static constexpr Axis axis(int index); -// -// // Returns index for the given axis or -1 if axis is not defined in this -// // shape. -// static constexpr int index(Axis axis); -// -// // Getters -// int32_t get(int index) const; -// int32_t get(Axis axis) const; -// int32_t get<Axis>() const; -// -// // Setters that return false if set was not successful. -// bool set(int index, int32_t v); -// bool set(Axis axis, int32_t v); -// bool set<Axis>(int32_t v); -// -// // Returns shape's layout. -// static const Layout layout; -// -// // Turns specific shape into generic shape. -// Shape ToShape() const; -// -// // Copies all dimensions from the given shape. 
-// bool Adopt(const Shape&); -// -template <Layout L> struct StrongShape; - -using Scalar = StrongShape<Layout::SCALAR>; -using Linear = StrongShape<Layout::LINEAR>; -using HW = StrongShape<Layout::HW>; -using HWD = StrongShape<Layout::HWD>; - -// Common tensor shape for CNN models working with images. -using CHW = StrongShape<Layout::CHW>; -using HWC = StrongShape<Layout::HWC>; -using HWDC = StrongShape<Layout::HWDC>; -using BHWC = StrongShape<Layout::BHWC>; -using BHWDC = StrongShape<Layout::BHWDC>; - -// Tensor shape used in convolution_2d weights. -using OIHW = StrongShape<Layout::OIHW>; -using OHWI = StrongShape<Layout::OHWI>; -using IHWO = StrongShape<Layout::IHWO>; -using IOHW = StrongShape<Layout::IOHW>; - -// Tensor shape used in convolution_3d weights. -using OHWDI = StrongShape<Layout::OHWDI>; - -// ----------------------------------------------------------------------------- -// Everything below are internal implementation details. -// ----------------------------------------------------------------------------- - -namespace internal_shape -{ - -template <Axis T> struct AxisTraits; - -#define TFLITE_GPU_AXIS_TRAITS(AxisName, HolderName) \ - template <> struct AxisTraits<Axis::AxisName> \ - { \ - struct Holder \ - { \ - int32_t HolderName; \ - \ - protected: \ - int32_t operator()() const { return HolderName; } \ - void operator()(int32_t v) { HolderName = v; } \ - }; \ - \ - using dimension_holder_type = Holder; \ - } - -TFLITE_GPU_AXIS_TRAITS(CHANNELS, c); -TFLITE_GPU_AXIS_TRAITS(HEIGHT, h); -TFLITE_GPU_AXIS_TRAITS(WIDTH, w); -TFLITE_GPU_AXIS_TRAITS(INPUT_CHANNELS, i); -TFLITE_GPU_AXIS_TRAITS(OUTPUT_CHANNELS, o); -TFLITE_GPU_AXIS_TRAITS(BATCH, b); -TFLITE_GPU_AXIS_TRAITS(VALUE, v); -TFLITE_GPU_AXIS_TRAITS(DEPTH, d); - -#undef TFLITE_GPU_AXIS_TRAITS - -template <int N, Axis... 
As> struct StrongShapeImpl; - -template <int N> struct StrongShapeImpl<N> -{ - static constexpr int size() { return N; } - - static constexpr Axis axis(int) { return Axis::UNKNOWN; } - - static constexpr int index(Axis) { return -1; } - - static constexpr bool has(Axis) { return false; } - - int32_t get(Axis) const { return -1; } - - int32_t get(int) const { return -1; } - - template <Axis B> int32_t get() const { return -1; } - - bool set(Axis, int32_t) { return false; } - - bool set(int, int32_t) { return false; } - - template <Axis B> bool set(int32_t) { return false; } -}; - -// Used to deduce number of axis, and to be a child of a proper holder to -// provide access to the dimension by name -template <int N, Axis A, Axis... As> -struct StrongShapeImpl<N, A, As...> : public AxisTraits<A>::dimension_holder_type, - public StrongShapeImpl<N + 1, As...> -{ - using dimension_holder_type = typename AxisTraits<A>::dimension_holder_type; - - using rest_type = StrongShapeImpl<N + 1, As...>; - - StrongShapeImpl() : dimension_holder_type{0}, rest_type() {} - - template <typename... Ts> - explicit StrongShapeImpl(int32_t t, Ts... ts) : dimension_holder_type{t}, rest_type(ts...) - { - } - - static constexpr Axis axis(int index) { return index == N ? A : rest_type::axis(index); } - - static constexpr int index(Axis axis) { return axis == A ? N : rest_type::index(axis); } - - static constexpr bool has(Axis axis) { return axis == A ? true : rest_type::has(axis); } - - int32_t get(Axis axis) const - { - return axis == A ? dimension_holder_type::operator()() : rest_type::get(axis); - } - - template <Axis B> int32_t get() const - { - return B == A ? dimension_holder_type::operator()() : rest_type::template get<B>(); - } - - int32_t get(int index) const - { - return index == N ? 
dimension_holder_type::operator()() : rest_type::get(index); - } - - bool set(Axis axis, int32_t t) - { - if (axis == A) - { - dimension_holder_type::operator()(t); - return true; - } - return rest_type::set(axis, t); - } - - bool set(int index, int32_t t) - { - if (index == N) - { - dimension_holder_type::operator()(t); - return true; - } - return rest_type::set(index, t); - } - - template <Axis B> bool set(int32_t t) - { - if (A == B) - { - dimension_holder_type::operator()(t); - return true; - } - return rest_type::template set<B>(t); - } -}; - -template <Layout T> struct LayoutTraits; - -#define TFLITE_GPU_LAYOUT_TRAITS(LayoutName, ...) \ - template <> struct LayoutTraits<Layout::LayoutName> \ - { \ - using strong_shape_type = StrongShapeImpl<0, __VA_ARGS__>; \ - } - -TFLITE_GPU_LAYOUT_TRAITS(HW, Axis::HEIGHT, Axis::WIDTH); -TFLITE_GPU_LAYOUT_TRAITS(HWD, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH); -TFLITE_GPU_LAYOUT_TRAITS(OHWI, Axis::OUTPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH, - Axis::INPUT_CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(OIHW, Axis::OUTPUT_CHANNELS, Axis::INPUT_CHANNELS, Axis::HEIGHT, - Axis::WIDTH); -TFLITE_GPU_LAYOUT_TRAITS(IOHW, Axis::INPUT_CHANNELS, Axis::OUTPUT_CHANNELS, Axis::HEIGHT, - Axis::WIDTH); -TFLITE_GPU_LAYOUT_TRAITS(IHWO, Axis::INPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH, - Axis::OUTPUT_CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(CHW, Axis::CHANNELS, Axis::HEIGHT, Axis::WIDTH); -TFLITE_GPU_LAYOUT_TRAITS(HWC, Axis::HEIGHT, Axis::WIDTH, Axis::CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(HWDC, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH, Axis::CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(LINEAR, Axis::VALUE); -TFLITE_GPU_LAYOUT_TRAITS(SCALAR, Axis::VALUE); -TFLITE_GPU_LAYOUT_TRAITS(BHWC, Axis::BATCH, Axis::HEIGHT, Axis::WIDTH, Axis::CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(BHWDC, Axis::BATCH, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH, - Axis::CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(OHWDI, Axis::OUTPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH, - Axis::INPUT_CHANNELS); - -#undef 
TFLITE_GPU_LAYOUT_TRAITS - -template <> struct LayoutTraits<Layout::UNKNOWN> -{ - using strong_shape_type = StrongShapeImpl<0>; -}; - -template <Axis A> struct DimensionGetterFixedAxisFunc -{ - template <Layout T> int32_t operator()() const - { - constexpr int i = GetAxisIndex<T>(A); - return i >= 0 && i < l->dimensions.size() ? l->dimensions[i] : -1; - } - const Shape *l; -}; - -struct DimensionGetterFunc -{ - template <Layout T> int32_t operator()() const - { - uint32_t i = GetAxisIndex<T>(axis); - return i < l->dimensions.size() ? l->dimensions[i] : -1; - } - Axis axis; - const Shape *l; -}; - -template <Axis A> struct DimensionSetterFixedAxisFunc -{ - template <Layout T> bool operator()() const - { - constexpr uint32_t i = GetAxisIndex<T>(A); - if (i < l->dimensions.size()) - { - l->dimensions[i] = v; - return true; - } - return false; - } - Shape *l; - int32_t v; -}; - -struct DimensionSetterFunc -{ - template <Layout T> bool operator()() const - { - uint32_t i = GetAxisIndex<T>(axis); - if (i < l->dimensions.size()) - { - l->dimensions[i] = v; - return true; - } - return false; - } - Axis axis; - Shape *l; - int32_t v; -}; - -template <Layout L> struct ToShapeFunc -{ - template <Layout T> bool operator()() const - { - for (int i = 0; i < StrongShape<L>::size(); ++i) - { - int index = GetAxisIndex<T>(StrongShape<L>::axis(i)); - if (index < 0) - return false; - shape->set(i, l.dimensions[index]); - } - return true; - } - - StrongShape<L> *shape; - const Shape &l; -}; - -} // namespace internal_shape - -// template <Axis... As> -template <Layout L> struct StrongShape : public internal_shape::LayoutTraits<L>::strong_shape_type -{ - using strong_shape_type = typename internal_shape::LayoutTraits<L>::strong_shape_type; - StrongShape() = default; - - template <typename... Ts> explicit StrongShape(Ts... t) : strong_shape_type(t...) 
{} - - constexpr static Layout layout = L; - - bool operator==(const StrongShape<L> &shape) const - { - // TODO(akulik): implement better alternative. - return this->ToShape() == shape.ToShape(); - } - - bool operator!=(const StrongShape<L> &shape) const - { - // TODO(akulik): implement better alternative. - return this->ToShape() != shape.ToShape(); - } - bool empty() const { return DimensionsProduct() == 0; } - - // Turns StrongShape into generic shape. - Shape ToShape() const - { - std::vector<int32_t> dimensions(StrongShape::size()); - for (int i = 0; i < StrongShape::size(); ++i) - { - dimensions[i] = StrongShape::get(i); - } - return Shape(L, std::move(dimensions)); - } - - // @return all dimensions multiplied - int64_t DimensionsProduct() const - { - int64_t product = 1; - for (int i = 0; i < StrongShape::size(); ++i) - { - product *= StrongShape::get(i); - } - return product; - } - - // Translates given coordinates of the layout into a linear index assuming - // dimensions are sorted in tensor access order e.g. if you access - // foobar[i][j][k] order of coordinates should be i,j,k. - int64_t LinearIndex(const std::array<int32_t, StrongShape::size()> &coordinates) const - { - int64_t index = coordinates[0]; - for (int i = 1; i < StrongShape::size(); ++i) - { - index = index * StrongShape::get(i) + coordinates[i]; - } - return index; - } - - // Copies all dimensions from the given generic shape into specific shape. - // It requires shape to have all axis defined in the given - // StrongShape. For example: - // - If this shape is OHWI but given shape is OIHW, Adopt will copy all - // dimensions and return true. - // - If this shape is OIHW but input shape is HW, Adopt will copy H and W - // dimensions and return true, but if this shape is HW and given shape - // OIHW, then Adopt will return false because not all axis are present in - // the input shape. - // - // @return false if generic shape is not compatible. 
- bool Adopt(const Shape &shape) - { - return DispatchByLayout(shape.layout, internal_shape::ToShapeFunc<L>{this, shape}); - } - - // For all axis defined in a given shape copies values to this shape. - // Therefore, it is possible to copy dimensions from CHW to BCHW, but not - // the other way around. - // - // BCHW bchw; - // CHW chw; - // bchw.CopyAllGivenAxis(chw); --> true - // chw.CopyAllGivenAxis(bchw); --> false - // - // @return false if axis in source shape is not defined here, thus value - // was not copied. - template <Layout B> bool CopyAllGivenAxis(const StrongShape<B> &source) - { - for (int i = 0; i < source.size(); ++i) - { - if (!StrongShape::set(source.axis(i), source.get(i))) - { - return false; - } - } - return true; - } - - // For all axis defined in this shape copies values from the given shape. - // - // BCHW bchw; - // CHW chw; - // bchw.CopyAllDefinedAxis(chw); --> false - // chw.CopyAllDefinedAxis(bchw); --> true - // - // @return false if given shape does not have axis defined here, - // therefore a value was not copied. - template <Layout B> bool CopyAllDefinedAxis(const StrongShape<B> &source) - { - for (int i = 0; i < StrongShape::size(); ++i) - { - int source_index = source.index(StrongShape::axis(i)); - if (source_index < 0) - { - return false; - } - StrongShape::set(i, source.get(source_index)); // always true - } - return true; - } - - // Copies values only for matching axis. - template <Layout B> void CopyMatchingAxis(const StrongShape<B> &source) - { - for (int i = 0; i < StrongShape::size(); ++i) - { - StrongShape::set(source.axis(i), source.get(i)); - } - } - - // AbslHash function for using in flat hash containers. 
- template <typename H> friend H AbslHashValue(H hash_state, const StrongShape &strong_shape) - { - for (size_t i = 0; i < strong_shape.size(); ++i) - { - hash_state = H::combine(std::move(hash_state), strong_shape.get(i)); - } - return hash_state; - } -}; - -template <Layout T> inline std::string ToString(const StrongShape<T> &s) -{ - return ToString(s.ToShape()); -} - -template <Layout L> constexpr Layout StrongShape<L>::layout; - -template <class F> -auto DispatchByLayout(Layout type, F f) -> decltype(f.template operator()<Layout::UNKNOWN>()) -{ - switch (type) - { - case Layout::HW: - return f.template operator()<Layout::HW>(); - case Layout::HWD: - return f.template operator()<Layout::HWD>(); - case Layout::HWC: - return f.template operator()<Layout::HWC>(); - case Layout::HWDC: - return f.template operator()<Layout::HWDC>(); - case Layout::CHW: - return f.template operator()<Layout::CHW>(); - case Layout::OIHW: - return f.template operator()<Layout::OIHW>(); - case Layout::IOHW: - return f.template operator()<Layout::IOHW>(); - case Layout::OHWI: - return f.template operator()<Layout::OHWI>(); - case Layout::IHWO: - return f.template operator()<Layout::IHWO>(); - case Layout::LINEAR: - return f.template operator()<Layout::LINEAR>(); - case Layout::SCALAR: - return f.template operator()<Layout::SCALAR>(); - case Layout::BHWC: - return f.template operator()<Layout::BHWC>(); - case Layout::BHWDC: - return f.template operator()<Layout::BHWDC>(); - case Layout::OHWDI: - return f.template operator()<Layout::OHWDI>(); - case Layout::UNKNOWN: - return f.template operator()<Layout::UNKNOWN>(); - } - return f.template operator()<Layout::UNKNOWN>(); -} - -template <Layout T> constexpr int Size() { return StrongShape<T>::size(); } - -template <Layout T> constexpr Axis GetAxis(int index) { return StrongShape<T>::axis(index); } - -template <Layout T> constexpr int GetAxisIndex(Axis axis) { return StrongShape<T>::index(axis); } - -template <Layout T> constexpr bool 
HasAxis(Axis axis) { return StrongShape<T>::has(axis); } - -template <Axis D> inline int32_t Shape::get() const -{ - return DispatchByLayout(layout, internal_shape::DimensionGetterFixedAxisFunc<D>{this}); -} - -inline int32_t Shape::get(Axis axis) const -{ - return DispatchByLayout(layout, internal_shape::DimensionGetterFunc{axis, this}); -} - -template <Axis D> inline bool Shape::set(int32_t t) -{ - return DispatchByLayout(layout, internal_shape::DimensionSetterFixedAxisFunc<D>{this, t}); -} - -inline bool Shape::set(Axis axis, int32_t t) -{ - return DispatchByLayout(layout, internal_shape::DimensionSetterFunc{axis, this, t}); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Spi.h b/runtime/onert/backend/gpu_cl/open_cl/Spi.h deleted file mode 100644 index c1d65b67e..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Spi.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__ -#define __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__ - -#include <cstdint> - -#include "Api.h" -#include "AccessType.h" -#include "Status.h" - -// Contains only service provider-related interfaces. Users should not use them -// directly. 
- -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// Converts a tensor object into another one. -class TensorObjectConverter -{ -public: - virtual ~TensorObjectConverter() = default; - - virtual absl::Status Convert(const TensorObject &input, const TensorObject &output) = 0; -}; - -class TensorObjectConverterBuilder -{ -public: - virtual ~TensorObjectConverterBuilder() = default; - - virtual bool IsSupported(const TensorObjectDef &input, const TensorObjectDef &output) const = 0; - - virtual absl::Status MakeConverter(const TensorObjectDef &input, const TensorObjectDef &output, - std::unique_ptr<TensorObjectConverter> *converter) = 0; -}; - -// Connects tensor definition provided by a user (external) with tensor -// definition used by the inference engine (internal). -struct TensorTieDef -{ - uint32_t id; - AccessType access_type; - TensorObjectDef internal_def; - TensorObjectDef external_def; -}; - -// Connects external tensor object to internal tensor object and provides -// functionality to copy data to/from external object to internal. -class TensorTie -{ -public: - explicit TensorTie(const TensorTieDef &def) : def_(def) {} - - virtual ~TensorTie() = default; - - virtual absl::Status SetExternalObject(TensorObject obj) = 0; - - virtual TensorObject GetExternalObject() = 0; - - virtual absl::Status CopyToExternalObject() = 0; - - virtual absl::Status CopyFromExternalObject() = 0; - - const TensorTieDef &def() const { return def_; } - -private: - const TensorTieDef def_; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Status.h b/runtime/onert/backend/gpu_cl/open_cl/Status.h deleted file mode 100644 index 6295a7e77..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Status.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__ - -#include "absl/status/status.h" // IWYU pragma: export -#define RETURN_IF_ERROR(s) \ - { \ - auto c = (s); \ - if (!c.ok()) \ - return c; \ - } // IWYU pragma: export - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc deleted file mode 100644 index eada697ac..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "StorageTypeUtil.h" - -#include "TensorType.h" -#include "DataType.h" -#include "Shape.h" -#include "Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWDC &shape, - const TensorDescriptor &descriptor) -{ - const int slices = DivideRoundUp(shape.c, 4); - switch (descriptor.storage_type) - { - case TensorStorageType::BUFFER: - { - const uint64_t flt4_size = 4 * (descriptor.data_type == DataType::FLOAT32 ? 4 : 2); - const uint64_t buffer_size = shape.b * shape.w * shape.h * shape.d * slices * flt4_size; - return buffer_size <= device_info.buffer_max_size; - } - case TensorStorageType::IMAGE_BUFFER: - return (uint64_t)shape.b * shape.w * shape.h * shape.d * slices <= - device_info.image_buffer_max_size; - case TensorStorageType::TEXTURE_3D: - if (device_info.cl_version < OpenCLVersion::CL_1_2 && slices == 1) - { - // clCreateImage3D (that used in CL 1.0/1.1) can not create image with - // depth = 1 by specification; - return false; - } - return (uint64_t)shape.w * shape.b <= device_info.image3d_max_width && - (uint64_t)shape.h <= device_info.image3d_max_height && - (uint64_t)slices * shape.d <= device_info.image3d_max_depth; - case TensorStorageType::TEXTURE_ARRAY: - // Bug on some Adreno. 
b/131099086 - if (slices == 1 && !device_info.SupportsOneLayerTextureArray()) - { - return false; - } - return (uint64_t)shape.w * shape.b <= device_info.image2d_max_width && - (uint64_t)shape.h <= device_info.image2d_max_height && - (uint64_t)slices * shape.d <= device_info.image_array_max_layers; - case TensorStorageType::TEXTURE_2D: - return (uint64_t)shape.w * shape.b * shape.d <= device_info.image2d_max_width && - (uint64_t)shape.h * slices <= device_info.image2d_max_height; - case TensorStorageType::SINGLE_TEXTURE_2D: - return (uint64_t)shape.c <= 4 && - device_info.SupportsFloatImage2D(descriptor.data_type, shape.c) && - (uint64_t)shape.w * shape.b * shape.d <= device_info.image2d_max_width && - (uint64_t)shape.h <= device_info.image2d_max_height; - default: - return false; - } -} - -bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWC &shape, - const TensorDescriptor &descriptor) -{ - const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); - return CanCreateTensorWithShape(device_info, shape5D, descriptor); -} - -TensorStorageType SelectBestStorageType(const DeviceInfo &device_info, const BHWC &shape, - const TensorStorageType &desired, const DataType &data_type, - const Layout &layout) -{ - if (CanCreateTensorWithShape(device_info, shape, TensorDescriptor{data_type, desired, layout})) - { - return desired; - } - auto GetBestTypeAfterTextureArray = [&]() { - if (device_info.SupportsImageBuffer() && - CanCreateTensorWithShape( - device_info, shape, TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER, layout})) - { - return TensorStorageType::IMAGE_BUFFER; - } - else - { - return TensorStorageType::BUFFER; - } - }; - auto GetBestTypeAfterTexture2D = [&]() { - if (device_info.SupportsTextureArray() && - CanCreateTensorWithShape( - device_info, shape, - TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY, layout})) - { - return TensorStorageType::TEXTURE_ARRAY; - } - else - { - return GetBestTypeAfterTextureArray(); - } - 
}; - auto GetBestTypeAfterTexture3D = [&]() { - if (CanCreateTensorWithShape( - device_info, shape, TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D, layout})) - { - return TensorStorageType::TEXTURE_2D; - } - else - { - return GetBestTypeAfterTexture2D(); - } - }; - switch (desired) - { - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::SINGLE_TEXTURE_2D: - return GetBestTypeAfterTexture2D(); - case TensorStorageType::TEXTURE_ARRAY: - return GetBestTypeAfterTextureArray(); - case TensorStorageType::TEXTURE_3D: - return GetBestTypeAfterTexture3D(); - case TensorStorageType::IMAGE_BUFFER: - case TensorStorageType::BUFFER: - return TensorStorageType::BUFFER; - default: - return TensorStorageType::BUFFER; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h b/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h deleted file mode 100644 index a84c3865f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__ - -#include "DeviceInfo.h" -#include "TensorType.h" -#include "DataType.h" -#include "Shape.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWDC &shape, - const TensorDescriptor &descriptor); - -bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWC &shape, - const TensorDescriptor &descriptor); - -TensorStorageType SelectBestStorageType(const DeviceInfo &device_info, const BHWC &shape, - const TensorStorageType &desired, const DataType &data_type, - const Layout &layout); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Tensor.cc b/runtime/onert/backend/gpu_cl/open_cl/Tensor.cc deleted file mode 100644 index 983e0d29d..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Tensor.cc +++ /dev/null @@ -1,690 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Tensor.h" - -#include <cstring> -#include <vector> - -#include "absl/strings/str_cat.h" - -#include "Buffer.h" -#include "ClImageFormat.h" -#include "ClMemory.h" -#include "GpuObject.h" -#include "TensorType.h" -#include "InternalTensor.h" -#include "DataType.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, const void *data_ptr, - CLMemory *result) -{ - const int slices = DivideRoundUp(shape.c, 4); - cl_mem_flags mem_flags = CL_MEM_READ_WRITE; - if (data_ptr) - { - mem_flags |= CL_MEM_COPY_HOST_PTR; - } - switch (descriptor.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - { - const size_t data_size = - shape.b * shape.w * shape.h * shape.d * slices * 4 * SizeOf(descriptor.data_type); - cl_int error_code; - cl_mem memory = clCreateBuffer(context.context(), mem_flags, data_size, - const_cast<void *>(data_ptr), &error_code); - if (!memory) - { - return absl::UnknownError(absl::StrCat( - "Failed to allocate device memory (clCreateBuffer): ", CLErrorCodeToString(error_code))); - } - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - case TensorStorageType::TEXTURE_2D: - { - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = shape.w * shape.b * shape.d; - desc.image_height = shape.h * slices; - desc.image_depth = 0; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = ToImageChannelType(descriptor.data_type); - - cl_int error_code; - cl_mem memory = CreateImage2DLegacy(context.context(), mem_flags, &format, &desc, - const_cast<void *>(data_ptr), &error_code); - if (error_code != CL_SUCCESS) - { - return 
absl::UnknownError(absl::StrCat("Failed to create 2D texture (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - case TensorStorageType::TEXTURE_3D: - { - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE3D; - desc.image_width = shape.w * shape.b; - desc.image_height = shape.h; - desc.image_depth = slices * shape.d; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = ToImageChannelType(descriptor.data_type); - - cl_int error_code; - cl_mem memory = CreateImage3DLegacy(context.context(), mem_flags, &format, &desc, - const_cast<void *>(data_ptr), &error_code); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to create 3D texture (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - case TensorStorageType::TEXTURE_ARRAY: - { - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; - desc.image_width = shape.w * shape.b; - desc.image_height = shape.h; - desc.image_depth = 0; - desc.image_array_size = slices * shape.d; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = ToImageChannelType(descriptor.data_type); - - cl_int error_code; - cl_mem memory = clCreateImage(context.context(), mem_flags, &format, &desc, - const_cast<void *>(data_ptr), &error_code); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat( - "Failed to create 2D texture array (clCreateImage): ", CLErrorCodeToString(error_code))); - } - - *result = CLMemory(memory, true); - return absl::OkStatus(); - } 
- - case TensorStorageType::SINGLE_TEXTURE_2D: - { - if (slices != 1) - { - return absl::InvalidArgumentError(absl::StrCat( - "SINGLE_TEXTURE_2D support only channels in range [1-4], but ", shape.c, "was provided")); - } - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = shape.w * shape.b * shape.d; - desc.image_height = shape.h; - desc.image_depth = 0; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type)) - { - format.image_channel_order = ToChannelOrder(shape.c); - format.image_channel_data_type = ToImageChannelType(descriptor.data_type); - } - else - { - return absl::InvalidArgumentError( - absl::StrCat("This device doesn't support ", shape.c, "-channel textures.")); - } - - cl_int error_code; - cl_mem memory = CreateImage2DLegacy(context.context(), mem_flags, &format, &desc, - const_cast<void *>(data_ptr), &error_code); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat( - "Failed to create single 2D texture (clCreateImage): ", CLErrorCodeToString(error_code))); - } - - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - - default: - return absl::InternalError("Unsupported tensor storage type"); - } -} - -absl::Status CreateImageBufferFromBuffer(const CLContext &context, cl_mem memory, - DataType data_type, int width, cl_mem *result) -{ - cl_image_format format; - cl_image_desc desc; - std::memset(&desc, 0, sizeof(desc)); - desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; - desc.image_width = width; - desc.mem_object = memory; - - format.image_channel_data_type = ToImageChannelType(data_type); - format.image_channel_order = CL_RGBA; - - cl_int error_code; - *result = - clCreateImage(context.context(), CL_MEM_READ_WRITE, &format, &desc, nullptr, &error_code); - if (error_code != CL_SUCCESS) - { - return 
absl::UnknownError(absl::StrCat("Failed to create Image from Buffer (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CreateTensor(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, cl_mem memory, Tensor *result) -{ - const bool memory_owner = memory == nullptr; - if (memory_owner) - { - CLMemory mem; - RETURN_IF_ERROR(AllocateTensorMemory(context, shape, descriptor, nullptr, &mem)); - memory = mem.Release(); - } - if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) - { - cl_mem image_memory; - RETURN_IF_ERROR(CreateImageBufferFromBuffer( - context, memory, descriptor.data_type, - shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4), &image_memory)); - *result = Tensor(memory, memory_owner, image_memory, shape, descriptor); - } - else - { - *result = Tensor(memory, memory_owner, shape, descriptor); - } - return absl::OkStatus(); -} - -absl::Status CreateTensorShared(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, cl_mem memory, Tensor *result) -{ - const bool memory_owner = false; - if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) - { - cl_mem image_memory; - RETURN_IF_ERROR(CreateImageBufferFromBuffer( - context, memory, descriptor.data_type, - shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4), &image_memory)); - *result = Tensor(memory, memory_owner, image_memory, shape, descriptor); - } - else - { - *result = Tensor(memory, memory_owner, shape, descriptor); - } - return absl::OkStatus(); -} - -} // namespace - -absl::Status TensorDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const -{ - Tensor gpu_tensor; - RETURN_IF_ERROR(gpu_tensor.CreateFromDescriptor(*this, context)); - *result = absl::make_unique<Tensor>(std::move(gpu_tensor)); - return absl::OkStatus(); -} - -Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWC &shape, - const TensorDescriptor 
&descriptor) - : memory_(memory), image_buffer_memory_(nullptr), memory_owner_(memory_owner), - shape_(shape.b, shape.h, shape.w, 1, shape.c), descriptor_(descriptor) -{ -} - -Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWDC &shape, - const TensorDescriptor &descriptor) - : memory_(memory), image_buffer_memory_(nullptr), memory_owner_(memory_owner), shape_(shape), - descriptor_(descriptor) -{ -} - -Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWC &shape, - const TensorDescriptor &descriptor) - : memory_(memory), image_buffer_memory_(image_buffer_memory), memory_owner_(memory_owner), - shape_(shape.b, shape.h, shape.w, 1, shape.c), descriptor_(descriptor) -{ -} - -Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWDC &shape, - const TensorDescriptor &descriptor) - : memory_(memory), image_buffer_memory_(image_buffer_memory), memory_owner_(memory_owner), - shape_(shape), descriptor_(descriptor) -{ -} - -Tensor::Tensor(Tensor &&tensor) - : memory_(tensor.memory_), image_buffer_memory_(tensor.image_buffer_memory_), - memory_owner_(tensor.memory_owner_), shape_(tensor.shape_), descriptor_(tensor.descriptor_) -{ - tensor.memory_ = nullptr; - tensor.image_buffer_memory_ = nullptr; -} - -Tensor &Tensor::operator=(Tensor &&tensor) -{ - if (this != &tensor) - { - Release(); - std::swap(memory_, tensor.memory_); - std::swap(image_buffer_memory_, tensor.image_buffer_memory_); - std::swap(memory_owner_, tensor.memory_owner_); - std::swap(shape_, tensor.shape_); - std::swap(descriptor_, tensor.descriptor_); - } - return *this; -} - -void Tensor::Release() -{ - // image_buffer_memory_ always owned by object - if (image_buffer_memory_) - { - clReleaseMemObject(image_buffer_memory_); - image_buffer_memory_ = nullptr; - } - if (memory_owner_ && memory_) - { - clReleaseMemObject(memory_); - memory_ = nullptr; - } -} - -absl::Status Tensor::GetGPUResources(const GPUObjectDescriptor *obj_ptr, - 
GPUResourcesWithValue *resources) const -{ - const auto *buffer_desc = dynamic_cast<const BufferDescriptor *>(obj_ptr); - if (buffer_desc) - { - if (descriptor_.storage_type != TensorStorageType::BUFFER) - { - return absl::InvalidArgumentError("Tensor can be used with BufferDescriptor only wtih " - "TensorStorageType::BUFFER."); - } - resources->buffers.push_back({"buffer", memory_}); - return absl::OkStatus(); - } - const auto *tensor_desc = dynamic_cast<const TensorDescriptor *>(obj_ptr); - if (!tensor_desc) - { - return absl::InvalidArgumentError("Expected TensorDescriptor on input."); - } - if (descriptor_.HasAxis(Axis::WIDTH)) - { - resources->ints.push_back({"width", Width()}); - resources->ints.push_back({"width_div2", Width() / 2}); - resources->ints.push_back({"width_div4", Width() / 4}); - resources->ints.push_back({"width_batched", Width() * Batch()}); - resources->ints.push_back({"width_batched_div2", Width() * Batch() / 2}); - resources->ints.push_back({"width_batched_div4", Width() * Batch() / 4}); - } - if (descriptor_.HasAxis(Axis::HEIGHT)) - { - resources->ints.push_back({"height", Height()}); - } - if (descriptor_.HasAxis(Axis::CHANNELS)) - { - resources->ints.push_back({"slices", Slices()}); - resources->ints.push_back({"channels", Channels()}); - } - if (descriptor_.HasAxis(Axis::BATCH)) - { - resources->ints.push_back({"batch", Batch()}); - } - if (descriptor_.HasAxis(Axis::DEPTH)) - { - resources->ints.push_back({"depth", Depth()}); - } - - if (descriptor_.storage_type == TensorStorageType::BUFFER) - { - resources->buffers.push_back({"buffer", memory_}); - } - else if (descriptor_.storage_type == TensorStorageType::TEXTURE_2D || - descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D) - { - resources->images2d.push_back({"image2d", memory_}); - } - else if (descriptor_.storage_type == TensorStorageType::TEXTURE_ARRAY) - { - resources->image2d_arrays.push_back({"image2d_array", memory_}); - } - else if (descriptor_.storage_type == 
TensorStorageType::TEXTURE_3D) - { - resources->images3d.push_back({"image3d", memory_}); - } - else if (descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER) - { - if (obj_ptr->GetAccess() == AccessType::READ) - { - resources->image_buffers.push_back({"image_buffer", image_buffer_memory_}); - } - else - { - resources->buffers.push_back({"buffer", memory_}); - } - } - - return absl::OkStatus(); -} - -int3 Tensor::GetFullTensorRegion() const -{ - switch (descriptor_.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - case TensorStorageType::IMAGE_BUFFER: - return {shape_.w * shape_.b, shape_.h, shape_.d * Slices()}; - case TensorStorageType::TEXTURE_2D: - return {shape_.w * shape_.b * shape_.d, shape_.h * Slices(), 1}; - case TensorStorageType::SINGLE_TEXTURE_2D: - return {shape_.w * shape_.b * shape_.d, shape_.h, 1}; - case TensorStorageType::UNKNOWN: - return {-1, -1, -1}; - } - return {-1, -1, -1}; -} - -absl::Status Tensor::IsValid(const BHWC &shape) const -{ - if (shape.b != shape_.b) - { - return absl::InvalidArgumentError("Shape batch does not match tensor batch"); - } - if (shape.w != shape_.w) - { - return absl::InvalidArgumentError("Shape width does not match tensor width"); - } - if (shape.h != shape_.h) - { - return absl::InvalidArgumentError("Shape height does not match tensor height"); - } - if (shape.c != shape_.c) - { - return absl::InvalidArgumentError("Shape channels does not match tensor channels"); - } - return absl::OkStatus(); -} - -absl::Status Tensor::IsValid(const BHWDC &shape) const -{ - if (shape.b != shape_.b) - { - return absl::InvalidArgumentError("Shape batch does not match tensor batch"); - } - if (shape.w != shape_.w) - { - return absl::InvalidArgumentError("Shape width does not match tensor width"); - } - if (shape.h != shape_.h) - { - return absl::InvalidArgumentError("Shape height does not match tensor height"); - } - if (shape.d != shape_.d) - 
{ - return absl::InvalidArgumentError("Shape depth does not match tensor depth"); - } - if (shape.c != shape_.c) - { - return absl::InvalidArgumentError("Shape channels does not match tensor channels"); - } - return absl::OkStatus(); -} - -int Tensor::GetAlignedChannels() const -{ - return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape_.c - : AlignByN(shape_.c, 4); -} - -uint64_t Tensor::GetMemorySizeInBytes() const -{ - const uint64_t flt_size = static_cast<uint64_t>(SizeOf(descriptor_.data_type)); - const uint64_t flt4_size = 4 * flt_size; - switch (descriptor_.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_3D: - return flt4_size * shape_.b * shape_.w * shape_.h * shape_.d * Slices(); - case TensorStorageType::SINGLE_TEXTURE_2D: - return flt_size * shape_.w * shape_.h * shape_.c * shape_.b * shape_.d; - default: - return 0; - } -} - -cl_mem Tensor::GetMemoryPtr() const -{ - return descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER ? 
image_buffer_memory_ - : memory_; -} - -cl_mem Tensor::GetMemoryPtrForWriting() const { return memory_; } - -absl::Status Tensor::WriteDataBHWDC(absl::Span<const float> in, CLCommandQueue *queue) -{ - void *data_ptr = nullptr; - const int aligned_channels = GetAlignedChannels(); - const int elements_count = shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels; - - const size_t data_size = elements_count * SizeOf(descriptor_.data_type); - std::vector<float> data_f; - data_f.resize(elements_count); - data_ptr = data_f.data(); - DataFromBHWDC(in, shape_, descriptor_, absl::MakeSpan(data_f.data(), data_f.size())); - - switch (descriptor_.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - RETURN_IF_ERROR(queue->EnqueueWriteBuffer(memory_, data_size, data_ptr)); - break; - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_3D: - case TensorStorageType::SINGLE_TEXTURE_2D: - RETURN_IF_ERROR(queue->EnqueueWriteImage(memory_, GetFullTensorRegion(), data_ptr)); - break; - default: - return absl::InternalError("Unsupported tensor storage type"); - } - - return absl::OkStatus(); -} - -absl::Status Tensor::WriteData(CLCommandQueue *queue, const TensorFloat32 &src) -{ - RETURN_IF_ERROR(IsValid(src.shape)); - return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue); -} - -absl::Status Tensor::WriteData(CLCommandQueue *queue, - const InternalTensor<Linear, DataType::FLOAT32> &src) -{ - return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue); -} - -absl::Status Tensor::WriteData(CLCommandQueue *queue, - const InternalTensor<HWC, DataType::FLOAT32> &src) -{ - return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue); -} - -absl::Status Tensor::WriteData(CLCommandQueue *queue, const Tensor5DFloat32 &src) -{ - RETURN_IF_ERROR(IsValid(src.shape)); - return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue); -} - -absl::Status Tensor::ReadDataBHWDC(absl::Span<float> 
out, CLCommandQueue *queue) const -{ - void *data_ptr = nullptr; - const int aligned_channels = GetAlignedChannels(); - const int elements_count = shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels; - const size_t data_size = elements_count * SizeOf(descriptor_.data_type); - - std::vector<float> data_f; - data_f.resize(elements_count); - data_ptr = data_f.data(); - switch (descriptor_.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - RETURN_IF_ERROR(queue->EnqueueReadBuffer(memory_, data_size, data_ptr)); - break; - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_3D: - case TensorStorageType::SINGLE_TEXTURE_2D: - RETURN_IF_ERROR(queue->EnqueueReadImage(memory_, GetFullTensorRegion(), data_ptr)); - break; - default: - return absl::InternalError("Unsupported tensor storage type"); - } - - if (descriptor_.data_type == DataType::FLOAT32) - { - DataToBHWDC(absl::MakeConstSpan(data_f.data(), data_f.size()), shape_, descriptor_, out); - } - - return absl::OkStatus(); -} - -absl::Status Tensor::ReadData(CLCommandQueue *queue, TensorFloat32 *dst) const -{ - RETURN_IF_ERROR(IsValid(dst->shape)); - return ReadDataBHWDC(absl::MakeSpan(dst->data), queue); -} - -absl::Status Tensor::ReadData(CLCommandQueue *queue, Tensor5DFloat32 *dst) const -{ - RETURN_IF_ERROR(IsValid(dst->shape)); - return ReadDataBHWDC(absl::MakeSpan(dst->data), queue); -} - -absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor &desc, CLContext *context) -{ - shape_ = desc.shape; - descriptor_.data_type = desc.data_type; - descriptor_.storage_type = desc.storage_type; - descriptor_.layout = desc.layout; - memory_owner_ = true; - CLMemory memory; - uint8_t *data_ptr = desc.data.empty() ? 
nullptr : const_cast<unsigned char *>(desc.data.data()); - RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory)); - memory_ = memory.Release(); - if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) - { - RETURN_IF_ERROR(CreateImageBufferFromBuffer(*context, memory_, desc.data_type, - shape_.b * shape_.w * shape_.h * shape_.d * - DivideRoundUp(shape_.c, 4), - &image_buffer_memory_)); - } - return absl::OkStatus(); -} - -absl::Status CreateTensor(const CLContext &context, const BHWC &shape, - const TensorDescriptor &descriptor, Tensor *result) -{ - const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); - return CreateTensor(context, shape5D, descriptor, nullptr, result); -} - -absl::Status CreateTensor(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, Tensor *result) -{ - return CreateTensor(context, shape, descriptor, nullptr, result); -} - -absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWC &shape, - const TensorDescriptor &descriptor, Tensor *result) -{ - const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); - return CreateTensorShared(context, shape5D, descriptor, memory, result); -} - -absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWDC &shape, - const TensorDescriptor &descriptor, Tensor *result) -{ - return CreateTensorShared(context, shape, descriptor, memory, result); -} - -absl::Status AllocateTensorMemory(const CLContext &context, const BHWC &shape, - const TensorDescriptor &descriptor, CLMemory *result) -{ - const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); - return AllocateTensorMemory(context, shape5D, descriptor, nullptr, result); -} - -absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, CLMemory *result) -{ - return AllocateTensorMemory(context, shape, descriptor, nullptr, result); -} - -} // namespace gpu_cl -} // namespace 
backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Tensor.h b/runtime/onert/backend/gpu_cl/open_cl/Tensor.h deleted file mode 100644 index b1930a423..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Tensor.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__ - -#include <cstdint> -#include <memory> - -#include "absl/types/span.h" -#include "ClCommandQueue.h" -#include "OpenclWrapper.h" -#include "ClContext.h" -#include "ClDevice.h" -#include "ClMemory.h" -#include "GpuObject.h" -#include "TensorType.h" -#include "Util.h" -#include "DataType.h" -#include "Shape.h" -#include "Status.h" -#include "InternalTensor.h" -#include "Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class Tensor : public GPUObject -{ -public: - Tensor() : memory_(nullptr), image_buffer_memory_(nullptr), memory_owner_(true) {} - Tensor(cl_mem memory, bool memory_owner, const BHWC &shape, const TensorDescriptor &descriptor); - Tensor(cl_mem memory, bool memory_owner, const BHWDC &shape, const TensorDescriptor &descriptor); - Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWC &shape, - const TensorDescriptor &descriptor); - 
Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWDC &shape, - const TensorDescriptor &descriptor); - - // Move only - Tensor(Tensor &&tensor); - Tensor &operator=(Tensor &&tensor); - Tensor(const Tensor &) = delete; - Tensor &operator=(const Tensor &) = delete; - - virtual ~Tensor() { Release(); } - - absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const override; - - int Width() const { return shape_.w; } - int Height() const { return shape_.h; } - int Depth() const { return shape_.d; } - int Channels() const { return shape_.c; } - int Slices() const { return DivideRoundUp(shape_.c, 4); } - int Batch() const { return shape_.b; } - TensorDescriptor GetDescriptor() const { return descriptor_; } - DataType GetDataType() const { return descriptor_.data_type; } - TensorStorageType GetStorageType() const { return descriptor_.storage_type; } - - // for profiling and memory statistics - uint64_t GetMemorySizeInBytes() const; - - cl_mem GetMemoryPtr() const; - - // This function returns buffer memory ptr for IMAGE_BUFFER instead of image - // memory ptr. 
- cl_mem GetMemoryPtrForWriting() const; - - absl::Status WriteData(CLCommandQueue *queue, const TensorFloat32 &src); - absl::Status WriteData(CLCommandQueue *queue, - const InternalTensor<Linear, DataType::FLOAT32> &src); - absl::Status WriteData(CLCommandQueue *queue, const InternalTensor<HWC, DataType::FLOAT32> &src); - - absl::Status WriteData(CLCommandQueue *queue, const Tensor5DFloat32 &src); - absl::Status ReadData(CLCommandQueue *queue, TensorFloat32 *dst) const; - absl::Status ReadData(CLCommandQueue *queue, Tensor5DFloat32 *dst) const; - - absl::Status CreateFromDescriptor(const TensorDescriptor &desc, CLContext *context); - -private: - absl::Status IsValid(const BHWC &shape) const; - absl::Status IsValid(const BHWDC &shape) const; - - int GetChannelsAlignment() const; - int GetAlignedChannels() const; - - absl::Status WriteDataBHWDC(absl::Span<const float> in, CLCommandQueue *queue); - absl::Status ReadDataBHWDC(absl::Span<float> out, CLCommandQueue *queue) const; - - int3 GetFullTensorRegion() const; - void Release(); - - cl_mem memory_; - cl_mem image_buffer_memory_; // for TensorStorageType::IMAGE_BUFFER only - bool memory_owner_; - BHWDC shape_; - TensorDescriptor descriptor_; -}; - -using TensorPtr = std::shared_ptr<Tensor>; - -absl::Status AllocateTensorMemory(const CLContext &context, const BHWC &shape, - const TensorDescriptor &descriptor, CLMemory *result); - -absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, CLMemory *result); - -absl::Status CreateTensor(const CLContext &context, const BHWC &shape, - const TensorDescriptor &descriptor, Tensor *result); - -absl::Status CreateTensor(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, Tensor *result); - -absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWC &shape, - const TensorDescriptor &descriptor, Tensor *result); - -absl::Status CreateSharedTensor(const CLContext 
&context, cl_mem memory, const BHWDC &shape, - const TensorDescriptor &descriptor, Tensor *result); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorType.cc b/runtime/onert/backend/gpu_cl/open_cl/TensorType.cc deleted file mode 100644 index 7ede38795..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/TensorType.cc +++ /dev/null @@ -1,1116 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "TensorType.h" - -#include "absl/strings/str_cat.h" -#include "absl/strings/substitute.h" -#include "Shape.h" -#include "DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::string GetWriteImageFromDataType(DataType data_type) -{ - if (data_type == DataType::FLOAT32) - { - return "write_imagef"; - } - else if (data_type == DataType::FLOAT16) - { - return "write_imageh"; - } - else - { - throw std::runtime_error("Not supported data type"); - } -} - -} // namespace - -std::string TextureAddressModeToString(TextureAddressMode address_mode) -{ - switch (address_mode) - { - case TextureAddressMode::DONT_CARE: - return "smp_none"; - case TextureAddressMode::ZERO: - return "smp_zero"; - } - return ""; -} - -std::string ToString(TensorStorageType type) -{ - switch (type) - { - case TensorStorageType::UNKNOWN: - return "TensorStorageType::UNKNOWN"; - case TensorStorageType::BUFFER: - return "TensorStorageType::BUFFER"; - case TensorStorageType::TEXTURE_ARRAY: - return "TensorStorageType::TEXTURE_ARRAY"; - case TensorStorageType::TEXTURE_2D: - return "TensorStorageType::TEXTURE_2D"; - case TensorStorageType::TEXTURE_3D: - return "TensorStorageType::TEXTURE_3D"; - case TensorStorageType::SINGLE_TEXTURE_2D: - return "TensorStorageType::SINGLE_TEXTURE_2D"; - case TensorStorageType::IMAGE_BUFFER: - return "TensorStorageType::IMAGE_BUFFER"; - } - return ""; -} - -TensorDescriptor::TensorDescriptor(TensorDescriptor &&desc) - : GPUObjectDescriptor(std::move(desc)), data_type(desc.data_type), - storage_type(desc.storage_type), layout(desc.layout), shape(desc.shape), - data(std::move(desc.data)) -{ -} -TensorDescriptor &TensorDescriptor::operator=(TensorDescriptor &&desc) -{ - if (this != &desc) - { - std::swap(data_type, desc.data_type); - std::swap(storage_type, desc.storage_type); - std::swap(layout, desc.layout); - std::swap(shape, desc.shape); - data = std::move(desc.data); - 
GPUObjectDescriptor::operator=(std::move(desc)); - } - return *this; -} - -GPUResources TensorDescriptor::GetGPUResources() const -{ - GPUResources resources; - if (HasAxis(Axis::WIDTH)) - { - resources.ints.push_back("width"); - resources.ints.push_back("width_div2"); - resources.ints.push_back("width_div4"); - resources.ints.push_back("width_batched"); - resources.ints.push_back("width_batched_div2"); - resources.ints.push_back("width_batched_div4"); - } - if (HasAxis(Axis::HEIGHT)) - { - resources.ints.push_back("height"); - } - if (HasAxis(Axis::CHANNELS)) - { - resources.ints.push_back("slices"); - resources.ints.push_back("channels"); - } - if (HasAxis(Axis::BATCH)) - { - resources.ints.push_back("batch"); - } - if (HasAxis(Axis::DEPTH)) - { - resources.ints.push_back("depth"); - } - if (storage_type == TensorStorageType::BUFFER) - { - GPUBufferDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - desc.element_size = 4; - auto it1 = state_vars_.find("ElementsX2"); - if (it1 != state_vars_.end() && it1->second == "true") - { - desc.element_size = 8; - } - auto it2 = state_vars_.find("ElementsX4"); - if (it2 != state_vars_.end() && it2->second == "true") - { - desc.element_size = 16; - } - resources.buffers.push_back({"buffer", desc}); - } - else if (storage_type == TensorStorageType::SINGLE_TEXTURE_2D || - storage_type == TensorStorageType::TEXTURE_2D) - { - GPUImage2DDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - resources.images2d.push_back({"image2d", desc}); - } - else if (storage_type == TensorStorageType::TEXTURE_ARRAY) - { - GPUImage2DArrayDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - resources.image2d_arrays.push_back({"image2d_array", desc}); - } - else if (storage_type == TensorStorageType::TEXTURE_3D) - { - GPUImage3DDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - resources.images3d.push_back({"image3d", 
desc}); - } - else if (storage_type == TensorStorageType::IMAGE_BUFFER) - { - if (access_type_ == AccessType::READ) - { - GPUImageBufferDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - resources.image_buffers.push_back({"image_buffer", desc}); - } - else - { - GPUBufferDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - desc.element_size = 4; - resources.buffers.push_back({"buffer", desc}); - } - } - return resources; -} - -absl::Status TensorDescriptor::PerformSelector(const std::string &selector, - const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const -{ - if (selector == "Width") - { - *result = GetWidth(); - return absl::OkStatus(); - } - else if (selector == "Height") - { - *result = "height"; - return absl::OkStatus(); - } - else if (selector == "Slices") - { - *result = "slices"; - return absl::OkStatus(); - } - else if (selector == "SliceStride") - { - *result = GetSliceStride(); - return absl::OkStatus(); - } - else if (selector == "Channels") - { - *result = "channels"; - return absl::OkStatus(); - } - else if (selector == "Batch") - { - if (HasAxis(Axis::BATCH)) - { - *result = "batch"; - } - else - { - *result = "1"; - } - return absl::OkStatus(); - } - else if (selector == "Depth") - { - *result = "depth"; - return absl::OkStatus(); - } - else if (selector == "SetBatchRef") - { - if (args.size() != 1) - { - return absl::InvalidArgumentError("Unsupported arguments in SetBatchRef selector"); - } - state_vars_["batch_id"] = args[0]; - *result = ""; - return absl::OkStatus(); - } - else if (selector == "Read") - { - return PerformReadSelector(args, template_args, result); - } - else if (selector == "Write") - { - return PerformWriteSelector(args, result); - } - else if (selector == "WriteLinear") - { - return PerformWriteLinearSelector(args, result); - } - else if (selector == "GetAddress") - { - return 
PerformGetAddressSelector(args, result); - } - else if (selector == "GetPtrWithSliceOffset") - { - return PerformGetPtrWithSliceOffsetSelector(args, result); - } - else if (selector == "GetWHOffset") - { - return PerformGetWHOffsetSelector(args, result); - } - else if (selector == "GetHandle") - { - return PerformGetHandleSelector(args, result); - } - else - { - return absl::NotFoundError( - absl::StrCat("TensorDescriptor don't have selector with name - ", selector)); - } -} - -absl::Status TensorDescriptor::PerformReadSelector(const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const -{ - DataType read_as_type = data_type; - if (!template_args.empty()) - { - if (template_args.size() != 1) - { - return absl::NotFoundError("Unrecognized Read selector template arguments."); - } - else - { - RETURN_IF_ERROR(GetDataTypeFromTemplateArgs(template_args[0], &read_as_type)); - } - } - if (args.size() == 1) - { // function overload for 1D linear types. 
- if (storage_type == TensorStorageType::BUFFER || - storage_type == TensorStorageType::IMAGE_BUFFER) - { - *result = Read(read_as_type, args[0]); - return absl::OkStatus(); - } - else - { - return absl::InvalidArgumentError( - "Read selector with single argument can be used only with linear " - "storage types(BUFFER or IMAGE_BUFFER)"); - } - } - std::string xc; - std::string yc; - std::string zc; - std::string sc; - std::string bc; - bool parsed = ParseCoordsFromArgs(args, 0, &xc, &yc, &zc, &sc, &bc); - if (args.size() < 2 || !parsed) - { - return absl::NotFoundError("Unrecognized Read selector"); - } - - *result = Read(read_as_type, GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc)); - return absl::OkStatus(); -} - -absl::Status TensorDescriptor::GetLinkingContextFromWriteSelector( - const std::vector<std::string> &args, std::string *value_name, std::string *x_coord, - std::string *y_coord, std::string *s_coord) const -{ - std::string xc; - std::string yc; - std::string zc; - std::string sc; - std::string bc; - bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc); - if (args.size() < 2 || !parsed) - { - return absl::NotFoundError("Unrecognized Write selector"); - } - *value_name = args[0]; - if (HasAxis(Axis::BATCH) && !IsBatchedWidth()) - { - *x_coord = absl::StrCat("((", xc, ") * batch + (", bc, "))"); - } - else - { - *x_coord = absl::StrCat("(", xc, ")"); - } - *y_coord = absl::StrCat("(", yc, ")"); - *s_coord = absl::StrCat("(", sc, ")"); - return absl::OkStatus(); -} - -absl::Status TensorDescriptor::PerformWriteSelector(const std::vector<std::string> &args, - std::string *result) const -{ - std::string xc; - std::string yc; - std::string zc; - std::string sc; - std::string bc; - bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc); - if (args.size() < 2 || !parsed) - { - return absl::NotFoundError("Unrecognized Write selector"); - } - *result = Write(args[0], GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc)); - return 
absl::OkStatus(); -} - -absl::Status TensorDescriptor::PerformWriteLinearSelector(const std::vector<std::string> &args, - std::string *result) const -{ - if (storage_type != TensorStorageType::BUFFER && storage_type != TensorStorageType::IMAGE_BUFFER) - { - return absl::InvalidArgumentError("WriteLinear selector can be used only with linear " - "storages(BUFFER/IMAGE_BUFFER)"); - } - if (args.size() != 2) - { - return absl::NotFoundError("Unrecognized WriteLinear selector"); - } - *result = Write(args[0], "(" + args[1] + ")"); - return absl::OkStatus(); -} - -std::string TensorDescriptor::Read(DataType read_as_type, const std::string &global_address) const -{ - const std::string read_as = read_as_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef"; - std::string image_type; - if (storage_type == TensorStorageType::TEXTURE_2D || - storage_type == TensorStorageType::SINGLE_TEXTURE_2D) - { - image_type = "image2d"; - } - else if (storage_type == TensorStorageType::TEXTURE_3D) - { - image_type = "image3d"; - } - else if (storage_type == TensorStorageType::TEXTURE_ARRAY) - { - image_type = "image2d_array"; - } - switch (storage_type) - { - case TensorStorageType::BUFFER: - if (read_as_type == data_type) - { - return absl::StrCat("buffer[", global_address, "]"); - } - else - { - const std::string conversion = - read_as_type == DataType::FLOAT16 ? 
"convert_half4" : "convert_float4"; - return absl::StrCat(conversion, "(buffer[", global_address, "])"); - } - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_3D: - case TensorStorageType::SINGLE_TEXTURE_2D: - case TensorStorageType::TEXTURE_ARRAY: - return absl::StrCat(read_as, "(", image_type, - ", " + TextureAddressModeToString(ModeFromState()) + ", ", global_address, - ")"); - case TensorStorageType::IMAGE_BUFFER: - return absl::StrCat(read_as, "(image_buffer, ", global_address, ")"); - case TensorStorageType::UNKNOWN: - return ""; - } - return ""; -} - -std::string TensorDescriptor::Write(const std::string &var_name, - const std::string &global_address) const -{ - std::string image_type; - if (storage_type == TensorStorageType::TEXTURE_2D || - storage_type == TensorStorageType::SINGLE_TEXTURE_2D) - { - image_type = "image2d"; - } - else if (storage_type == TensorStorageType::TEXTURE_3D) - { - image_type = "image3d"; - } - else if (storage_type == TensorStorageType::TEXTURE_ARRAY) - { - image_type = "image2d_array"; - } - switch (storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - return absl::StrCat("buffer[", global_address, "] = ", var_name, ";\n"); - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_3D: - case TensorStorageType::SINGLE_TEXTURE_2D: - case TensorStorageType::TEXTURE_ARRAY: - return absl::StrCat(GetWriteImageFromDataType(data_type), "(", image_type, ", ", - global_address, ", ", var_name, ");\n"); - case TensorStorageType::UNKNOWN: - return ""; - } - return ""; -} - -absl::Status TensorDescriptor::PerformGetAddressSelector(const std::vector<std::string> &args, - std::string *result) const -{ - std::string xc; - std::string yc; - std::string zc; - std::string sc; - std::string bc; - bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc); - if (args.size() < 3 || !parsed) - { - return absl::NotFoundError("Unrecognized GetAddress selector"); - } - - 
*result = DeclareAddress(args[0], GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc)); - return absl::OkStatus(); -} - -absl::Status -TensorDescriptor::PerformGetPtrWithSliceOffsetSelector(const std::vector<std::string> &args, - std::string *result) const -{ - if (storage_type != TensorStorageType::BUFFER) - { - return absl::InvalidArgumentError( - "GetPtrWithSliceOffset selector can be used only with BUFFER"); - } - if (args.size() != 1) - { - return absl::NotFoundError( - absl::StrCat("GetPtrWithSliceOffset require one argument(slice coordinate), but ", - args.size(), " was passed")); - } - *result = absl::StrCat("buffer + ", args[0], " * ", GetSliceStride()); - return absl::OkStatus(); -} - -absl::Status TensorDescriptor::PerformGetWHOffsetSelector(const std::vector<std::string> &args, - std::string *result) const -{ - if (storage_type != TensorStorageType::BUFFER && storage_type != TensorStorageType::IMAGE_BUFFER) - { - return absl::InvalidArgumentError( - "GetWHOffset selector can be used only with BUFFER/IMAGE_BUFFER"); - } - if (args.size() != 2) - { - return absl::NotFoundError(absl::StrCat( - "GetWHOffset require two arguments(X and Y coordinates), but ", args.size(), " was passed")); - } - if (HasAxis(Axis::BATCH) && !IsBatchedWidth()) - { - auto it = state_vars_.find("batch_id"); - std::string batch_id; - if (it == state_vars_.end()) - { - return absl::NotFoundError( - "Not found batch_id. Should be setted up by SetBatchRef(). 
method"); - } - else - { - batch_id = it->second; - } - *result = absl::StrCat("((", args[1], ") * ", GetWidth(), " + (", args[0], ")) * batch + (", - batch_id, ")"); - } - else - { - *result = absl::StrCat("(", args[1], ") * ", GetWidth(), " + (", args[0], ")"); - } - return absl::OkStatus(); -} - -absl::Status TensorDescriptor::PerformGetHandleSelector(const std::vector<std::string> &args, - std::string *result) const -{ - if (!args.empty()) - { - return absl::NotFoundError( - absl::StrCat("GetHandle does not require arguments, but ", args.size(), " was passed")); - } - switch (storage_type) - { - case TensorStorageType::BUFFER: - *result = "buffer"; - return absl::OkStatus(); - case TensorStorageType::IMAGE_BUFFER: - if (access_type_ == AccessType::READ) - { - *result = "image_buffer"; - } - else - { - *result = "buffer"; - } - return absl::OkStatus(); - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::SINGLE_TEXTURE_2D: - *result = "image2d"; - return absl::OkStatus(); - case TensorStorageType::TEXTURE_ARRAY: - *result = "image2d_array"; - return absl::OkStatus(); - case TensorStorageType::TEXTURE_3D: - *result = "image3d"; - return absl::OkStatus(); - case TensorStorageType::UNKNOWN: - return absl::UnavailableError("Unknown type"); - } - return absl::UnavailableError("Unknown type"); -} - -std::string TensorDescriptor::DeclareAddress(const std::string &var_name, - const std::string &address) const -{ - return absl::StrCat(StorageTypeToAddressType(), " ", var_name, " = ", address, ";"); -} - -std::string TensorDescriptor::StorageTypeToAddressType() const -{ - switch (storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - return "int"; - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::SINGLE_TEXTURE_2D: - return "int2"; - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return "int4"; - case TensorStorageType::UNKNOWN: - return ""; - } - return ""; -} - -std::string 
TensorDescriptor::GetGlobalAddressNoDeclarationWHS(const std::string &x, - const std::string &y, - const std::string &s) const -{ - switch (storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - { - return absl::Substitute("((($2) * height + ($1)) * $3 + ($0))", x, y, s, GetWidth()); - } - case TensorStorageType::TEXTURE_2D: - return absl::Substitute("(int2)(($0), ($1) * slices + ($2))", x, y, s); - case TensorStorageType::SINGLE_TEXTURE_2D: - return absl::StrCat("(int2)(", x, ", ", y, ")"); - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return absl::StrCat("(int4)(", x, ", ", y, ", ", s, ", 0)"); - case TensorStorageType::UNKNOWN: - return "error"; - } - return "error"; -} - -std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHSB(const std::string &x, - const std::string &y, - const std::string &s, - const std::string &b) const -{ - switch (storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - return absl::Substitute("(((($3) * height + $2) * width + ($1)) * batch + ($0))", b, x, y, s); - case TensorStorageType::TEXTURE_2D: - return absl::Substitute("(int2)(($0) * batch + ($1), ($2) * slices + ($3))", x, b, y, s); - case TensorStorageType::SINGLE_TEXTURE_2D: - return absl::Substitute("(int2)(($0) * batch + ($1), ($2))", x, b, y); - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return absl::Substitute("(int4)(($0) * batch + ($1), ($2), ($3), 0)", x, b, y, s); - default: - throw std::runtime_error("Unknown storage type"); - } -} - -std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHDS(const std::string &x, - const std::string &y, - const std::string &z, - const std::string &s) const -{ - switch (storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - { - return absl::Substitute("(((($3) * slices + ($2)) * height + ($1)) * $4 + ($0))", x, y, s, z, - GetWidth()); - } - 
case TensorStorageType::TEXTURE_2D: - return absl::Substitute("(int2)(($0) * depth + ($1), ($2) * slices + ($3))", x, z, y, s); - case TensorStorageType::SINGLE_TEXTURE_2D: - return absl::Substitute("(int2)(($0) * depth + ($1), ($2))", x, z, y); - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return absl::Substitute("(int4)(($0), ($1), ($2) * slices + ($3), 0)", x, y, z, s); - case TensorStorageType::UNKNOWN: - return "error"; - } - return "error"; -} - -std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHDSB(const std::string &x, - const std::string &y, - const std::string &z, - const std::string &s, - const std::string &b) const -{ - switch (storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - return absl::Substitute("((((($4) * slices + ($3)) * height + $2) * width + ($1)) * batch + " - "($0))", - b, x, y, s, z); - case TensorStorageType::TEXTURE_2D: - return absl::Substitute("(int2)((($0) * batch + ($1)) * depth + ($2), ($3) * slices + ($4))", - x, b, z, y, s); - case TensorStorageType::SINGLE_TEXTURE_2D: - return absl::Substitute("(int2)((($0) * batch + ($1)) * depth + ($2), ($3))", x, b, z, y); - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return absl::Substitute("(int4)(($0) * batch + ($1), ($2), ($3) * slices + ($4), 0)", x, b, y, - z, s); - default: - throw std::runtime_error("Unknown storage type"); - } -} - -std::string TensorDescriptor::GetGlobalAddressNoDeclaration(const std::string &xc, - const std::string &yc, - const std::string &zc, - const std::string &sc, - const std::string &bc) const -{ - if (layout == Layout::HWC || (IsBatchedWidth() && layout == Layout::BHWC)) - { - return GetGlobalAddressNoDeclarationWHS(xc, yc, sc); - } - else if (layout == Layout::BHWC) - { - return GetGlobalAddressNoDeclarationWHSB(xc, yc, sc, bc); - } - else if (layout == Layout::HWDC || (IsBatchedWidth() && layout == Layout::BHWDC)) - { - return 
GetGlobalAddressNoDeclarationWHDS(xc, yc, zc, sc); - } - else if (layout == Layout::BHWDC) - { - return GetGlobalAddressNoDeclarationWHDSB(xc, yc, zc, sc, bc); - } - else - { - throw std::runtime_error("Unsupported layout"); - } -} - -absl::Status TensorDescriptor::GetDataTypeFromTemplateArgs(const std::string &template_arg, - DataType *result) const -{ - std::string read_type = template_arg; - if (read_type == "FLT" || read_type == "ACCUM_FLT") - { - auto it = state_vars_.find(read_type); - if (it == state_vars_.end()) - { - return absl::UnavailableError( - absl::StrCat("Read selector template argument ", read_type, " uninitialized.")); - } - else - { - read_type = it->second; - } - } - - if (read_type == "half") - { - *result = DataType::FLOAT16; - } - else if (read_type == "float") - { - *result = DataType::FLOAT32; - } - else - { - return absl::NotFoundError( - absl::StrCat("Unrecognized Read selector template argument - ", read_type)); - } - return absl::OkStatus(); -} - -bool TensorDescriptor::HasAxis(Axis axis) const -{ - if (axis == Axis::WIDTH || axis == Axis::HEIGHT || axis == Axis::CHANNELS) - { - return true; - } - if (axis == Axis::BATCH && (layout == Layout::BHWC || layout == Layout::BHWDC)) - { - return true; - } - if (axis == Axis::DEPTH && (layout == Layout::HWDC || layout == Layout::BHWDC)) - { - return true; - } - return false; -} - -void TensorDescriptor::SetTextureAddressMode(TextureAddressMode mode) -{ - if (mode == TextureAddressMode::ZERO) - { - state_vars_["TextureMode"] = "ZERO"; - } - else - { - state_vars_["TextureMode"] = "DONT_CARE"; - } -} - -bool TensorDescriptor::ParseCoordsFromArgs(const std::vector<std::string> &args, int offset, - std::string *xc, std::string *yc, std::string *zc, - std::string *sc, std::string *bc) const -{ - if (HasAxis(Axis::WIDTH)) - { - if ((size_t)offset >= args.size()) - return false; - *xc = args[offset++]; - } - if (HasAxis(Axis::HEIGHT)) - { - if ((size_t)offset >= args.size()) - return false; - *yc = 
args[offset++]; - } - if (HasAxis(Axis::DEPTH)) - { - if ((size_t)offset >= args.size()) - return false; - *zc = args[offset++]; - } - if (HasAxis(Axis::CHANNELS)) - { - if ((size_t)offset >= args.size()) - { - auto it = state_vars_.find("slice_id"); - if (it == state_vars_.end()) - { - return false; - } - else - { - *sc = it->second; - } - } - else - { - *sc = args[offset++]; - } - } - if (HasAxis(Axis::BATCH) && !IsBatchedWidth()) - { - if ((size_t)offset >= args.size()) - { - auto it = state_vars_.find("batch_id"); - if (it == state_vars_.end()) - { - return false; - } - else - { - *bc = it->second; - } - } - else - { - *bc = args[offset++]; - } - } - return true; -} - -bool TensorDescriptor::IsBatchedWidth() const -{ - auto it = state_vars_.find("BatchedWidth"); - return it != state_vars_.end() && it->second == "true"; -} - -std::string TensorDescriptor::GetWidth() const -{ - std::string div; - auto it1 = state_vars_.find("ElementsX2"); - if (it1 != state_vars_.end() && it1->second == "true") - { - div = "_div2"; - } - auto it2 = state_vars_.find("ElementsX4"); - if (it2 != state_vars_.end() && it2->second == "true") - { - div = "_div4"; - } - auto it = state_vars_.find("BatchedWidth"); - if (it != state_vars_.end() && it->second == "true") - { - return "width_batched" + div; - } - else - { - return "width" + div; - } -} - -std::string TensorDescriptor::GetSliceStride() const -{ - if (IsBatchedWidth()) - { - return GetWidth() + " * height"; - } - else - { - if (HasAxis(Axis::BATCH)) - { - return GetWidth() + " * height * batch"; - } - else - { - return GetWidth() + " * height"; - } - } -} - -TextureAddressMode TensorDescriptor::ModeFromState() const -{ - auto it = state_vars_.find("TextureMode"); - if (it != state_vars_.end()) - { - if (it->second == "ZERO") - { - return TextureAddressMode::ZERO; - } - else - { - return TextureAddressMode::DONT_CARE; - } - } - else - { - return TextureAddressMode::DONT_CARE; - } -} - -void TensorDescriptor::UploadData(const 
InternalTensor<HWC, DataType::FLOAT32> &src) -{ - shape = BHWDC(1, src.shape.h, src.shape.w, 1, src.shape.c); - UploadData(absl::MakeConstSpan(src.data)); -} - -void TensorDescriptor::UploadData(const InternalTensor<Linear, DataType::FLOAT32> &src) -{ - shape = BHWDC(1, 1, 1, 1, src.shape.v); - UploadData(absl::MakeConstSpan(src.data)); -} - -void TensorDescriptor::UploadData(absl::Span<const float> src) -{ - int aligned_channels = - storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape.c : AlignByN(shape.c, 4); - int elements_count = shape.b * shape.w * shape.h * shape.d * aligned_channels; - data.resize(elements_count * SizeOf(data_type)); - if (data_type == DataType::FLOAT32) - { - float *gpu_data = reinterpret_cast<float *>(data.data()); - DataFromBHWDC(src, shape, *this, absl::MakeSpan(gpu_data, elements_count)); - } -} - -bool TensorDescriptor::SupportsZeroClamp(const Axis &axis) const -{ - switch (storage_type) - { - case TensorStorageType::UNKNOWN: - return false; - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - return false; - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::SINGLE_TEXTURE_2D: - return axis == Axis::WIDTH || axis == Axis::HEIGHT; - case TensorStorageType::TEXTURE_3D: - return axis == Axis::WIDTH || axis == Axis::HEIGHT || axis == Axis::DEPTH; - } - return false; -} - -bool TensorDescriptor::CanReadOutOfBorder(const Axis &) const -{ - switch (storage_type) - { - case TensorStorageType::UNKNOWN: - return false; - case TensorStorageType::BUFFER: - return false; - case TensorStorageType::IMAGE_BUFFER: - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_3D: - case TensorStorageType::SINGLE_TEXTURE_2D: - case TensorStorageType::TEXTURE_ARRAY: - return true; - } - return false; -} - -bool TensorDescriptor::IsLinear() const -{ - return storage_type == TensorStorageType::BUFFER || - storage_type == TensorStorageType::IMAGE_BUFFER; -} - -bool 
TensorDescriptor::ReturnsZeroForNegOneRead() const -{ - return storage_type == TensorStorageType::IMAGE_BUFFER; -} - -namespace -{ -int GetLinearIndex(const TensorDescriptor &desc, const BHWDC &shape, int b, int x, int y, int d, - int s, int sub_c) -{ - const int slices = DivideRoundUp(shape.c, 4); - switch (desc.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return ((((d * slices + s) * shape.h + y) * shape.w + x) * shape.b + b) * 4 + - sub_c; // DSHWBC4 - case TensorStorageType::TEXTURE_2D: - return ((((y * slices + s) * shape.w + x) * shape.b + b) * shape.d + d) * 4 + - sub_c; // HSWBDC4 - case TensorStorageType::SINGLE_TEXTURE_2D: - return (((y * shape.w + x) * shape.b + b) * shape.d + d) * shape.c + sub_c; // HWBDC - default: - return -1; - } - return -1; -} - -int GetChannelsAlignment(const TensorDescriptor &desc, const BHWDC &shape) -{ - return desc.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? 
shape.c : 4; -} -} // namespace - -template <typename T> -void DataFromBHWDC(absl::Span<const float> src, const BHWDC &shape, const TensorDescriptor &desc, - absl::Span<T> dst) -{ - const int channels_alignment = GetChannelsAlignment(desc, shape); - const int slices = DivideRoundUp(shape.c, 4); - for (int b = 0; b < shape.b; ++b) - { - for (int s = 0; s < slices; ++s) - { - for (int y = 0; y < shape.h; ++y) - { - for (int x = 0; x < shape.w; ++x) - { - for (int d = 0; d < shape.d; ++d) - { - for (int c = 0; c < channels_alignment; ++c) - { - float value; - if (s * 4 + c < shape.c) - { - const int cpu_index = shape.LinearIndex({b, y, x, d, s * 4 + c}); - value = src[cpu_index]; - } - else - { - value = 0.0f; - } - int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c); - dst[gpu_index] = value; - } - } - } - } - } - } -} - -template void DataFromBHWDC<float>(absl::Span<const float> src, const BHWDC &shape, - const TensorDescriptor &desc, absl::Span<float> dst); - -template <typename T> -void DataToBHWDC(absl::Span<const T> src, const BHWDC &shape, const TensorDescriptor &desc, - absl::Span<float> dst) -{ - const int channels_alignment = GetChannelsAlignment(desc, shape); - const int slices = DivideRoundUp(shape.c, 4); - for (int b = 0; b < shape.b; ++b) - { - for (int s = 0; s < slices; ++s) - { - for (int y = 0; y < shape.h; ++y) - { - for (int x = 0; x < shape.w; ++x) - { - for (int d = 0; d < shape.d; ++d) - { - for (int c = 0; c < channels_alignment; ++c) - { - if (s * 4 + c >= shape.c) - { - continue; - } - int cpu_index = shape.LinearIndex({b, y, x, d, s * 4 + c}); - int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c); - dst[cpu_index] = src[gpu_index]; - } - } - } - } - } - } -} - -template void DataToBHWDC<float>(absl::Span<const float> src, const BHWDC &shape, - const TensorDescriptor &desc, absl::Span<float> dst); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git 
a/runtime/onert/backend/gpu_cl/open_cl/TensorType.h b/runtime/onert/backend/gpu_cl/open_cl/TensorType.h deleted file mode 100644 index 45523783f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/TensorType.h +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__ - -#include <cstddef> -#include <string> - -#include "absl/types/span.h" -#include "GpuObject.h" -#include "DataType.h" -#include "InternalTensor.h" -#include "Shape.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class TextureAddressMode -{ - DONT_CARE, // translated to CLK_ADDRESS_NONE - ZERO, // translated to CLK_ADDRESS_CLAMP -}; - -std::string TextureAddressModeToString(TextureAddressMode address_mode); - -enum class TensorStorageType -{ - UNKNOWN, - BUFFER, - IMAGE_BUFFER, - TEXTURE_2D, - TEXTURE_3D, - TEXTURE_ARRAY, - SINGLE_TEXTURE_2D -}; - -struct TensorDescriptor : public GPUObjectDescriptor -{ - TensorDescriptor() = default; - TensorDescriptor(DataType dt, TensorStorageType st, Layout l) - : data_type(dt), storage_type(st), layout(l) - { - } - - TensorDescriptor(const TensorDescriptor &) = default; - TensorDescriptor &operator=(const TensorDescriptor &) = default; - 
TensorDescriptor(TensorDescriptor &&desc); - TensorDescriptor &operator=(TensorDescriptor &&desc); - - bool operator==(const TensorDescriptor &d) const - { - return data_type == d.data_type && storage_type == d.storage_type && layout == d.layout; - } - - bool operator!=(const TensorDescriptor &d) const { return !(*this == d); } - - absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const override; - - GPUResources GetGPUResources() const override; - - absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override; - void Release() override { data.clear(); } - - bool HasAxis(Axis axis) const; - void SetTextureAddressMode(TextureAddressMode mode); - - absl::Status GetLinkingContextFromWriteSelector(const std::vector<std::string> &args, - std::string *value_name, std::string *x_coord, - std::string *y_coord, std::string *s_coord) const; - - void UploadData(const InternalTensor<HWC, DataType::FLOAT32> &src); - void UploadData(const InternalTensor<Linear, DataType::FLOAT32> &src); - - bool SupportsZeroClamp(const Axis &axis) const; - bool CanReadOutOfBorder(const Axis &axis) const; - bool IsLinear() const; - - // applicable only for types that: IsLinear -> true. - // In this case for address we have 1d component - addr (int) - // If for addr == -1 this linear storage type returns FLT4(0.0), this function - // returns true, otherwise false - bool ReturnsZeroForNegOneRead() const; - - DataType data_type = DataType::UNKNOWN; - TensorStorageType storage_type = TensorStorageType::UNKNOWN; - // This field describes logical layout, actual(physical) GPU layout can be - // totally different. 
- Layout layout = Layout::UNKNOWN; // Supported layouts is HWC, BHWC, HWDC, BHWDC - - // optional - BHWDC shape; - std::vector<uint8_t> data; - -private: - absl::Status PerformReadSelector(const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const; - - absl::Status PerformGetAddressSelector(const std::vector<std::string> &args, - std::string *result) const; - - absl::Status PerformGetPtrWithSliceOffsetSelector(const std::vector<std::string> &args, - std::string *result) const; - - absl::Status PerformGetWHOffsetSelector(const std::vector<std::string> &args, - std::string *result) const; - - absl::Status PerformGetHandleSelector(const std::vector<std::string> &args, - std::string *result) const; - - std::string DeclareAddress(const std::string &var_name, const std::string &address) const; - - std::string StorageTypeToAddressType() const; - - absl::Status PerformWriteSelector(const std::vector<std::string> &args, - std::string *result) const; - - absl::Status PerformWriteLinearSelector(const std::vector<std::string> &args, - std::string *result) const; - - std::string Read(DataType read_as_type, const std::string &global_address) const; - std::string Write(const std::string &var_name, const std::string &global_address) const; - - bool IsBatchedWidth() const; - - std::string GetWidth() const; - std::string GetSliceStride() const; - - TextureAddressMode ModeFromState() const; - - absl::Status GetDataTypeFromTemplateArgs(const std::string &template_arg, DataType *result) const; - - std::string GetGlobalAddressNoDeclarationWHS(const std::string &x, const std::string &y, - const std::string &s) const; - std::string GetGlobalAddressNoDeclarationWHSB(const std::string &x, const std::string &y, - const std::string &s, const std::string &b) const; - std::string GetGlobalAddressNoDeclarationWHDS(const std::string &x, const std::string &y, - const std::string &z, const std::string &s) const; - std::string 
GetGlobalAddressNoDeclarationWHDSB(const std::string &x, const std::string &y, - const std::string &z, const std::string &s, - const std::string &b) const; - std::string GetGlobalAddressNoDeclaration(const std::string &xc, const std::string &yc, - const std::string &zc, const std::string &sc, - const std::string &bc) const; - - bool ParseCoordsFromArgs(const std::vector<std::string> &args, int offset, std::string *xc, - std::string *yc, std::string *zc, std::string *sc, - std::string *bc) const; - - void UploadData(absl::Span<const float> src); -}; - -template <typename T> -void DataFromBHWDC(absl::Span<const float> src, const BHWDC &shape, const TensorDescriptor &desc, - absl::Span<T> dst); - -template <typename T> -void DataToBHWDC(absl::Span<const T> src, const BHWDC &shape, const TensorDescriptor &desc, - absl::Span<float> dst); - -std::string ToString(TensorStorageType type); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc deleted file mode 100644 index b1f8309e4..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "TensorTypeUtil.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -ObjectType ToObjectType(TensorStorageType type) -{ - switch (type) - { - case TensorStorageType::IMAGE_BUFFER: - case TensorStorageType::BUFFER: - return ObjectType::OPENCL_BUFFER; - case TensorStorageType::SINGLE_TEXTURE_2D: - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return ObjectType::OPENCL_TEXTURE; - default: - return ObjectType::UNKNOWN; - } -} - -DataLayout ToDataLayout(TensorStorageType type) -{ - switch (type) - { - case TensorStorageType::BUFFER: - return DataLayout::DHWC4; - case TensorStorageType::IMAGE_BUFFER: - return DataLayout::DHWC4; - case TensorStorageType::SINGLE_TEXTURE_2D: - return DataLayout::BHWC; - case TensorStorageType::TEXTURE_2D: - return DataLayout::HDWC4; - case TensorStorageType::TEXTURE_ARRAY: - return DataLayout::DHWC4; - case TensorStorageType::TEXTURE_3D: - return DataLayout::DHWC4; - default: - return DataLayout::UNKNOWN; - } -} - -TensorStorageType ToTensorStorageType(ObjectType object_type, DataLayout data_layout) -{ - switch (object_type) - { - case ObjectType::OPENCL_BUFFER: - return TensorStorageType::BUFFER; - case ObjectType::OPENCL_TEXTURE: - switch (data_layout) - { - case DataLayout::BHWC: - return TensorStorageType::SINGLE_TEXTURE_2D; - case DataLayout::DHWC4: - return TensorStorageType::TEXTURE_ARRAY; - case DataLayout::HDWC4: - return TensorStorageType::TEXTURE_2D; - default: - return TensorStorageType::UNKNOWN; - } - default: - return TensorStorageType::UNKNOWN; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h b/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h deleted file mode 100644 index f56fc3d83..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2021 Samsung 
Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__ - -#include "Api.h" -#include "TensorType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -ObjectType ToObjectType(TensorStorageType type); - -DataLayout ToDataLayout(TensorStorageType type); - -TensorStorageType ToTensorStorageType(ObjectType object_type, DataLayout data_layout); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc b/runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc deleted file mode 100644 index ae25e85d0..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Texture2d.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -// Creates new 4-channel 2D texture with cl_channel_type elements -absl::Status CreateTexture2D(int width, int height, DataType type, void *data, CLContext *context, - Texture2D *result) -{ - cl_mem texture; - cl_channel_type channel_type = DataTypeToChannelType(type); - RETURN_IF_ERROR( - CreateRGBAImage2D(context->context(), width, height, channel_type, data, &texture)); - *result = Texture2D(texture, width, height, channel_type); - - return absl::OkStatus(); -} -} // namespace - -Texture2DDescriptor::Texture2DDescriptor(Texture2DDescriptor &&desc) - : GPUObjectDescriptor(std::move(desc)), element_type(desc.element_type), - normalized(desc.normalized), normalized_type(desc.normalized_type), size(desc.size), - data(std::move(desc.data)) -{ -} - -Texture2DDescriptor &Texture2DDescriptor::operator=(Texture2DDescriptor &&desc) -{ - if (this != &desc) - { - std::swap(element_type, desc.element_type); - std::swap(normalized, desc.normalized); - std::swap(normalized_type, desc.normalized_type); - std::swap(size, desc.size); - data = std::move(desc.data); - GPUObjectDescriptor::operator=(std::move(desc)); - } - return *this; -} - -void Texture2DDescriptor::Release() { data.clear(); } - -GPUResources Texture2DDescriptor::GetGPUResources() const -{ - GPUResources resources; - GPUImage2DDescriptor desc; - desc.data_type = element_type; - desc.access_type = access_type_; - resources.images2d.push_back({"tex2d", desc}); - return resources; -} - 
-absl::Status Texture2DDescriptor::PerformSelector(const std::string &selector, - const std::vector<std::string> &args, - const std::vector<std::string> &, - std::string *result) const -{ - if (selector == "Read") - { - return PerformReadSelector(args, result); - } - else - { - return absl::NotFoundError( - absl::StrCat("Texture2DDescriptor don't have selector with name - ", selector)); - } -} - -absl::Status Texture2DDescriptor::PerformReadSelector(const std::vector<std::string> &args, - std::string *result) const -{ - if (args.size() != 2) - { - return absl::NotFoundError(absl::StrCat("Texture2DDescriptor Read require two arguments, but ", - args.size(), " was passed")); - } - std::string read; - switch (element_type) - { - case DataType::FLOAT32: - read = "read_imagef"; - break; - case DataType::FLOAT16: - read = "read_imageh"; - break; - case DataType::INT8: - case DataType::INT16: - case DataType::INT32: - if (normalized) - { - read = normalized_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef"; - } - else - { - read = "read_imagei"; - } - break; - case DataType::UINT8: - case DataType::UINT16: - case DataType::UINT32: - if (normalized) - { - read = normalized_type == DataType::FLOAT16 ? 
"read_imageh" : "read_imagef"; - } - else - { - read = "read_imageui"; - } - break; - default: - read = "unknown_type"; - break; - } - *result = absl::StrCat(read, "(tex2d, smp_none, (int2)(", args[0], ", " + args[1] + "))"); - return absl::OkStatus(); -} - -absl::Status Texture2DDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const -{ - Texture2D gpu_texture; - RETURN_IF_ERROR(gpu_texture.CreateFromTexture2DDescriptor(*this, context)); - *result = absl::make_unique<Texture2D>(std::move(gpu_texture)); - return absl::OkStatus(); -} - -Texture2D::Texture2D(cl_mem texture, int width, int height, cl_channel_type type) - : texture_(texture), width_(width), height_(height), channel_type_(type) -{ -} - -Texture2D::Texture2D(Texture2D &&texture) - : texture_(texture.texture_), width_(texture.width_), height_(texture.height_), - channel_type_(texture.channel_type_) -{ - texture.texture_ = nullptr; - texture.width_ = 0; - texture.height_ = 0; -} - -Texture2D &Texture2D::operator=(Texture2D &&texture) -{ - if (this != &texture) - { - Release(); - std::swap(channel_type_, texture.channel_type_); - std::swap(width_, texture.width_); - std::swap(height_, texture.height_); - std::swap(texture_, texture.texture_); - } - return *this; -} - -void Texture2D::Release() -{ - if (texture_) - { - clReleaseMemObject(texture_); - texture_ = nullptr; - width_ = 0; - height_ = 0; - } -} - -absl::Status Texture2D::GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const -{ - const auto *texture_desc = dynamic_cast<const Texture2DDescriptor *>(obj_ptr); - if (!texture_desc) - { - return absl::InvalidArgumentError("Expected Texture2DDescriptor on input."); - } - - resources->images2d.push_back({"tex2d", texture_}); - return absl::OkStatus(); -} - -absl::Status Texture2D::CreateFromTexture2DDescriptor(const Texture2DDescriptor &desc, - CLContext *context) -{ - width_ = desc.size.x; - height_ = desc.size.y; - channel_type_ = 
DataTypeToChannelType(desc.element_type, desc.normalized); - uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data()); - return CreateRGBAImage2D(context->context(), desc.size.x, desc.size.y, channel_type_, data_ptr, - &texture_); -} - -// Creates new 4-channel 2D texture with f32 elements -absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext *context, Texture2D *result) -{ - return CreateTexture2D(width, height, DataType::FLOAT32, nullptr, context, result); -} - -// Creates new 4-channel 2D texture with f16 elements -absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext *context, Texture2D *result) -{ - return CreateTexture2D(width, height, DataType::FLOAT16, nullptr, context, result); -} - -absl::Status CreateTexture2DRGBA(DataType type, int width, int height, CLContext *context, - Texture2D *result) -{ - return CreateTexture2D(width, height, type, nullptr, context, result); -} - -absl::Status CreateTexture2DRGBA(DataType type, int width, int height, void *data, - CLContext *context, Texture2D *result) -{ - return CreateTexture2D(width, height, type, data, context, result); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.h b/runtime/onert/backend/gpu_cl/open_cl/Texture2d.h deleted file mode 100644 index 264507079..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__ - -#include "absl/strings/str_cat.h" -#include "absl/types/span.h" -#include "ClCommandQueue.h" -#include "ClContext.h" -#include "GpuObject.h" -#include "OpenclWrapper.h" -#include "TensorType.h" -#include "Util.h" -#include "DataType.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -struct Texture2DDescriptor : public GPUObjectDescriptor -{ - DataType element_type; - bool normalized = false; // used with INT data types, if normalized, we read - // in kernel float data. 
- DataType normalized_type; // can be FLOAT32 or FLOAT16, using with normalized - // = true - - // optional - int2 size = int2(0, 0); - std::vector<uint8_t> data; - - Texture2DDescriptor() = default; - Texture2DDescriptor(const Texture2DDescriptor &) = default; - Texture2DDescriptor &operator=(const Texture2DDescriptor &) = default; - Texture2DDescriptor(Texture2DDescriptor &&desc); - Texture2DDescriptor &operator=(Texture2DDescriptor &&desc); - - absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const override; - - GPUResources GetGPUResources() const override; - absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const; - - absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override; - void Release() override; -}; - -// Texture2D represent formatted GPU data storage. -// Texture2D is moveable but not copyable. -class Texture2D : public GPUObject -{ -public: - Texture2D() {} // just for using Texture2D as a class members - Texture2D(cl_mem texture, int width, int height, cl_channel_type type); - - // Move only - Texture2D(Texture2D &&texture); - Texture2D &operator=(Texture2D &&texture); - Texture2D(const Texture2D &) = delete; - Texture2D &operator=(const Texture2D &) = delete; - - virtual ~Texture2D() { Release(); } - - cl_mem GetMemoryPtr() const { return texture_; } - - // Writes data to a texture. Data should point to a region that - // has exact width * height * sizeof(pixel) bytes. - template <typename T> absl::Status WriteData(CLCommandQueue *queue, const absl::Span<T> data); - - // Reads data from Texture2D into CPU memory. 
- template <typename T> absl::Status ReadData(CLCommandQueue *queue, std::vector<T> *result) const; - - absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const override; - - absl::Status CreateFromTexture2DDescriptor(const Texture2DDescriptor &desc, CLContext *context); - -private: - void Release(); - - cl_mem texture_ = nullptr; - int width_; - int height_; - cl_channel_type channel_type_; -}; - -using Texture2DPtr = std::shared_ptr<Texture2D>; - -// Creates new 4-channel 2D texture with f32 elements -absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext *context, Texture2D *result); - -// Creates new 4-channel 2D texture with f16 elements -absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext *context, Texture2D *result); - -absl::Status CreateTexture2DRGBA(DataType type, int width, int height, CLContext *context, - Texture2D *result); - -absl::Status CreateTexture2DRGBA(DataType type, int width, int height, void *data, - CLContext *context, Texture2D *result); - -template <typename T> -absl::Status Texture2D::WriteData(CLCommandQueue *queue, const absl::Span<T> data) -{ - const int element_size = ChannelTypeToSizeInBytes(channel_type_); - if (sizeof(T) % element_size != 0) - { - return absl::InvalidArgumentError( - "Template type T has not suitable element type for created texture."); - } - if (4 * width_ * height_ * element_size != data.size() * sizeof(T)) - { - return absl::InvalidArgumentError( - "absl::Span<T> data size is different from texture allocated size."); - } - - RETURN_IF_ERROR(queue->EnqueueWriteImage(texture_, int3(width_, height_, 1), data.data())); - - return absl::OkStatus(); -} - -template <typename T> -absl::Status Texture2D::ReadData(CLCommandQueue *queue, std::vector<T> *result) const -{ - const int element_size = ChannelTypeToSizeInBytes(channel_type_); - if (sizeof(T) != element_size) - { - return absl::InvalidArgumentError("Pixel format is different."); - } 
- - const int elements_count = width_ * height_ * 4; - result->resize(elements_count); - - return queue->EnqueueReadImage(texture_, int3(width_, height_, 1), result->data()); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Types.h b/runtime/onert/backend/gpu_cl/open_cl/Types.h deleted file mode 100644 index f3cf33450..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Types.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__ - -#include <array> -#include <cstddef> -#include <cstdint> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// TODO(akulik): make these types Google-style compliant. 
- -template <typename T> struct alignas(sizeof(T)) Vec4 -{ - union { - struct - { - T x, y, z, w; - }; - std::array<T, 4> data_; - }; - - Vec4() : Vec4(T(0.0f)) {} - - template <typename S> Vec4(S x_, S y_, S z_, S w_) : x(x_), y(y_), z(z_), w(w_) {} - explicit Vec4(T v) : x(v), y(v), z(v), w(v) {} - - template <typename S> explicit Vec4(S v) : x(v), y(v), z(v), w(v) {} - - Vec4(const Vec4 &f) : x(f.x), y(f.y), z(f.z), w(f.w) {} - - template <typename S> Vec4(const Vec4<S> &f) : x(f.x), y(f.y), z(f.z), w(f.w) {} - - Vec4 &operator=(const Vec4 &other) - { - x = other.x; - y = other.y; - z = other.z; - w = other.w; - return *this; - } - - static constexpr int size() { return 4; } - - T &operator[](size_t n) { return data_[n]; } - T operator[](size_t n) const { return data_[n]; } - - bool operator==(const Vec4 &value) const - { - return data_[0] == value[0] && data_[1] == value[1] && data_[2] == value[2] && - data_[3] == value[3]; - } - bool operator!=(const Vec4 &value) const { return !(this->operator==(value)); } -}; - -template <typename T> struct alignas(sizeof(T)) Vec3 -{ - union { - struct - { - T x, y, z; - }; - std::array<T, 3> data_; - }; - - Vec3() : Vec3(T(0.0f)) {} - - template <typename S> constexpr Vec3(S x_, S y_, S z_) : x(x_), y(y_), z(z_) {} - explicit Vec3(T v) : x(v), y(v), z(v) {} - - template <typename S> explicit Vec3(S v) : x(v), y(v), z(v) {} - - Vec3(const Vec3 &f) : x(f.x), y(f.y), z(f.z) {} - - template <typename S> Vec3(const Vec3<S> &f) : x(f.x), y(f.y), z(f.z) {} - - Vec3 &operator=(const Vec3 &other) - { - x = other.x; - y = other.y; - z = other.z; - return *this; - } - - static constexpr int size() { return 3; } - - T &operator[](size_t n) { return data_[n]; } - T operator[](size_t n) const { return data_[n]; } - bool operator==(const Vec3 &value) const - { - return data_[0] == value[0] && data_[1] == value[1] && data_[2] == value[2]; - } - bool operator!=(const Vec3 &value) const { return !(this->operator==(value)); } -}; - -template 
<typename T> struct alignas(sizeof(T)) Vec2 -{ - union { - struct - { - T x, y; - }; - std::array<T, 2> data_; - }; - - Vec2() : Vec2(T(0.0f)) {} - - template <typename S> Vec2(S x_, S y_) : x(x_), y(y_) {} - explicit Vec2(T v) : x(v), y(v) {} - - template <typename S> explicit Vec2(S v) : x(v), y(v) {} - - Vec2(const Vec2 &f) : x(f.x), y(f.y) {} - - template <typename S> Vec2(const Vec2<S> &f) : x(f.x), y(f.y) {} - - Vec2 &operator=(const Vec2 &other) - { - x = other.x; - y = other.y; - return *this; - } - - bool operator==(const Vec2 &value) const { return data_[0] == value[0] && data_[1] == value[1]; } - - bool operator!=(const Vec2 &value) const { return !(this->operator==(value)); } - - static constexpr int size() { return 2; } - - T &operator[](size_t n) { return data_[n]; } - T operator[](size_t n) const { return data_[n]; } -}; - -using float2 = Vec2<float>; -using byte2 = Vec2<int8_t>; -using ubyte2 = Vec2<uint8_t>; -using short2 = Vec2<int16_t>; -using ushort2 = Vec2<uint16_t>; -using int2 = Vec2<int32_t>; -using uint2 = Vec2<uint32_t>; - -using float3 = Vec3<float>; -using byte3 = Vec3<int8_t>; -using ubyte3 = Vec3<uint8_t>; -using short3 = Vec3<int16_t>; -using ushort3 = Vec3<uint16_t>; -using int3 = Vec3<int32_t>; -using uint3 = Vec3<uint32_t>; - -using float4 = Vec4<float>; -using byte4 = Vec4<int8_t>; -using ubyte4 = Vec4<uint8_t>; -using short4 = Vec4<int16_t>; -using ushort4 = Vec4<uint16_t>; -using int4 = Vec4<int32_t>; -using uint4 = Vec4<uint32_t>; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Util.cc b/runtime/onert/backend/gpu_cl/open_cl/Util.cc deleted file mode 100644 index 9f5a8388b..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Util.cc +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Util.h" - -#include "absl/strings/str_cat.h" -#include "absl/strings/substitute.h" -#include "Status.h" -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -std::string CLErrorCodeToString(cl_int error_code) -{ - switch (error_code) - { - case CL_SUCCESS: - return "Success"; - case CL_DEVICE_NOT_FOUND: - return "Device not found"; - case CL_DEVICE_NOT_AVAILABLE: - return "Device not available"; - case CL_COMPILER_NOT_AVAILABLE: - return "Compiler not available"; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - return "Memory object allocation failure"; - case CL_OUT_OF_RESOURCES: - return "Out of resources"; - case CL_OUT_OF_HOST_MEMORY: - return "Out of host memory"; - case CL_PROFILING_INFO_NOT_AVAILABLE: - return "Profiling information not available"; - case CL_MEM_COPY_OVERLAP: - return "Memory copy overlap"; - case CL_IMAGE_FORMAT_MISMATCH: - return "Image format mismatch"; - case CL_IMAGE_FORMAT_NOT_SUPPORTED: - return "Image format not supported"; - case CL_BUILD_PROGRAM_FAILURE: - return "Build program failure"; - case CL_MAP_FAILURE: - return "Mapping failure"; - case CL_MISALIGNED_SUB_BUFFER_OFFSET: - return "Misaligned sub-buffer offset"; - case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: - return "Execution status error for events in wait list"; - case CL_COMPILE_PROGRAM_FAILURE: - return "Compile program failure"; - case CL_LINKER_NOT_AVAILABLE: - return "Linker not 
available"; - case CL_LINK_PROGRAM_FAILURE: - return "Link program failure"; - case CL_DEVICE_PARTITION_FAILED: - return "Device partition failed"; - case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: - return "Kernel argument information not available"; - - case CL_INVALID_VALUE: - return "Invalid value"; - case CL_INVALID_DEVICE_TYPE: - return "Invalid device type"; - case CL_INVALID_PLATFORM: - return "Invalid platform"; - case CL_INVALID_DEVICE: - return "Invalid device"; - case CL_INVALID_CONTEXT: - return "Invalid context"; - case CL_INVALID_QUEUE_PROPERTIES: - return "Invalid queue properties"; - case CL_INVALID_COMMAND_QUEUE: - return "Invalid command queue"; - case CL_INVALID_HOST_PTR: - return "Invalid host pointer"; - case CL_INVALID_MEM_OBJECT: - return "Invalid memory object"; - case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: - return "Invalid image format descriptor"; - case CL_INVALID_IMAGE_SIZE: - return "Invalid image size"; - case CL_INVALID_SAMPLER: - return "Invalid sampler"; - case CL_INVALID_BINARY: - return "Invalid binary"; - case CL_INVALID_BUILD_OPTIONS: - return "Invalid build options"; - case CL_INVALID_PROGRAM: - return "Invalid program"; - case CL_INVALID_PROGRAM_EXECUTABLE: - return "Invalid program executable"; - case CL_INVALID_KERNEL_NAME: - return "Invalid kernel name"; - case CL_INVALID_KERNEL_DEFINITION: - return "Invalid kernel definition"; - case CL_INVALID_KERNEL: - return "Invalid kernel"; - case CL_INVALID_ARG_INDEX: - return "Invalid argument index"; - case CL_INVALID_ARG_VALUE: - return "Invalid argument value"; - case CL_INVALID_ARG_SIZE: - return "Invalid argument size"; - case CL_INVALID_KERNEL_ARGS: - return "Invalid kernel arguments"; - case CL_INVALID_WORK_DIMENSION: - return "Invalid work dimension"; - case CL_INVALID_WORK_GROUP_SIZE: - return "Invalid work group size"; - case CL_INVALID_WORK_ITEM_SIZE: - return "Invalid work item size"; - case CL_INVALID_GLOBAL_OFFSET: - return "Invalid global offset"; - case 
CL_INVALID_EVENT_WAIT_LIST: - return "Invalid event wait list"; - case CL_INVALID_EVENT: - return "Invalid event"; - case CL_INVALID_OPERATION: - return "Invalid operation"; - case CL_INVALID_GL_OBJECT: - return "Invalid GL object"; - case CL_INVALID_BUFFER_SIZE: - return "Invalid buffer size"; - case CL_INVALID_MIP_LEVEL: - return "Invalid mip-level"; - case CL_INVALID_GLOBAL_WORK_SIZE: - return "Invalid global work size"; - case CL_INVALID_PROPERTY: - return "Invalid property"; - case CL_INVALID_IMAGE_DESCRIPTOR: - return "Invalid image descriptor"; - case CL_INVALID_COMPILER_OPTIONS: - return "Invalid compiler options"; - case CL_INVALID_LINKER_OPTIONS: - return "Invalid linker options"; - case CL_INVALID_DEVICE_PARTITION_COUNT: - return "Invalid device partition count"; - case CL_INVALID_PIPE_SIZE: - return "Invalid pipe size"; - case CL_INVALID_DEVICE_QUEUE: - return "Invalid device queue"; - case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: - return "Invalid GL sharegroup reference KHR"; - - default: - return "Unknown OpenCL"; - } -} - -int ChannelTypeToSizeInBytes(cl_channel_type type) -{ - switch (type) - { - case CL_FLOAT: - return 4; - default: - return 0; - } -} - -absl::Status CreateCLBuffer(cl_context context, int size_in_bytes, bool read_only, void *data, - cl_mem *result) -{ - cl_mem_flags flags = read_only ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE; - if (data) - { - flags |= CL_MEM_COPY_HOST_PTR; - } - cl_int error_code; - *result = clCreateBuffer(context, flags, size_in_bytes, data, &error_code); - if (!*result) - { - return absl::UnknownError(absl::StrCat("Failed to allocate device memory (clCreateBuffer): ", - CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -cl_channel_type DataTypeToChannelType(DataType type, bool normalized) -{ - switch (type) - { - case DataType::FLOAT32: - return CL_FLOAT; - case DataType::INT8: - return normalized ? CL_SNORM_INT8 : CL_SIGNED_INT8; - case DataType::UINT8: - return normalized ? 
CL_UNORM_INT8 : CL_UNSIGNED_INT8; - case DataType::INT16: - return normalized ? CL_SNORM_INT16 : CL_SIGNED_INT16; - case DataType::UINT16: - return normalized ? CL_UNORM_INT16 : CL_UNSIGNED_INT16; - case DataType::INT32: - return CL_SIGNED_INT32; - case DataType::UINT32: - return CL_UNSIGNED_INT32; - default: - return CL_FLOAT; - } -} - -absl::Status CreateRGBAImage2D(cl_context context, int width, int height, - cl_channel_type channel_type, void *data, cl_mem *result) -{ - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = width; - desc.image_height = height; - desc.image_depth = 0; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = channel_type; - - cl_mem_flags flags = CL_MEM_READ_WRITE; - if (data) - { - flags |= CL_MEM_COPY_HOST_PTR; - } - - cl_int error_code; - *result = CreateImage2DLegacy(context, flags, &format, &desc, data, &error_code); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to create 2D texture (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size, - const std::string &stride_x, const std::string &padding_x) -{ - // TODO(sorokin) check perf and optimize with floor() if needed - // int p0 = src_x / batch_size;\n"; - // int b0 = src_x % batch_size;\n"; - // return p0 * stride_x * batch_size + b0 + padding_x;\n"; - return absl::Substitute("((($0) / $1) * $2 * $1 + (($0) % $1) + $3)", src_x, batch_size, stride_x, - padding_x); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Util.h b/runtime/onert/backend/gpu_cl/open_cl/Util.h deleted file mode 100644 index 996c564f4..000000000 --- 
a/runtime/onert/backend/gpu_cl/open_cl/Util.h +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__ - -#include <string> - -#include "absl/types/span.h" -#include "OpenclWrapper.h" -#include "DataType.h" -#include "InternalTensor.h" -#include "Status.h" -#include "Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -// Calculates correct X coordinate when stride != 1 and batch != 1 for layouts -// with B after W (for example HWBC4) and WB stored in one axis of GPU -// resources. -std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size, - const std::string &stride_x, const std::string &padding_x); - -// @param n must be non negative -// @param divisor must be greater than zero -template <typename T, typename N> T DivideRoundUp(T n, N divisor) -{ - const T div = static_cast<T>(divisor); - const T q = n / div; - return n % div == 0 ? 
q : q + 1; -} - -template <> inline uint3 DivideRoundUp(uint3 n, uint3 divisor) -{ - return uint3(DivideRoundUp(n.x, divisor.x), DivideRoundUp(n.y, divisor.y), - DivideRoundUp(n.z, divisor.z)); -} - -// @param number or its components must be greater than zero -// @param n must be greater than zero -template <typename T, typename N> T AlignByN(T number, N n) { return DivideRoundUp(number, n) * n; } - -std::string CLErrorCodeToString(cl_int error_code); - -int ChannelTypeToSizeInBytes(cl_channel_type type); - -template <DataType S, typename T> -void CopyLinearFLT4(const InternalTensor<Linear, S> &src, absl::Span<T> dst) -{ - const int dst_depth = dst.size(); - for (int d = 0; d < dst_depth; ++d) - { - T val; - for (int i = 0; i < 4; ++i) - { - const int dst_ch = d * 4 + i; - val[i] = dst_ch >= src.shape.v ? 0.0f : src.data[dst_ch]; - } - dst[d] = val; - } -} - -absl::Status CreateCLBuffer(cl_context context, int size_in_bytes, bool read_only, void *data, - cl_mem *result); - -cl_channel_type DataTypeToChannelType(DataType type, bool normalized = false); -absl::Status CreateRGBAImage2D(cl_context context, int width, int height, - cl_channel_type channel_type, void *data, cl_mem *result); - -template <DataType S, typename T> -void RearrangeWeightsToOHWIOGroupI4O4(const InternalTensor<OHWI, S> &weights, int out_group_size, - absl::Span<T> dst) -{ - const int dst_slices = DivideRoundUp(weights.shape.o, 4); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - const int dst_groups = DivideRoundUp(dst_slices, out_group_size); - - int counter = 0; - for (int d = 0; d < dst_groups; ++d) - { - for (int y = 0; y < weights.shape.h; ++y) - { - for (int x = 0; x < weights.shape.w; ++x) - { - for (int s = 0; s < src_slices; ++s) - { - for (int d_group = 0; d_group < out_group_size; ++d_group) - { - for (int j = 0; j < 4; ++j) - { - T filter; - for (int i = 0; i < 4; ++i) - { - const int s_ch = s * 4 + j; - const int d_ch = (d * out_group_size + d_group) * 4 + i; - if 
(s_ch < weights.shape.i && d_ch < weights.shape.o) - { - const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch}); - filter[i] = weights.data[f_index]; - } - else - { - filter[i] = 0.0f; - } - } - dst[counter++] = filter; - } - } - } - } - } - } -} - -template <DataType S, typename T> -void RearrangeWeightsToODHWIOGroupI4O4(const InternalTensor<OHWDI, S> &weights, int out_group_size, - absl::Span<T> dst) -{ - const int dst_slices = DivideRoundUp(weights.shape.o, 4); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - const int dst_groups = DivideRoundUp(dst_slices, out_group_size); - - int counter = 0; - for (int d = 0; d < dst_groups; ++d) - { - for (int z = 0; z < weights.shape.d; ++z) - { - for (int y = 0; y < weights.shape.h; ++y) - { - for (int x = 0; x < weights.shape.w; ++x) - { - for (int s = 0; s < src_slices; ++s) - { - for (int d_group = 0; d_group < out_group_size; ++d_group) - { - for (int j = 0; j < 4; ++j) - { - T filter; - for (int i = 0; i < 4; ++i) - { - const int s_ch = s * 4 + j; - const int d_ch = (d * out_group_size + d_group) * 4 + i; - if (s_ch < weights.shape.i && d_ch < weights.shape.o) - { - const int f_index = weights.shape.LinearIndex({d_ch, y, x, z, s_ch}); - filter[i] = weights.data[f_index]; - } - else - { - filter[i] = 0.0f; - } - } - dst[counter++] = filter; - } - } - } - } - } - } - } -} - -template <DataType S, typename T> -void RearrangeWeightsToI4HWIOOGroupO4(const InternalTensor<OHWI, S> &weights, int out_group_size, - absl::Span<T> dst) -{ - const int dst_slices = DivideRoundUp(weights.shape.o, 4); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - const int dst_groups = DivideRoundUp(dst_slices, out_group_size); - - int counter = 0; - for (int j = 0; j < 4; ++j) - { - for (int y = 0; y < weights.shape.h; ++y) - { - for (int x = 0; x < weights.shape.w; ++x) - { - for (int s = 0; s < src_slices; ++s) - { - for (int d = 0; d < dst_groups; ++d) - { - for (int d_group = 0; d_group < out_group_size; 
++d_group) - { - T filter; - for (int i = 0; i < 4; ++i) - { - const int s_ch = s * 4 + j; - const int d_ch = (d * out_group_size + d_group) * 4 + i; - if (s_ch < weights.shape.i && d_ch < weights.shape.o) - { - const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch}); - filter[i] = weights.data[f_index]; - } - else - { - filter[i] = 0.0f; - } - } - dst[counter++] = filter; - } - } - } - } - } - } -} - -template <DataType S, typename T> -void RearrangeWeightsToI4DHWIOOGroupO4(const InternalTensor<OHWDI, S> &weights, int out_group_size, - absl::Span<T> dst) -{ - const int dst_slices = DivideRoundUp(weights.shape.o, 4); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - const int dst_groups = DivideRoundUp(dst_slices, out_group_size); - - int counter = 0; - for (int j = 0; j < 4; ++j) - { - for (int z = 0; z < weights.shape.d; ++z) - { - for (int y = 0; y < weights.shape.h; ++y) - { - for (int x = 0; x < weights.shape.w; ++x) - { - for (int s = 0; s < src_slices; ++s) - { - for (int d = 0; d < dst_groups; ++d) - { - for (int d_group = 0; d_group < out_group_size; ++d_group) - { - T filter; - for (int i = 0; i < 4; ++i) - { - const int s_ch = s * 4 + j; - const int d_ch = (d * out_group_size + d_group) * 4 + i; - if (s_ch < weights.shape.i && d_ch < weights.shape.o) - { - const int f_index = weights.shape.LinearIndex({d_ch, y, x, z, s_ch}); - filter[i] = weights.data[f_index]; - } - else - { - filter[i] = 0.0f; - } - } - dst[counter++] = filter; - } - } - } - } - } - } - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc deleted file mode 100644 index 5f1103ad9..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "open_cl/WinogradUtil.h" - -#include <cmath> -#include <vector> - -#include "open_cl/DataType.h" -#include "open_cl/Shape.h" -#include "open_cl/Tensor.h" - -namespace onert -{ -namespace backend -{ -namespace -{ -// Matrices for Winograd trasformations were computed with the method described -// here https://openreview.net/pdf?id=H1ZaRZVKg -std::vector<float> GetTransposedMatrixForWinograd(int width, int height) -{ - const float kDelta = std::sqrt(2.0f) / 2.0f; - std::vector<float> px(width); - - px[0] = 0.0f; - const int points_count = (width - 1) / 2; - for (int i = 0; i < points_count; ++i) - { - px[i * 2 + 1] = kDelta * (i + 1.0f); - px[i * 2 + 2] = -kDelta * (i + 1.0f); - } - px[width - 1] = 1.0f; - - std::vector<float> py(width, 1.0f); - py[width - 1] = 0.0f; - - std::vector<float> result(height * width); - for (int y = 0; y < width; ++y) - { - for (int x = 0; x < height; ++x) - { - result[x * width + y] = std::pow(px[y], 1.0f * x) * std::pow(py[y], (height - 1.0f) - x); - } - } - return result; -} - -std::vector<float> GetInversedMatrixForWinograd(int rank) -{ - auto matrix = GetTransposedMatrixForWinograd(rank, rank); - std::vector<float> inverted(rank * rank, 0.0f); - for (int i = 0; i < rank; ++i) - { - inverted[i * rank + i] = 1.0f; - } - - for (int i = 1; i < rank - 1; ++i) - { - float inv_t = 1.0f 
/ matrix[i * rank + i]; - for (int x = i; x < rank; ++x) - { - matrix[i * rank + x] *= inv_t; - } - for (int x = 0; x < rank; ++x) - { - inverted[i * rank + x] *= inv_t; - } - - for (int y = 0; y < rank; ++y) - { - if (y == i) - continue; - float t = matrix[y * rank + i]; - for (int x = i; x < rank; ++x) - { - matrix[y * rank + x] -= t * matrix[i * rank + x]; - } - for (int x = 0; x < rank; ++x) - { - inverted[y * rank + x] -= t * inverted[i * rank + x]; - } - } - } - - return inverted; -} - -std::vector<float> Multiply(const std::vector<float> &a_mat, const std::vector<float> &b_mat, int m, - int n, int k) -{ - std::vector<float> result(m * k); - for (int y = 0; y < m; ++y) - { - for (int x = 0; x < k; ++x) - { - float sum = 0.0f; - for (int i = 0; i < n; ++i) - { - sum += a_mat[y * n + i] * b_mat[i * k + x]; - } - result[y * k + x] = sum; - } - } - return result; -} -} // namespace - -std::vector<float> AtMatrixForWinograd4x4To6x6() { return GetTransposedMatrixForWinograd(6, 4); } - -std::vector<float> BtMatrixForWinograd4x4To6x6() { return GetInversedMatrixForWinograd(6); } - -void RearrangeWeightsToWinograd4x4To6x6Weights( - const gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> &src_weights, - gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> *dst_weights) -{ - gpu_cl::OHWI dst_shape; - dst_shape.o = src_weights.shape.o; - dst_shape.h = 6; - dst_shape.w = 6; - dst_shape.i = src_weights.shape.i; - dst_weights->shape = dst_shape; - dst_weights->data.resize(dst_shape.DimensionsProduct()); - - auto gt_mat = GetTransposedMatrixForWinograd(6, 3); - std::vector<float> g_mat(gt_mat.size()); - for (int y = 0; y < 3; ++y) - { - for (int x = 0; x < 6; ++x) - { - g_mat[x * 3 + y] = gt_mat[y * 6 + x]; - } - } - - for (int d = 0; d < src_weights.shape.o; ++d) - { - for (int s = 0; s < src_weights.shape.i; ++s) - { - std::vector<float> in_vals(9); - for (int y = 0; y < 3; ++y) - { - for (int x = 0; x < 3; ++x) - { - const int f_index = 
src_weights.shape.LinearIndex({d, y, x, s}); - in_vals[y * 3 + x] = src_weights.data[f_index]; - } - } - - auto temp_vals = Multiply(g_mat, in_vals, 6, 3, 3); - auto out_vals = Multiply(temp_vals, gt_mat, 6, 3, 6); - for (int y = 0; y < 6; ++y) - { - for (int x = 0; x < 6; ++x) - { - const int f_index = dst_shape.LinearIndex({d, y, x, s}); - dst_weights->data[f_index] = out_vals[y * 6 + x]; - } - } - } - } -} - -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h b/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h deleted file mode 100644 index 32e21760d..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__ - -#include <vector> - -#include "open_cl/DataType.h" -#include "open_cl/Shape.h" -#include "open_cl/InternalTensor.h" - -namespace onert -{ -namespace backend -{ - -// Matrices for Winograd trasformations received with method described here -// https://openreview.net/pdf?id=H1ZaRZVKg - -// returns A transposed matrix(6 * 4) as array (24 values) for Winograd4x4To6x6 -std::vector<float> AtMatrixForWinograd4x4To6x6(); - -// returns B transposed matrix(6 * 6) as array (36 values) for Winograd4x4To6x6 -std::vector<float> BtMatrixForWinograd4x4To6x6(); - -void RearrangeWeightsToWinograd4x4To6x6Weights( - const gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> &src_weights, - gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> *dst_weights); - -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc b/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc deleted file mode 100644 index 847c2a2aa..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "WorkgroupSelection.h" - -#include <math.h> - -#include <set> -#include <vector> - -#include "Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -namespace -{ - -template <typename T> -void AddCornerCases(const T &grid, int max_work_group_total_size, const T &max_work_group_sizes, - WorkGroupSizeAlignment x_alignment, WorkGroupSizeAlignment y_alignment, - WorkGroupSizeAlignment z_alignment, std::vector<T> *work_groups) -{ - for (int x = 1; x <= 4; ++x) - { - for (int y = 1; y <= 4; ++y) - { - for (int z = 1; z <= 4; ++z) - { - u_int32_t wg_x = DivideRoundUp(grid.x, x); - u_int32_t wg_y = DivideRoundUp(grid.y, y); - u_int32_t wg_z = DivideRoundUp(grid.z, z); - if (wg_x > static_cast<u_int32_t>(max_work_group_sizes.x) || - wg_y > static_cast<u_int32_t>(max_work_group_sizes.y) || - wg_z > static_cast<u_int32_t>(max_work_group_sizes.z) || - wg_x * wg_y * wg_z > static_cast<u_int32_t>(max_work_group_total_size)) - { - continue; - } - if (x_alignment == WorkGroupSizeAlignment::PRECISE && grid.x % wg_x != 0) - { - continue; - } - if (y_alignment == WorkGroupSizeAlignment::PRECISE && grid.y % wg_y != 0) - { - continue; - } - if (z_alignment == WorkGroupSizeAlignment::PRECISE && grid.z % wg_z != 0) - { - continue; - } - work_groups->push_back({wg_x, wg_y, wg_z}); - } - } - } - - // this will add at least {1, 1, 1} always. 
- for (u_int32_t x = 1; x <= 4; ++x) - { - for (u_int32_t y = 1; y <= 4; ++y) - { - for (u_int32_t z = 1; z <= 4; ++z) - { - if (x > static_cast<u_int32_t>(max_work_group_sizes.x) || - y > static_cast<u_int32_t>(max_work_group_sizes.y) || - z > static_cast<u_int32_t>(max_work_group_sizes.z) || - x * y * z > static_cast<u_int32_t>(max_work_group_total_size)) - { - continue; - } - if (x_alignment == WorkGroupSizeAlignment::PRECISE && grid.x % x != 0) - { - continue; - } - if (y_alignment == WorkGroupSizeAlignment::PRECISE && grid.y % y != 0) - { - continue; - } - if (z_alignment == WorkGroupSizeAlignment::PRECISE && grid.z % z != 0) - { - continue; - } - work_groups->push_back({x, y, z}); - } - } - } -} - -std::vector<int> GetDivisors(int number) -{ - const int max_divisor = static_cast<int>(sqrt(number)); - std::vector<int> divisors; - // we don't know the number of dividers, so it is just heuristic. - divisors.reserve(max_divisor / 3 + 1); - for (int i = 1; i <= max_divisor; ++i) - { - const int d = number / i; - if (i * d == number) - { - divisors.push_back(i); - if (d != i) - { - divisors.push_back(d); - } - } - } - return divisors; -} - -std::vector<int> GetDivisorsForRange(int number, int range) -{ - const int last_number = number + range; - const int max_divisor = static_cast<int>(sqrt(last_number)); - std::set<int> divisors; - for (int i = 1; i <= max_divisor; ++i) - { - const int reminder = number % i; - // iterate through numbers that divisible by i in our range; - const int first_number = number + (i - reminder) % i; - if (first_number <= last_number) - { - divisors.insert(i); - } - for (int j = first_number; j <= last_number; j += i) - { - const int d = j / i; - if (d != i) - { - divisors.insert(d); - } - } - } - return std::vector<int>(divisors.begin(), divisors.end()); -} - -} // namespace - -std::vector<int> GetPossibleSizes(int number, WorkGroupSizeAlignment z_alignment) -{ - if (z_alignment == WorkGroupSizeAlignment::PRECISE) - { - // we will use for 
potential sizes, sizes that cover grid precisely - // work group size * k (k is integer) == grid_size - return GetDivisors(number); - } - else - { - // when we chose work group size we can use work group size that - // work group size * k (k is integer) != grid_size (slightly bigger) - // so in this heuristic we trying to find potential size, that satisfies - // to this : work group size * k (k is integer) <= grid_size + 5 - // and this : work group size * k (k is integer) >= grid_size - return GetDivisorsForRange(number, 5); - } -} - -template <typename T> -std::vector<T> -GenerateWorkGroupSizes(const T &grid, int min_work_group_total_size, int max_work_group_total_size, - const T &max_work_group_sizes, WorkGroupSizeAlignment x_alignment, - WorkGroupSizeAlignment y_alignment, WorkGroupSizeAlignment z_alignment) -{ - std::vector<T> work_groups; - work_groups.reserve(64); - - std::vector<int> sizes_x = GetPossibleSizes(grid.x, x_alignment); - std::vector<int> sizes_y = GetPossibleSizes(grid.y, y_alignment); - std::vector<int> sizes_z = GetPossibleSizes(grid.z, z_alignment); - - for (auto x : sizes_x) - { - if (static_cast<int>(x) > static_cast<int>(max_work_group_sizes.x)) - continue; - for (auto y : sizes_y) - { - if (static_cast<int>(y) > static_cast<int>(max_work_group_sizes.y)) - continue; - for (auto z : sizes_z) - { - if (static_cast<int>(z) > static_cast<int>(max_work_group_sizes.z)) - continue; - const int work_group_size = x * y * z; - if (work_group_size < min_work_group_total_size || - work_group_size > max_work_group_total_size) - continue; - work_groups.push_back({x, y, z}); - } - } - } - - return work_groups; -} - -// Specializations of GenerateWorkGroupSizes for int3 and uint3 - -template std::vector<int3> GenerateWorkGroupSizes(const int3 &grid, int min_work_group_total_size, - int max_work_group_total_size, - const int3 &max_work_group_sizes, - WorkGroupSizeAlignment x_alignment, - WorkGroupSizeAlignment y_alignment, - WorkGroupSizeAlignment 
z_alignment); - -template std::vector<uint3> GenerateWorkGroupSizes(const uint3 &grid, int min_work_group_total_size, - int max_work_group_total_size, - const uint3 &max_work_group_sizes, - WorkGroupSizeAlignment x_alignment, - WorkGroupSizeAlignment y_alignment, - WorkGroupSizeAlignment z_alignment); - -template <typename T> -void GenerateWorkGroupSizesAlignedToGrid(const T &grid, const T &max_work_group_size, - const int max_work_group_invocations, - std::vector<T> *work_groups) -{ - auto alignment = WorkGroupSizeAlignment::PRECISE; - *work_groups = - GenerateWorkGroupSizes<T>(grid, /*min_work_group_total_size = */ 32, max_work_group_invocations, - max_work_group_size, alignment, alignment, alignment); - // If the grid parameter too small, method below cannot generate workgroups. - if (work_groups->empty()) - { - AddCornerCases(grid, max_work_group_invocations, max_work_group_size, alignment, alignment, - alignment, work_groups); - } -} - -// Specializations of GenerateWorkGroupSizesAlignedToGrid for int3 and uint3 - -template void GenerateWorkGroupSizesAlignedToGrid(const int3 &grid, const int3 &max_work_group_size, - const int max_work_group_invocations, - std::vector<int3> *work_groups); - -template void GenerateWorkGroupSizesAlignedToGrid(const uint3 &grid, - const uint3 &max_work_group_size, - const int max_work_group_invocations, - std::vector<uint3> *work_groups); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h b/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h deleted file mode 100644 index b0702ac7c..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__ - -#include <vector> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// PRECISE assume that WorkGroupSize * k = GridSize; -// NO_ALIGNMENT no restrictions; -// We need PRECISE when we don't have check in kernel for boundaries -// If we have the check, we can use PRECISE or NO_ALIGNMENT as well. 
-enum class WorkGroupSizeAlignment -{ - PRECISE, - NO_ALIGNMENT -}; - -std::vector<int> GetPossibleSizes(int number, WorkGroupSizeAlignment z_alignment); - -// Specializations exist for int3 and uint3 in the .cc file - -template <typename T> -std::vector<T> -GenerateWorkGroupSizes(const T &grid, int min_work_group_total_size, int max_work_group_total_size, - const T &max_work_group_sizes, WorkGroupSizeAlignment x_alignment, - WorkGroupSizeAlignment y_alignment, WorkGroupSizeAlignment z_alignment); - -template <typename T> -void GenerateWorkGroupSizesAlignedToGrid(const T &grid, const T &max_work_group_size, - const int max_work_group_invocations, - std::vector<T> *work_groups); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc deleted file mode 100644 index 09100fe1f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Add.h" - -#include <cstring> -#include <string> - -#include "absl/strings/str_cat.h" -#include "Util.h" -#include "open_cl/Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -GPUOperation CreateAdd(const OperationDef &definition, const std::vector<int> &channels, - int dst_channels) -{ - GPUOperation add(definition); - int dst_depth = DivideRoundUp(dst_channels, 4); - int src0_depth = DivideRoundUp(channels[0], 4); - add.elementwise_ = true; - add.linkable_ = dst_depth == src0_depth; - if (src0_depth < dst_depth) - { - add.check_src_channels_size_ = true; - } - for (uint32_t i = 1; i < definition.src_tensors.size(); ++i) - { - const std::string tensor_name = absl::StrCat("src_data_", i); - auto src_desc = definition.src_tensors[i]; - if (definition.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - add.AddSrcTensor(tensor_name, src_desc); - add.code_ += "if (S_COORD < args." + tensor_name + ".Slices()) {\n"; - add.code_ += " in_out_value += args." + tensor_name + ".Read(X_COORD, Y_COORD, S_COORD);\n"; - add.code_ += "}\n"; - } - return add; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h deleted file mode 100644 index 2335a901c..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__ - -#include <string> -#include <vector> - -#include "GpuOperation.h" -#include "open_cl/Operations.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// Add operation supports not equal tensors on input (for possibility to -// remove Padding operation with zeroes in channels dimension) -GPUOperation CreateAdd(const OperationDef &definition, const std::vector<int> &channels, - int dst_channels); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc deleted file mode 100644 index 1b9014fdf..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "open_cl/kernels/ConvBuffer1x1.h" - -#include <array> -#include <string> -#include <utility> - -#include "open_cl/ClDevice.h" -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/Precision.h" -#include "open_cl/TensorType.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -// element_size must be 1, 2 or 4 -// 1 - is FLT4 -// 2 - is FLT8 -// 4 - is FLT16 -// This function generates code for arithmetic part of convolution -std::string GetComputationPart(const int3 &block_size, int element_size, - CalculationsPrecision precision) -{ - const std::string hexes[16] = {"0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", "a", "b", "c", "d", "e", "f"}; - std::string c; - for (int z = 0; z < block_size.z; ++z) - { - const std::string z_s = std::to_string(z); - c += " FLT16 W" + z_s + " = weights_cache[" + z_s + "];\n"; - for (int y = 0; y < block_size.y; ++y) - { - for (int x = 0; x < block_size.x; ++x) - { - std::string s_index = std::to_string(y * block_size.x + x); - for (int e = 0; e < element_size; ++e) - { - std::string r_index = z_s + std::to_string(y) + std::to_string(x * element_size + e); - const std::string f0 = "W" + z_s + ".s0123"; - const std::string f1 = "W" + z_s + ".s4567"; - const std::string f2 = "W" + z_s + ".s89ab"; - const std::string f3 = "W" + z_s + ".scdef"; - switch (precision) - { - case CalculationsPrecision::F32: - case CalculationsPrecision::F16: - c += " r" + r_index + " += " + f0 + " * s" + s_index + ".s" + hexes[e * 4 + 0] + - ";\n"; - c += " r" + r_index + " += " + f1 + " * s" + s_index + ".s" + hexes[e * 4 + 1] + - ";\n"; - c += " r" + r_index + " += " + f2 + " * s" + s_index + ".s" + hexes[e * 4 + 2] + - ";\n"; - c += " r" + r_index + " += " + f3 + " * s" + s_index + ".s" + hexes[e * 4 + 3] + - ";\n"; - 
break; - case CalculationsPrecision::F32_F16: - c += " r" + r_index + " += convert_float4(" + f0 + " * s" + s_index + ".s" + - hexes[e * 4 + 0] + " + " + f1 + " * s" + s_index + ".s" + hexes[e * 4 + 1] + - " + " + f2 + " * s" + s_index + ".s" + hexes[e * 4 + 2] + " + " + f3 + " * s" + - s_index + ".s" + hexes[e * 4 + 3] + ");\n"; - break; - } - } - } - } - } - return c; -} - -ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo &device_info, - const OperationDef &definition, const BHWC &shape, int, - int dst_depth) -{ - ConvBuffer1x1::ConvParams conv_params; - conv_params.element_size = 4; - conv_params.block_size = int3(1, 1, 1); - if (!device_info.IsMali()) - { - return conv_params; - } - bool can_use_flt8 = - (shape.w * shape.b) % 2 == 0 && definition.precision != CalculationsPrecision::F32; - bool is_midgard = device_info.IsMali() && device_info.mali_info.IsMidgard(); - if (is_midgard) - { - if (can_use_flt8) - { - conv_params.element_size = 8; - } - if (definition.precision == CalculationsPrecision::F16 || !can_use_flt8) - { - conv_params.block_size.x = 2; - } - return conv_params; - } - - int task_size = shape.w * shape.b * shape.h * dst_depth; - int block_size = GetRecommendedBlockSizeForConv(device_info, definition.precision, task_size); - - if (!can_use_flt8 && block_size > 4) - { - block_size = 4; - } - - if (can_use_flt8 && block_size >= 2) - { - conv_params.element_size = 8; - block_size /= 2; - } - if (block_size == 4) - { - conv_params.block_size.x = 2; - if (definition.precision == CalculationsPrecision::F32 && dst_depth < 32) - { - conv_params.block_size.y = 2; - } - else - { - conv_params.block_size.z = 2; - } - } - else if (block_size == 2) - { - if (dst_depth >= 32) - { - conv_params.block_size.z = 2; - } - else - { - conv_params.block_size.x = 2; - } - } - - return conv_params; -} - -ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo &device_info, - const OperationDef &definition, int, int) -{ - ConvBuffer1x1::ConvParams conv_params; - 
conv_params.element_size = 4; - conv_params.block_size = int3(1, 1, 1); - if (device_info.IsMali() && definition.precision == CalculationsPrecision::F16 && - device_info.compute_units_count <= 4) - { - conv_params.block_size.x *= 2; - } - return conv_params; -} - -} // namespace - -ConvBuffer1x1::ConvBuffer1x1(const OperationDef &definition, const ConvParams &conv_params) - : GPUOperation(definition), conv_params_(conv_params) -{ - code_ = GenerateConvBuffer1x1(definition_, conv_params_, &args_); - work_group_size_ = int3(2, 4, 1); -} - -ConvBuffer1x1::ConvBuffer1x1(ConvBuffer1x1 &&operation) - : GPUOperation(std::move(operation)), conv_params_(std::move(operation.conv_params_)) -{ -} - -ConvBuffer1x1 &ConvBuffer1x1::operator=(ConvBuffer1x1 &&operation) -{ - if (this != &operation) - { - std::swap(conv_params_, operation.conv_params_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - -std::string ConvBuffer1x1::GenerateConvBuffer1x1(const OperationDef &op_def, - const ConvBuffer1x1::ConvParams &conv_params, - Arguments *) -{ - auto src_desc = op_def.src_tensors[0]; - if (op_def.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - if (conv_params_.element_size == 8) - { - src_desc.SetStateVar("ElementsX2", "true"); - } - else if (conv_params_.element_size == 16) - { - src_desc.SetStateVar("ElementsX4", "true"); - } - AddSrcTensor("src_tensor", src_desc); - if (op_def.src_tensors.size() == 2) - { - // dynamic weights - BufferDescriptor desc; - desc.element_type = op_def.src_tensors[1].data_type; - desc.element_size = 16; - desc.memory_type = MemoryType::GLOBAL; - AddSrcBuffer("weights", desc); - } - - auto dst_desc = op_def.dst_tensors[0]; - if (op_def.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - AddDstTensor("dst_tensor", dst_desc); - - std::string c = GetCommonDefines(op_def.precision); - switch (op_def.precision) - { - case CalculationsPrecision::F32: - c += "#define FLT8 float8\n"; 
- c += "#define FLT16 float16\n"; - break; - case CalculationsPrecision::F32_F16: - case CalculationsPrecision::F16: - c += "#define FLT8 half8\n"; - c += "#define FLT16 half16\n"; - break; - } - - const int3 block_size = conv_params.block_size; - const int element_size = conv_params.element_size / 4; - - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0) * " + std::to_string(block_size.x * element_size) + ";\n"; - c += " int X_SRC = get_global_id(0) * " + std::to_string(block_size.x) + ";\n"; - c += " int Y = get_global_id(1) * " + std::to_string(block_size.y) + ";\n"; - c += " int Z = get_global_id(2) * " + std::to_string(block_size.z) + ";\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) return;\n"; - if (conv_params.different_weights_for_height) - { - c += " __global FLT16* weights_cache = args.weights.GetPtr() + (Z * " - "args.src_tensor.Height() + " - "Y * " + - std::to_string(block_size.z) + - ") * " - "args.src_tensor.Slices();\n"; - } - else - { - c += " __global FLT16* weights_cache = args.weights.GetPtr() + Z * " - "args.src_tensor.Slices();\n"; - } - for (int z = 0; z < block_size.z; ++z) - { - const std::string z_s = std::to_string(z); - c += " ACCUM_FLT4 bias_val_" + z_s + " = TO_ACCUM_TYPE(args.biases.Read(Z + " + z_s + "));\n"; - for (int y = 0; y < block_size.y; ++y) - { - for (int x = 0; x < block_size.x * element_size; ++x) - { - c += " ACCUM_FLT4 r" + z_s + std::to_string(y) + std::to_string(x) + " = bias_val_" + z_s + - ";\n"; - } - } - } - for (int x = 0; x < block_size.x; ++x) - { - std::string x_s = std::to_string(x); - c += " int xc" + x_s + " = min(X_SRC + " + std::to_string(x) + - ", args.src_tensor.Width() - 1);\n"; - } - for (int y = 0; y < block_size.y; ++y) - { - std::string y_s = std::to_string(y); - c += " int yc" + y_s + " = min(Y + " + y_s + ", args.src_tensor.Height() - 1);\n"; - } - for (int y = 0; y < block_size.y; ++y) 
- { - std::string y_s = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - std::string x_s = std::to_string(x); - std::string i_s = std::to_string(y * block_size.x + x); - c += " int src_addr_" + i_s + " = (yc" + y_s + ") * args.src_tensor.Width() + (xc" + x_s + - ");\n"; - } - } - c += " for (int s = 0; s < args.src_tensor.Slices(); ++s) {\n"; - for (int y = 0; y < block_size.y; ++y) - { - std::string y_s = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - std::string x_s = std::to_string(x); - std::string i_s = std::to_string(y * block_size.x + x); - c += " FLT" + std::to_string(element_size * 4) + " s" + i_s + - " = args.src_tensor.Read(src_addr_" + i_s + ");\n"; - } - } - c += GetComputationPart(block_size, element_size, op_def.precision); - for (int i = 0; i < block_size.x * block_size.y; ++i) - { - std::string i_s = std::to_string(i); - c += " src_addr_" + i_s + " += args.src_tensor.SliceStride();\n"; - } - c += " weights_cache += " + std::to_string(block_size.z) + ";\n"; - c += " }\n"; // SRC_SLICES - - for (int z = 0; z < block_size.z; ++z) - { - const std::string z_s = std::to_string(z); - if (z != 0) - { - c += " if (Z + " + z_s + " >= args.dst_tensor.Slices()) return;\n"; - } - for (int y = 0; y < block_size.y; ++y) - { - const std::string y_s = std::to_string(y); - for (int x = 0; x < block_size.x * element_size; ++x) - { - const std::string x_s = std::to_string(x); - c += " if (X + " + x_s + " < args.dst_tensor.Width() && Y + " + y_s + - " < args.dst_tensor.Height()) {\n"; - c += " FLT4 res = TO_FLT4(r" + z_s + y_s + x_s + ");\n"; - c += " args.dst_tensor.Write(res, X + " + x_s + ", Y + " + y_s + ", Z + " + z_s + ");\n"; - c += " }\n"; - } - } - } - c += "}\n"; - return c; -} - -int3 ConvBuffer1x1::GetGridSize() const -{ - const int dst_width_elements = - DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), (conv_params_.element_size / 4)); - const int grid_x = DivideRoundUp(dst_width_elements, conv_params_.block_size.x); - 
const int grid_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y); - const int grid_z = DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.z); - return int3(grid_x, grid_y, grid_z); -} - -void ConvBuffer1x1::GetPossibleKernelWorkGroups(TuningType tuning_type, - const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const -{ - GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, work_groups); -} - -bool IsConvBuffer1x1Supported(const OperationDef &definition, const Convolution2DAttributes &attr) -{ - auto src_storage_type = definition.src_tensors[0].storage_type; - return src_storage_type == TensorStorageType::BUFFER && attr.weights.shape.w == 1 && - attr.weights.shape.h == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 && - attr.strides.w == 1 && attr.strides.h == 1 && attr.padding.prepended.w == 0 && - attr.padding.prepended.h == 0 && attr.padding.appended.w == 0 && - attr.padding.appended.h == 0; -} - -bool IsConvBuffer1x1Supported(const OperationDef &definition, const BHWC &weights_shape, - const Convolution2DAttributes &attr) -{ - auto src_storage_type = definition.src_tensors[0].storage_type; - return src_storage_type == TensorStorageType::BUFFER && weights_shape.w == 1 && - weights_shape.h == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 && - attr.strides.w == 1 && attr.strides.h == 1 && attr.padding.prepended.w == 0 && - attr.padding.prepended.h == 0 && attr.padding.appended.w == 0 && - attr.padding.appended.h == 0; -} - -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvBuffer1x1::ConvParams conv_params; - if (shape) - { - conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth); - } - else - { - 
conv_params = GetBestParams(device_info, definition, src_depth, dst_depth); - } - ConvBuffer1x1 result(definition, conv_params); - result.UploadData(attr.weights, attr.bias); - return result; -} - -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition, - const FullyConnectedAttributes &attr, const BHWC *shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvBuffer1x1::ConvParams conv_params; - if (shape) - { - conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth); - } - else - { - conv_params = GetBestParams(device_info, definition, src_depth, dst_depth); - } - conv_params.block_size.x *= conv_params.block_size.y; - conv_params.block_size.y = 1; - ConvBuffer1x1 result(definition, conv_params); - result.UploadData(attr.weights, attr.bias); - return result; -} - -ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvBuffer1x1::ConvParams conv_params; - if (shape) - { - conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth); - } - else - { - conv_params = GetBestParams(device_info, definition, src_depth, dst_depth); - } - conv_params.block_size.x *= conv_params.block_size.y; - conv_params.block_size.y = 1; - conv_params.different_weights_for_height = true; - ConvBuffer1x1 result(definition, conv_params); - result.UploadDataForWinograd4x4To6x6(attr.weights); - return result; -} - -ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, const BHWC *dst_shape) -{ - const int dst_depth = 
DivideRoundUp(weights_shape.b, 4); - const int src_depth = DivideRoundUp(weights_shape.c, 4); - ConvBuffer1x1::ConvParams conv_params; - if (dst_shape) - { - conv_params = GetBestParams(device_info, definition, *dst_shape, src_depth, dst_depth); - } - else - { - conv_params = GetBestParams(device_info, definition, src_depth, dst_depth); - } - ConvBuffer1x1 result(definition, conv_params); - result.UploadBiases(attr.bias); - return result; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h deleted file mode 100644 index 0abd6051f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__ - -#include "open_cl/Buffer.h" -#include "open_cl/ClKernel.h" -#include "open_cl/kernels/ConvCommon.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/kernels/Util.h" -#include "open_cl/LinearStorage.h" -#include "open_cl/Precision.h" -#include "open_cl/InternalTensor.h" -#include "open_cl/Util.h" -#include "open_cl/DataType.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" -#include "open_cl/WinogradUtil.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class ConvBuffer1x1 : public GPUOperation -{ -public: - ConvBuffer1x1() = default; - - // Move only - ConvBuffer1x1(ConvBuffer1x1 &&operation); - ConvBuffer1x1 &operator=(ConvBuffer1x1 &&operation); - ConvBuffer1x1(const ConvBuffer1x1 &) = delete; - ConvBuffer1x1 &operator=(const ConvBuffer1x1 &) = delete; - - void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const override; - int3 GetGridSize() const override; - - ConvWeightsDescription GetConvWeightsDescription() const - { - ConvWeightsDescription desc; - desc.layout = ConvWeightsLayout::kOHWIOGroupI4O4; - desc.output_group_size = conv_params_.block_size.z; - return desc; - } - - struct ConvParams - { - int3 block_size = int3(1, 1, 1); - int element_size = 4; // can be 4, 8 or 16 - - // By default in 2d convolution we have the same weights for WH dims, but in - // some cases we need separate weights for H dimension and convolution - // kernel requires very small modifications to support it. 
- bool different_weights_for_height = false; - }; - -private: - ConvBuffer1x1(const OperationDef &definition, const ConvParams &conv_params); - friend ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *shape); - friend ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, - const OperationDef &definition, - const FullyConnectedAttributes &attr, const BHWC *shape); - friend ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *shape); - friend ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, - const BHWC *dst_shape); - - template <DataType T> - void UploadData(const InternalTensor<OHWI, T> &weights, const InternalTensor<Linear, T> &biases); - template <DataType T> void UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights); - - template <DataType T> void UploadWeights(const InternalTensor<OHWI, T> &weights); - - template <DataType T> void UploadBiases(const InternalTensor<Linear, T> &biases); - - std::string GenerateConvBuffer1x1(const OperationDef &op_def, - const ConvBuffer1x1::ConvParams &conv_params, Arguments *args); - - ConvParams conv_params_; -}; - -template <DataType T> -void ConvBuffer1x1::UploadData(const InternalTensor<OHWI, T> &weights, - const InternalTensor<Linear, T> &biases) -{ - UploadWeights(weights); - UploadBiases(biases); -} - -template <DataType T> -void ConvBuffer1x1::UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights) -{ - InternalTensor<OHWI, T> wino_weights; - RearrangeWeightsToWinograd4x4To6x6Weights(weights, &wino_weights); - UploadWeights(wino_weights); - InternalTensor<Linear, DataType::FLOAT32> bias; - bias.shape = Linear(weights.shape.o); - 
bias.data.resize(weights.shape.o, 0.0f); - UploadBiases(bias); -} - -template <DataType T> void ConvBuffer1x1::UploadWeights(const InternalTensor<OHWI, T> &weights) -{ - const int dst_depth = DivideRoundUp(weights.shape.o, 4); - const int src_depth = DivideRoundUp(weights.shape.i, 4); - - const bool f32_weights = definition_.precision == CalculationsPrecision::F32; - const int float4_size = sizeof(float4); - // TODO - // f32_weights ? sizeof(float4) : sizeof(half4); - - const int dst_depth_aligned = AlignByN(dst_depth, conv_params_.block_size.z); - const int elements_count = weights.shape.h * weights.shape.w * src_depth * dst_depth_aligned * 4; - - BufferDescriptor desc; - desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 16; - desc.memory_type = MemoryType::GLOBAL; - desc.size = float4_size * elements_count; - desc.data.resize(desc.size); - - if (f32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(desc.data.data()); - RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.z, - absl::MakeSpan(ptr, elements_count)); - } - // else - // { - // half4 *ptr = reinterpret_cast<half4 *>(desc.data.data()); - // RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.z, - // absl::MakeSpan(ptr, elements_count)); - // } - - args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc))); -} - -template <DataType T> void ConvBuffer1x1::UploadBiases(const InternalTensor<Linear, T> &biases) -{ - TensorLinearDescriptor desc; - desc.storage_type = LinearStorageType::BUFFER; - desc.element_type = definition_.GetDataType(); - int depth = AlignByN(biases.shape.v, 4 * conv_params_.block_size.z) / 4; - desc.UploadLinearData(biases, depth); - args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); -} - -bool IsConvBuffer1x1Supported(const OperationDef &definition, const Convolution2DAttributes &attr); - -bool IsConvBuffer1x1Supported(const OperationDef &definition, 
const BHWC &weights_shape, - const Convolution2DAttributes &attr); - -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *shape = nullptr); - -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition, - const FullyConnectedAttributes &attr, - const BHWC *shape = nullptr); - -ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, - const BHWC *dst_shape = nullptr); - -ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *shape = nullptr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc deleted file mode 100644 index 0a51bab5c..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "open_cl/kernels/ConvConstants.h" - -#include <string> -#include <utility> - -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/Precision.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ -// Adreno can provide up to ~3-4KB of constant memory, but in some cases even -// 3KB can have very bad performance. -int GetAdrenoOptimalMaxConstantSize(int gpu_version) -{ - if (gpu_version < 600) - { - return 256 * 10; // 2.5KB - } - else - { - return 256 * 14; // 3.5KB - } -} - -int GetOptimalMaxConstantSize(const DeviceInfo &info) -{ - if (!info.IsAdreno()) - { - // In general we do not expect that this kernel will be used with non Adreno - // so as it tuned for __constant memory that have big profit on Adreno - return 1024; // 1KB - } - else - { - return GetAdrenoOptimalMaxConstantSize(info.adreno_info.gpu_version); - } -} - -std::string GenerateConvolutionConstantCode(const OperationDef &op_def, const OHWI &weights_shape, - bool stride_correction, GPUOperation *op) -{ - auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); - if (op_def.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddSrcTensor("src_tensor", src_desc); - - auto dst_desc = op_def.dst_tensors[0]; - if (op_def.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddDstTensor("dst_tensor", dst_desc); - - std::string c = GetCommonDefines(op_def.precision); - - const int out_z = DivideRoundUp(weights_shape.o, 4); - const std::string kOutZ = std::to_string(out_z); - const int src_depth = DivideRoundUp(weights_shape.i, 4); - - const auto src_tensor_type = op_def.src_tensors[0].storage_type; - const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || - src_tensor_type == TensorStorageType::IMAGE_BUFFER; - - switch (op_def.precision) - { - case CalculationsPrecision::F32: - case 
CalculationsPrecision::F16: - c += "#define CONV4(R, SRC, F, i) \\\n"; - c += " R += SRC.x * F[i + 0]; \\\n"; - c += " R += SRC.y * F[i + 1]; \\\n"; - c += " R += SRC.z * F[i + 2]; \\\n"; - c += " R += SRC.w * F[i + 3]; \n"; - - c += "#define CONV3(R, SRC, F, i) \\\n"; - c += " R += SRC.x * F[i + 0]; \\\n"; - c += " R += SRC.y * F[i + 1]; \\\n"; - c += " R += SRC.z * F[i + 2]; \n"; - - c += "#define CONV2(R, SRC, F, i) \\\n"; - c += " R += SRC.x * F[i + 0]; \\\n"; - c += " R += SRC.y * F[i + 1]; \n"; - - c += "#define CONV1(R, SRC, F, i) \\\n"; - c += " R += SRC * F[i + 0]; \n"; - break; - case CalculationsPrecision::F32_F16: - c += "#define CONV4(R, SRC, F, i) \\\n"; - c += " R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]"; - c += " + SRC.z * F[i + 2] + SRC.w * F[i + 3]);\n"; - - c += "#define CONV3(R, SRC, F, i) \\\n"; - c += " R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]"; - c += " + SRC.z * F[i + 2]);\n"; - - c += "#define CONV2(R, SRC, F, i) \\\n"; - c += " R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]);\n"; - - c += "#define CONV1(R, SRC, F, i) \\\n"; - c += " R += convert_float4(SRC * F[i + 0]);\n"; - break; - } - - const std::string postfixes[] = {".x", ".xy", ".xyz", ""}; - - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0);\n"; - c += " int Y = get_global_id(1);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) " - "return;\n"; - if (stride_correction) - { - c += " int start_x = " + - GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") + - ";\n"; - } - else - { - if (op_def.IsBatchSupported()) - { - c += " int start_x = X * args.stride_x + args.padding_x * " - "args.src_tensor.Batch();\n"; - } - else - { - c += " int start_x = X * args.stride_x + args.padding_x;\n"; - } - } - c += " int start_y = Y * args.stride_y + args.padding_y;\n"; - c += " ACCUM_FLT4 r[" + kOutZ + "];\n"; - c += " for (int i = 0; i < " + kOutZ + 
"; ++i) {\n"; - c += " r[i] = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n"; - c += " }\n"; - int filters_counter = 0; - for (int s = 0; s < src_depth; ++s) - { - const int ch_count = std::min(4, weights_shape.i - s * 4); - const std::string s_conv = "CONV" + std::to_string(ch_count); - const std::string s_count = ch_count == 1 ? "" : std::to_string(ch_count); - const std::string s_type = absl::StrCat("FLT", s_count); - const std::string s_postfix = postfixes[ch_count - 1]; - const std::string dilation_x = - op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x"; - for (int ky = 0; ky < weights_shape.h; ++ky) - { - std::string s_y = absl::StrCat("(start_y + ", ky, " * args.dilation_y)"); - if (manual_clamp) - { - c += " {\n"; - c += " bool y_out = " + s_y + " < 0 || " + s_y + " >= args.src_tensor.Height();\n"; - } - for (int kx = 0; kx < weights_shape.w; ++kx) - { - c += " {\n"; - std::string s_x = absl::StrCat("(start_x + ", kx, " * " + dilation_x + ")"); - if (manual_clamp) - { - c += " bool x_out = " + s_x + "< 0 || " + s_x + ">= args.src_tensor.Width();\n"; - c += " " + s_type + " src = x_out || y_out ?"; - c += "(" + s_type + ")(0.0) : args.src_tensor.Read(" + s_x + ", " + s_y + ", " + - std::to_string(s) + ")" + s_postfix + ";\n"; - } - else - { - c += " " + s_type + " src = args.src_tensor.Read(" + s_x + ", " + s_y + ", " + - std::to_string(s) + ")" + s_postfix + ";\n"; - } - for (int d = 0; d < out_z; ++d) - { - c += " " + s_conv + "(r[" + std::to_string(d) + "], src, args.weigths.GetPtr(),"; - c += " " + std::to_string(filters_counter) + ");\n"; - filters_counter += ch_count; - } - c += " }\n"; - } - if (manual_clamp) - { - c += " }\n"; - } - } - } - for (int i = 0; i < out_z; ++i) - { - std::string s_i = std::to_string(i); - c += " {\n"; - c += " FLT4 res = TO_FLT4(r[" + s_i + "]) + args.biases.Read(" + s_i + ");\n"; - c += " args.dst_tensor.Write(res, X, Y, " + s_i + ");\n"; - c += " }\n"; - } - c += "}\n"; - return c; -} 
- -} // namespace - -bool IsConvConstantsSupported(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr) -{ - if (device_info.IsAMD() && definition.precision != CalculationsPrecision::F32 && - definition.src_tensors[0].storage_type != TensorStorageType::BUFFER) - { - // BUG, some AMD gpus crashe without it - return false; - } - - const auto &w_shape = attr.weights.shape; - const int dst_channels = AlignByN(w_shape.o, 4); - const int filters_count = w_shape.i * dst_channels * w_shape.h * w_shape.w; - const int float_size = sizeof(float); - // TODO F32 and F16 - // definition.precision == CalculationsPrecision::F32 ? sizeof(float) : sizeof(half); - const int filters_buffer_size = filters_count * float_size; - const int kConstantMaxSize = GetOptimalMaxConstantSize(device_info); - const int flt4_registers = DivideRoundUp(w_shape.o, 4); - return filters_buffer_size <= kConstantMaxSize && flt4_registers <= 8; -} - -GPUOperation CreateConvConstants(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr) -{ - GPUOperation op(definition); - UploadWeightsForConvConstants(attr.weights, definition.precision, &op); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("stride_y", attr.strides.h); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("dilation_x", attr.dilations.w); - op.args_.AddInt("dilation_y", attr.dilations.h); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_ZIs1; - - const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1; - op.code_ = - GenerateConvolutionConstantCode(definition, attr.weights.shape, stride_correction, &op); - if (definition.precision == CalculationsPrecision::F16 && device_info.IsAdreno3xx()) - { - op.compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); - } - if (definition.precision != 
CalculationsPrecision::F32 && device_info.IsPowerVR()) - { - // BUG, some PowerVRs (GE8320) produce incorrect result without it - op.compiler_options_.push_back(CompilerOptions::CL_OPT_DISABLE); - } - - TensorLinearDescriptor desc; - desc.storage_type = LinearStorageType::BUFFER; - desc.element_type = definition.GetDataType(); - desc.memory_type = MemoryType::CONSTANT; - desc.UploadLinearData(attr.bias); - op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h deleted file mode 100644 index be6670c53..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__ - -#include "open_cl/Buffer.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/LinearStorage.h" -#include "open_cl/Tensor.h" -#include "open_cl/Util.h" -#include "open_cl/DataType.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <DataType S, typename T> -void RearrangeWeightsForConvConstants(const InternalTensor<OHWI, S> &weights, absl::Span<T> dst) -{ - const int dst_depth = DivideRoundUp(weights.shape.o, 4); - const int src_depth = DivideRoundUp(weights.shape.i, 4); - const int kernel_x = weights.shape.w; - const int kernel_y = weights.shape.h; - - int counter = 0; - for (int s = 0; s < src_depth; ++s) - { - for (int y = 0; y < kernel_y; ++y) - { - for (int x = 0; x < kernel_x; ++x) - { - for (int d = 0; d < dst_depth; ++d) - { - const int channels_count = std::min(4, weights.shape.i - s * 4); - T filters[4]; - for (int i = 0; i < 4; ++i) - { - for (int j = 0; j < channels_count; ++j) - { - const int s_ch = s * 4 + j; - const int d_ch = d * 4 + i; - if (s_ch < weights.shape.i && d_ch < weights.shape.o) - { - const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch}); - filters[i][j] = weights.data[f_index]; - } - else - { - filters[i][j] = 0.0f; - } - } - } - T filters_new[4]; - for (int i = 0; i < 4; ++i) - { - for (int j = 0; j < 4; ++j) - { - filters_new[i][j] = filters[j][i]; - } - } - for (int i = 0; i < channels_count; ++i) - { - dst[counter++] = filters_new[i]; - } - } - } - } - } -} - -template <DataType T> -void UploadWeightsForConvConstants(const InternalTensor<OHWI, T> &weights, - CalculationsPrecision precision, GPUOperation *op) -{ - const int dst_depth = DivideRoundUp(weights.shape.o, 4); - const int kernel_x = 
weights.shape.w; - const int kernel_y = weights.shape.h; - - const bool f32_weights = precision == CalculationsPrecision::F32; - const int float_size = f32_weights ? 4 : 2; - const int float_count = weights.shape.i * dst_depth * 4 * kernel_x * kernel_y; - - BufferDescriptor desc; - desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 4; - desc.memory_type = MemoryType::CONSTANT; - desc.size = float_size * float_count; - desc.data.resize(desc.size); - - if (f32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(desc.data.data()); - RearrangeWeightsForConvConstants(weights, absl::MakeSpan(ptr, float_count / 4)); - } - // else - // { - // half4 *ptr = reinterpret_cast<half4 *>(desc.data.data()); - // RearrangeWeightsForConvConstants(weights, absl::MakeSpan(ptr, float_count / 4)); - // } - - op->args_.AddObject("weigths", absl::make_unique<BufferDescriptor>(std::move(desc))); -} - -bool IsConvConstantsSupported(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr); - -GPUOperation CreateConvConstants(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc deleted file mode 100644 index 5cb0c2719..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc +++ /dev/null @@ -1,1653 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "open_cl/kernels/ConvPowervr.h" - -#include <algorithm> -#include <string> -#include <utility> - -#include "absl/strings/substitute.h" -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/Precision.h" -#include "open_cl/TensorType.h" -#include "open_cl/DataType.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ -std::string GenerateUploadByThreads(const std::string &local_ptr_name, - const std::string &global_ptr_name, - const std::string &global_offset_name, - const std::string &lid_name, int total_work_items, - int elements_to_upload) -{ - std::string c; - std::string offset = global_offset_name.empty() ? 
"" : global_offset_name + " + "; - const int groups = elements_to_upload / total_work_items; - const int reminder = elements_to_upload % total_work_items; - for (int i = 0; i < groups; ++i) - { - c += " " + local_ptr_name + "[" + lid_name + " + " + std::to_string(total_work_items * i) + - "] = " + global_ptr_name + "[" + offset + lid_name + " + " + - std::to_string(total_work_items * i) + "];\n"; - } - if (reminder != 0) - { - c += " if (" + lid_name + " < " + std::to_string(reminder) + ") {\n"; - c += " " + local_ptr_name + "[" + lid_name + " + " + - std::to_string(total_work_items * groups) + "] = " + global_ptr_name + "[" + offset + - lid_name + " + " + std::to_string(total_work_items * groups) + "];\n"; - c += " }\n"; - } - return c; -} - -std::string GenerateAsyncUpload(const std::string &local_ptr_name, - const std::string &global_ptr_name, - const std::string &global_offset_name, int elements_to_upload) -{ - std::string c; - std::string offset = global_offset_name.empty() ? "" : " + " + global_offset_name; - c += " async_work_group_copy(" + local_ptr_name + ", " + global_ptr_name + offset + ", " + - std::to_string(elements_to_upload) + ", 0);\n"; - return c; -} - -std::string GenerateBlockCoords(const int4 &block_size, const int3 &work_group_launch_order, - bool linear_spatial, bool need_depth) -{ - std::string c; - int3 launch_remap; - launch_remap[work_group_launch_order.x] = 0; - launch_remap[work_group_launch_order.y] = 1; - launch_remap[work_group_launch_order.z] = 2; - if (linear_spatial) - { - if (work_group_launch_order[0] == 0) - { - c += " int linear_spatial = get_global_id(0);\n"; - } - else - { - c += " int linear_spatial = get_group_id(" + std::to_string(launch_remap[0]) + - ") * get_local_size(0) + get_local_id(0);\n"; - } - if (need_depth) - { - c += " int DST_X = (linear_spatial % args.task_size_x) * " + std::to_string(block_size.x) + - ";\n"; - c += " linear_spatial = linear_spatial / args.task_size_x;\n"; - c += " int DST_Y = 
(linear_spatial % args.task_size_y) * " + std::to_string(block_size.y) + - ";\n"; - c += " int DST_Z = (linear_spatial / args.task_size_y) * " + std::to_string(block_size.z) + - ";\n"; - } - else - { - c += " int DST_Y = (linear_spatial / args.task_size_x) * " + std::to_string(block_size.y) + - ";\n"; - c += " int DST_X = (linear_spatial % args.task_size_x) * " + std::to_string(block_size.x) + - ";\n"; - } - if (work_group_launch_order[1] == 1) - { - c += " int DST_S = get_global_id(1) * " + std::to_string(block_size.w) + ";\n"; - } - else - { - c += " int DST_S = (get_group_id(" + std::to_string(launch_remap[1]) + - ") * get_local_size(1) + get_local_id(1)) * " + std::to_string(block_size.w) + ";\n"; - } - } - else - { - if (work_group_launch_order[0] == 0) - { - c += " int DST_X = get_global_id(0) * " + std::to_string(block_size.x) + ";\n"; - } - else - { - c += " int DST_X = (get_group_id(" + std::to_string(launch_remap[0]) + - ") * get_local_size(0) + get_local_id(0)) * " + std::to_string(block_size.x) + ";\n"; - } - std::string global_id_1; - if (work_group_launch_order[1] == 1) - { - global_id_1 = "get_global_id(1)"; - } - else - { - global_id_1 = "(get_group_id(" + std::to_string(launch_remap[1]) + - ") * get_local_size(1) + get_local_id(1))"; - } - if (need_depth) - { - c += " int linear_id_1 = " + global_id_1 + ";\n"; - c += - " int DST_Z = (linear_id_1 / args.task_size_y) * " + std::to_string(block_size.z) + ";\n"; - c += - " int DST_Y = (linear_id_1 % args.task_size_y) * " + std::to_string(block_size.y) + ";\n"; - } - else - { - c += " int DST_Y = " + global_id_1 + " * " + std::to_string(block_size.y) + ";\n"; - } - if (work_group_launch_order[2] == 2) - { - c += " int DST_S = get_global_id(2) * " + std::to_string(block_size.w) + ";\n"; - } - else - { - c += " int DST_S = (get_group_id(" + std::to_string(launch_remap[2]) + - ") * get_local_size(2) + get_local_id(2)) * " + std::to_string(block_size.w) + ";\n"; - } - } - - return c; -} -} // namespace - 
-ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr, - const DeviceInfo &device_info, const BHWC *dst_shape) - : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 1, 1), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h, 1, 1), - dilation_(attr.dilations.w, attr.dilations.h, 1, 1), - conv_params_(GuessBestParams(device_info, definition, attr, dst_shape)) -{ -} - -ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr, - const BHWC &weights_shape, const DeviceInfo &device_info, - const BHWC *dst_shape) - : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 1, 1), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), - kernel_size_(weights_shape.w, weights_shape.h, 1, 1), - dilation_(attr.dilations.w, attr.dilations.h, 1, 1), - conv_params_(GuessBestParams(device_info, definition, attr, weights_shape, dst_shape)) -{ -} - -ConvPowerVR::ConvPowerVR(const OperationDef &definition, const FullyConnectedAttributes &attr, - const DeviceInfo &device_info, const BHWC *dst_shape) - : GPUOperation(definition), stride_(1, 1, 1, 1), padding_(0, 0, 0, 0), kernel_size_(1, 1, 1, 1), - dilation_(1, 1, 1, 1), conv_params_(GuessBestParams(device_info, definition, attr, dst_shape)) -{ -} - -ConvPowerVR::ConvPowerVR(const OperationDef &definition) - : GPUOperation(definition), stride_(1, 1, 1, 1), padding_(0, 0, 0, 0), kernel_size_(1, 1, 1, 1), - dilation_(1, 1, 1, 1) -{ -} - -ConvPowerVR::ConvPowerVR(ConvPowerVR &&operation) - : GPUOperation(std::move(operation)), stride_(operation.stride_), padding_(operation.padding_), - kernel_size_(operation.kernel_size_), dilation_(operation.dilation_), - conv_params_(operation.conv_params_) -{ -} - -ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution3DAttributes &attr, - const DeviceInfo &device_info, const BHWDC 
*dst_shape) - : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, attr.strides.d, 1), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, -attr.padding.prepended.d, 0), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h, attr.weights.shape.d, 1), - dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d, 1), - conv_params_(GuessBestParams(device_info, definition, attr, dst_shape)) -{ -} - -ConvPowerVR &ConvPowerVR::operator=(ConvPowerVR &&operation) -{ - if (this != &operation) - { - std::swap(stride_, operation.stride_); - std::swap(padding_, operation.padding_); - std::swap(kernel_size_, operation.kernel_size_); - std::swap(dilation_, operation.dilation_); - std::swap(conv_params_, operation.conv_params_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - -void ConvPowerVR::GenerateCode(const DeviceInfo &device_info) -{ - if (conv_params_.linear_spatial) - { - grid_dimension_ = 2; - } - const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1; - code_ = GenerateConv(device_info, definition_, stride_correction, conv_params_); - if (definition_.precision == CalculationsPrecision::F16 && device_info.IsPowerVR()) - { - compiler_options_.push_back(CompilerOptions::POWERVR_FP16); - } - if (conv_params_.IsPrivateMemBroadcast() && device_info.IsCL20OrHigher()) - { - compiler_options_.push_back(CompilerOptions::CL_2_0); - } - bool kernel_is_trivial = conv_params_.x_kernel_is_1 && conv_params_.y_kernel_is_1; - if (definition_.src_tensors[0].HasAxis(Axis::DEPTH)) - { - kernel_is_trivial = kernel_is_trivial & conv_params_.z_kernel_is_1; - } - if (device_info.IsAdreno3xx() && definition_.precision == CalculationsPrecision::F16 && - kernel_is_trivial) - { - compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); - } -} - -absl::Status ConvPowerVR::BindArguments(ArgumentsBinder *args) -{ - if (!conv_params_.x_kernel_is_1) - { - RETURN_IF_ERROR(args->SetInt("stride_x", 
stride_.x)); - RETURN_IF_ERROR(args->SetInt("padding_x", padding_.x * src_[0]->Batch())); - RETURN_IF_ERROR(args->SetInt("kernel_size_x", kernel_size_.x)); - RETURN_IF_ERROR(args->SetInt("dilation_x", dilation_.x * src_[0]->Batch())); - } - if (!conv_params_.y_kernel_is_1) - { - RETURN_IF_ERROR(args->SetInt("stride_y", stride_.y)); - RETURN_IF_ERROR(args->SetInt("padding_y", padding_.y)); - RETURN_IF_ERROR(args->SetInt("kernel_size_y", kernel_size_.y)); - RETURN_IF_ERROR(args->SetInt("dilation_y", dilation_.y)); - } - if (definition_.src_tensors[0].HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) - { - RETURN_IF_ERROR(args->SetInt("stride_z", stride_.z)); - RETURN_IF_ERROR(args->SetInt("padding_z", padding_.z)); - RETURN_IF_ERROR(args->SetInt("kernel_size_z", kernel_size_.z)); - RETURN_IF_ERROR(args->SetInt("dilation_z", dilation_.z)); - } - if (conv_params_.linear_spatial) - { - const int grid_x = - DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), conv_params_.block_size.x); - RETURN_IF_ERROR(args->SetInt("task_size_x", grid_x)); - } - if (definition_.src_tensors[0].HasAxis(Axis::DEPTH)) - { - const int task_size_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y); - RETURN_IF_ERROR(args->SetInt("task_size_y", task_size_y)); - } - return absl::OkStatus(); -} - -int3 ConvPowerVR::GetGridSize() const -{ - const int task_size_x = - DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), conv_params_.block_size.x); - const int task_size_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y); - const int task_size_z = DivideRoundUp(dst_[0]->Depth(), conv_params_.block_size.z); - const int task_size_s = DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.w); - int3 wg; - - if (conv_params_.linear_spatial) - { - int grid_x = task_size_x * task_size_y; - if (definition_.src_tensors[0].HasAxis(Axis::DEPTH)) - { - grid_x *= task_size_z; - } - return int3(grid_x, task_size_s, 1); - } - else - { - int grid_y = task_size_y; - if 
(definition_.src_tensors[0].HasAxis(Axis::DEPTH)) - { - grid_y *= task_size_z; - } - return int3(task_size_x, grid_y, task_size_s); - } -} - -void ConvPowerVR::GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const -{ - if (conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP || - conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_BY_THREADS || - conv_params_.fixed_work_group_size) - { - work_groups->push_back(work_group_size_); - return; - } - GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, work_groups); -} - -std::string ConvPowerVR::GenerateConv(const DeviceInfo &device_info, const OperationDef &op_def, - bool stride_correction, const ConvParams &conv_params) -{ - auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); - if (op_def.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - AddSrcTensor("src_tensor", src_desc); - if (op_def.src_tensors.size() == 2) - { - // dynamic weights - BufferDescriptor desc; - desc.element_type = op_def.src_tensors[1].data_type; - desc.element_size = 4; - desc.memory_type = - conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM - ? 
MemoryType::CONSTANT - : MemoryType::GLOBAL; - - AddSrcBuffer("weights", desc); - } - - const auto &src_def = op_def.src_tensors[0]; - - auto generate_id = [&](const std::string &x, const std::string &y, const std::string &z) { - std::string id; - if (src_def.HasAxis(Axis::WIDTH)) - { - id += "_w" + x; - } - if (src_def.HasAxis(Axis::HEIGHT)) - { - id += "_h" + y; - } - if (src_def.HasAxis(Axis::DEPTH)) - { - id += "_d" + z; - } - return id; - }; - - auto generate_id_full = [&](const std::string &x, const std::string &y, const std::string &z, - const std::string &s) { return generate_id(x, y, z) + "_s" + s; }; - - auto generate_check = [&](const std::string &x, const std::string &y, const std::string &z) { - std::string check; - const std::vector<Axis> axes{Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH}; - const std::vector<std::string> names{"in_x", "in_y", "in_z"}; - const std::vector<bool> is_1{conv_params_.x_kernel_is_1, conv_params_.y_kernel_is_1, - conv_params_.z_kernel_is_1}; - const std::vector<std::string> coords{x, y, z}; - for (size_t i = 0; i < axes.size(); ++i) - { - const auto &axis = axes[i]; - if (src_def.HasAxis(axis) && !src_def.SupportsZeroClamp(axis) && !is_1[i]) - { - if (!check.empty()) - { - check += " && "; - } - check += names[i] + coords[i]; - } - } - return check; - }; - - auto dst_desc = op_def.dst_tensors[0]; - if (op_def.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - AddDstTensor("dst_tensor", dst_desc); - - if (!conv_params_.x_kernel_is_1) - { - args_.AddInt("stride_x"); - args_.AddInt("padding_x"); - args_.AddInt("kernel_size_x"); - args_.AddInt("dilation_x"); - } - if (!conv_params_.y_kernel_is_1) - { - args_.AddInt("stride_y"); - args_.AddInt("padding_y"); - args_.AddInt("kernel_size_y"); - args_.AddInt("dilation_y"); - } - if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) - { - args_.AddInt("stride_z"); - args_.AddInt("padding_z"); - args_.AddInt("kernel_size_z"); - 
args_.AddInt("dilation_z"); - } - if (conv_params_.linear_spatial) - { - args_.AddInt("task_size_x"); - } - if (src_def.HasAxis(Axis::DEPTH)) - { - args_.AddInt("task_size_y"); - } - - const bool need_local_mem = - conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS || - conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP; - - const int local_mem_size = conv_params.block_size.w * 4 * conv_params.src_depth_loop_size; - - const bool use_simd_broadcast = conv_params.IsPrivateMemBroadcast(); - const int simd_size = conv_params.simd_size; - - const bool late_oob_check = need_local_mem || use_simd_broadcast; - - const std::string weights_space = - conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM ? "__constant" - : "__global"; - - const std::string weights_data_type = - conv_params.weights_data_type == DataType::FLOAT32 ? "float4" : "half4"; - - const std::string weights_global_ptr = weights_space + " " + weights_data_type + "*"; - - std::string c = GetCommonDefines(op_def.precision); - if (use_simd_broadcast) - { - if (device_info.cl_version == OpenCLVersion::CL_2_0) - { - c += "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; - } - else if (device_info.SupportsExtension("cl_intel_subgroups")) - { - c += "#pragma OPENCL EXTENSION cl_intel_subgroups : enable\n"; - } - } - const int4 block_size = conv_params.block_size; - if (conv_params.fixed_work_group_size) - { - c += "__attribute__((reqd_work_group_size(" + std::to_string(work_group_size_.x) + ", " + - std::to_string(work_group_size_.y) + ", " + std::to_string(work_group_size_.z) + ")))\n"; - } - if (use_simd_broadcast && device_info.IsIntel()) - { - c += "__attribute__((intel_reqd_sub_group_size(" + std::to_string(simd_size) + ")))\n"; - } - std::string dst_oob_check; - if (src_def.HasAxis(Axis::DEPTH)) - { - if (conv_params.linear_spatial) - { - dst_oob_check = "DST_Z >= args.dst_tensor.Depth() || DST_S >= 
" - "args.dst_tensor.Slices()"; - } - else - { - dst_oob_check = "DST_X >= args.dst_tensor.Width() || DST_Z >= " - "args.dst_tensor.Depth() || DST_S >= args.dst_tensor.Slices()"; - } - } - else - { - if (conv_params.linear_spatial) - { - dst_oob_check = "DST_Y >= args.dst_tensor.Height() || DST_S >= " - "args.dst_tensor.Slices()"; - } - else - { - dst_oob_check = "DST_X >= args.dst_tensor.Width() || DST_Y >= " - "args.dst_tensor.Height() || DST_S >= args.dst_tensor.Slices()"; - } - } - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += GenerateBlockCoords(conv_params.block_size, work_group_launch_order_, - conv_params.linear_spatial, src_def.HasAxis(Axis::DEPTH)); - if (!late_oob_check) - { - c += " if (" + dst_oob_check + ") {\n"; - c += " return;\n"; - c += " }\n"; - } - if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS) - { - if (conv_params.linear_spatial) - { - c += " int lid = get_local_id(0);\n"; - } - else - { - c += " int lid = get_local_id(1) * " + std::to_string(work_group_size_.x) + - " + get_local_id(0);\n"; - } - } - if (use_simd_broadcast) - { - c += " int simd_id = get_sub_group_local_id();\n"; - } - for (int s = 0; s < block_size.w; ++s) - { - const std::string sind = std::to_string(s); - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - c += " ACCUM_FLT4 r" + generate_id_full(xind, yind, zind, sind) + - " = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n"; - } - } - } - } - if (!conv_params_.x_kernel_is_1) - { - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - const std::string xc = "(DST_X + " + xind + ")"; - if (stride_correction) - { - c += " int xc" + xind + " = " + - GetXStrideCorrected(xc, "args.src_tensor.Batch()", "args.stride_x", 
"args.padding_x") + - ";\n"; - } - else - { - c += " int xc" + xind + " = " + xc + " * args.stride_x + args.padding_x;\n"; - } - } - } - else - { - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - c += " int xc" + xind + " = DST_X + " + xind + ";\n"; - if (!src_def.CanReadOutOfBorder(Axis::WIDTH)) - { - c += " xc" + xind + " = clamp(xc" + xind + ", 0, args.src_tensor.Width() - 1);\n"; - } - } - } - if (!conv_params_.y_kernel_is_1) - { - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - const std::string yc = "(DST_Y + " + yind + ")"; - c += " int yc" + yind + " = " + yc + " * args.stride_y + args.padding_y;\n"; - } - } - else - { - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - c += " int yc" + yind + " = DST_Y + " + yind + ";\n"; - if (!src_def.CanReadOutOfBorder(Axis::HEIGHT)) - { - c += " yc" + yind + " = clamp(yc" + yind + ", 0, args.src_tensor.Height() - 1);\n"; - } - } - } - if (src_def.HasAxis(Axis::DEPTH)) - { - if (!conv_params_.z_kernel_is_1) - { - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - const std::string zc = "(DST_Z + " + zind + ")"; - c += " int zc" + zind + " = " + zc + " * args.stride_z + args.padding_z;\n"; - } - } - else - { - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - c += " int zc" + zind + " = DST_Z + " + zind + ";\n"; - if (!src_def.CanReadOutOfBorder(Axis::DEPTH)) - { - c += " zc" + zind + " = clamp(zc" + zind + ", 0, args.src_tensor.Depth() - 1);\n"; - } - } - } - } - bool trivial_kernel_size = conv_params_.x_kernel_is_1 && conv_params_.y_kernel_is_1; - if (src_def.HasAxis(Axis::DEPTH)) - { - trivial_kernel_size = trivial_kernel_size && conv_params_.z_kernel_is_1; - } - if (need_local_mem) - { - c += " __local " + weights_data_type + " weights_cache[" + std::to_string(local_mem_size) + - "];\n"; - } - else if 
(conv_params.AreWeightsBuffer()) - { - c += " " + weights_global_ptr + " weights_cache;\n"; - } - else if (!trivial_kernel_size) - { - c += " int filter_offset = 0;\n"; - } - if (conv_params.AreWeightsBuffer()) - { - if (conv_params.different_weights_for_height) - { - c += " " + weights_global_ptr + - " filters_loc = args.weights.GetPtr() + (DST_S * " - "args.src_tensor.Height() + DST_Y * " + - std::to_string(block_size.w) + ") * 4 * args.src_tensor.Slices();\n"; - } - else - { - std::string kernel_spatial_offset = ""; - if (!conv_params_.x_kernel_is_1) - { - kernel_spatial_offset += " * args.kernel_size_x"; - } - if (!conv_params_.y_kernel_is_1) - { - kernel_spatial_offset += " * args.kernel_size_y"; - } - if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) - { - kernel_spatial_offset += " * args.kernel_size_z"; - } - c += " " + weights_global_ptr + - " filters_loc = args.weights.GetPtr() + DST_S * 4 * " - "args.src_tensor.Slices()" + - kernel_spatial_offset + ";\n"; - } - } - if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) - { - c += " for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n"; - for (int z = 0; z < block_size.z; ++z) - { - const std::string zck = "zck" + std::to_string(z); - c += " int zck" + std::to_string(z) + " = kz * args.dilation_z + zc" + std::to_string(z) + - ";\n"; - if (!src_def.SupportsZeroClamp(Axis::DEPTH)) - { - c += " bool in_z" + std::to_string(z) + " = " + zck + " >= 0 && " + zck + - " < args.src_tensor.Depth();\n"; - if (!src_def.CanReadOutOfBorder(Axis::DEPTH)) - { - c += " " + zck + " = clamp(" + zck + ", 0, args.src_tensor.Depth() - 1);\n"; - } - } - } - } - if (!conv_params_.y_kernel_is_1) - { - c += " for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n"; - for (int y = 0; y < block_size.y; ++y) - { - const std::string yck = "yck" + std::to_string(y); - c += " int " + yck + " = ky * args.dilation_y + yc" + std::to_string(y) + ";\n"; - if (!src_def.SupportsZeroClamp(Axis::HEIGHT)) - { - c += " bool 
in_y" + std::to_string(y) + " = " + yck + " >= 0 && " + yck + - " < args.src_tensor.Height();\n"; - if (!src_def.CanReadOutOfBorder(Axis::HEIGHT)) - { - c += " " + yck + " = clamp(" + yck + ", 0, args.src_tensor.Height() - 1);\n"; - } - } - } - } - if (!conv_params_.x_kernel_is_1) - { - c += " for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n"; - for (int x = 0; x < block_size.x; ++x) - { - const std::string xck = "xck" + std::to_string(x); - c += " int xck" + std::to_string(x) + " = kx * args.dilation_x + xc" + std::to_string(x) + - ";\n"; - if (!src_def.SupportsZeroClamp(Axis::WIDTH)) - { - c += " bool in_x" + std::to_string(x) + " = " + xck + " >= 0 && " + xck + - " < args.src_tensor.Width();\n"; - if (!src_def.CanReadOutOfBorder(Axis::WIDTH)) - { - c += " " + xck + " = clamp(" + xck + ", 0, args.src_tensor.Width() - 1);\n"; - } - } - } - } - const bool need_multiple_slice_strides = - src_def.ReturnsZeroForNegOneRead() && !trivial_kernel_size; - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - std::string xc = conv_params.x_kernel_is_1 ? "xc" + xind : "xck" + xind; - std::string yc = conv_params.y_kernel_is_1 ? "yc" + yind : "yck" + yind; - const std::string id = generate_id(xind, yind, zind); - std::string coords = "" + xc + ", " + yc; - if (src_def.HasAxis(Axis::DEPTH)) - { - std::string zc = conv_params.z_kernel_is_1 ? 
"zc" + zind : "zck" + zind; - coords += ", " + zc; - } - if (src_def.IsLinear()) - { - c += " args.src_tensor.GetAddress(addr" + id + ", " + coords + ", 0);\n"; - if (need_multiple_slice_strides) - { - const std::string check = generate_check(xind, yind, zind); - c += " addr" + id + " = select(-1, addr" + id + ", (" + check + "));\n"; - c += - " int ds" + id + " = select(0, args.src_tensor.SliceStride(), (" + check + "));\n"; - } - } - } - } - } - if (src_def.IsLinear() && !need_multiple_slice_strides) - { - c += " int ds = args.src_tensor.SliceStride();\n"; - } - - auto declare_src = [&]() { - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - const std::string id = generate_id(xind, yind, zind); - c += " " + weights_data_type + " src" + id + ";\n"; - } - } - } - }; - const bool conditional_read = device_info.IsMali(); - auto read_src = [&]() { - const std::string cl_type = ToCLDataType(conv_params.weights_data_type); - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - std::string id = generate_id(xind, yind, zind); - const std::string check = generate_check(xind, yind, zind); - std::string address; - if (src_def.IsLinear()) - { - address = "addr" + id; - } - else - { - std::string xc = conv_params.x_kernel_is_1 ? "xc" + xind : "xck" + xind; - std::string yc = conv_params.y_kernel_is_1 ? "yc" + yind : "yck" + yind; - address = "" + xc + ", " + yc; - if (src_def.HasAxis(Axis::DEPTH)) - { - std::string zc = conv_params.z_kernel_is_1 ? 
"zc" + zind : "zck" + zind; - address += ", " + zc; - } - address += ", s"; - } - if (src_def.ReturnsZeroForNegOneRead()) - { - c += " src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address + ");\n"; - const std::string ds = trivial_kernel_size ? "ds" : "ds" + id; - c += " " + address + " += " + ds + ";\n"; - } - else - { - if (!check.empty()) - { - if (conditional_read) - { - c += " src" + id + " = " + check + " ? args.src_tensor.Read<" + cl_type + ">(" + - address + ") : (FLT4)(0.0f);\n"; - } - else - { - c += " src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address + - ") * (FLT)(" + check + ");\n"; - } - } - else - { - c += " src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address + ");\n"; - } - if (src_def.IsLinear()) - { - c += " " + address + " += ds;\n"; - } - } - } - } - } - }; - const bool weights_type_as_accum_type = !(op_def.precision == CalculationsPrecision::F32_F16 && - conv_params.weights_data_type == DataType::FLOAT16); - auto conv_core = [&](int shared_offset) { - const std::string channels[] = {"x", "y", "z", "w"}; - for (int s = 0; s < block_size.w; ++s) - { - const std::string sind = std::to_string(s); - if (weights_type_as_accum_type) - { - for (int ch = 0; ch < 4; ++ch) - { - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - std::string R = "r" + generate_id_full(xind, yind, zind, sind); - std::string S = "src" + generate_id(xind, yind, zind); - if (use_simd_broadcast) - { - int simd_id = (s * 4 + ch + shared_offset) / simd_size; - int thread_id = (s * 4 + ch + shared_offset) % simd_size; - std::string w_val_x = "sub_group_broadcast(simd_w" + std::to_string(simd_id) + - ".x, " + std::to_string(thread_id) + "u)"; - std::string w_val_y = "sub_group_broadcast(simd_w" + std::to_string(simd_id) + 
- ".y, " + std::to_string(thread_id) + "u)"; - std::string w_val_z = "sub_group_broadcast(simd_w" + std::to_string(simd_id) + - ".z, " + std::to_string(thread_id) + "u)"; - std::string w_val_w = "sub_group_broadcast(simd_w" + std::to_string(simd_id) + - ".w, " + std::to_string(thread_id) + "u)"; - c += " " + R + ".x += " + w_val_x + " * " + S + "." + channels[ch] + ";\n"; - c += " " + R + ".y += " + w_val_y + " * " + S + "." + channels[ch] + ";\n"; - c += " " + R + ".z += " + w_val_z + " * " + S + "." + channels[ch] + ";\n"; - c += " " + R + ".w += " + w_val_w + " * " + S + "." + channels[ch] + ";\n"; - } - else - { - const std::string weight_id = std::to_string(s * 4 + ch + shared_offset); - std::string w_val; - if (conv_params.AreWeightsBuffer()) - { - w_val = "weights_cache[" + weight_id + "]"; - } - else - { - w_val = "f" + weight_id; - } - c += " " + R + " += " + w_val + " * " + S + "." + channels[ch] + ";\n"; - } - } - } - } - } - } - else - { // F32_F16 precision and weights type is float16 - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - std::string R = "r" + generate_id_full(xind, yind, zind, sind); - std::string S = "src" + generate_id(xind, yind, zind); - std::vector<std::string> F(4); - for (int i = 0; i < 4; ++i) - { - std::string weight_id = std::to_string(s * 4 + i + shared_offset); - if (conv_params.AreWeightsBuffer()) - { - F[i] = "weights_cache[" + weight_id + "]"; - } - else - { - F[i] = "f" + weight_id; - } - } - c += " " + R + " += convert_float4(" + S + ".x * " + F[0] + " + " + S + ".y * " + - F[1] + " + " + S + ".z * " + F[2] + " + " + S + ".w * " + F[3] + ");\n"; - } - } - } - } - } - }; - - c += " int s = 0;\n"; - c += " do {\n"; - declare_src(); - const int total_work_items = work_group_size_.x * work_group_size_.y * 
work_group_size_.z; - if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP) - { - c += GenerateAsyncUpload("weights_cache", "filters_loc", - /*global_offset_name*/ "", local_mem_size); - } - else if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS) - { - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - c += - GenerateUploadByThreads("weights_cache", "filters_loc", - /*global_offset_name*/ "", "lid", total_work_items, local_mem_size); - } - else if (use_simd_broadcast) - { - int parts = local_mem_size / simd_size; - int reminder = local_mem_size % simd_size; - for (int i = 0; i < parts; ++i) - { - c += " FLT4 simd_w" + std::to_string(i) + " = filters_loc[simd_id + " + - std::to_string(i * simd_size) + "];\n"; - } - if (reminder) - { - c += " FLT4 simd_w" + std::to_string(parts) + ";\n"; - c += " if (simd_id < " + std::to_string(reminder) + ") {\n"; - c += " simd_w" + std::to_string(parts) + " = filters_loc[simd_id + " + - std::to_string(parts * simd_size) + "];\n"; - c += " }\n"; - } - } - else if (conv_params.AreWeightsBuffer()) - { // GLOBAL_MEM/CONSTANT_MEM - c += " weights_cache = filters_loc;\n"; - } - else - { // TEXTURES_MEM - for (int dst_s = 0; dst_s < block_size.w; ++dst_s) - { - std::string f_y = trivial_kernel_size ? 
"s" : "filter_offset"; - if (conv_params.different_weights_for_height) - { - f_y = "DST_Y * args.src_tensor.Slices() + s"; - } - c += absl::Substitute( - R"( FLT4 f$2 = args.weights0.Read(DST_S + $0, $1); - FLT4 f$3 = args.weights1.Read(DST_S + $0, $1); - FLT4 f$4 = args.weights2.Read(DST_S + $0, $1); - FLT4 f$5 = args.weights3.Read(DST_S + $0, $1); -)", - dst_s, f_y, dst_s * 4 + 0, dst_s * 4 + 1, dst_s * 4 + 2, dst_s * 4 + 3); - } - if (!trivial_kernel_size) - { - c += " filter_offset++;\n"; - } - } - read_src(); - c += " s += 1;\n"; - if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS) - { - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - } - conv_core(0); - for (int i = 1; i < conv_params.src_depth_loop_size; ++i) - { - read_src(); - conv_core(i * block_size.w * 4); - c += " s += 1;\n"; - } - if (conv_params.AreWeightsBuffer()) - { - c += " filters_loc += " + std::to_string(local_mem_size) + ";\n"; - } - c += " } while (s < args.src_tensor.Slices());\n"; - if (!conv_params.x_kernel_is_1) - { - c += " };\n"; - } - if (!conv_params.y_kernel_is_1) - { - c += " };\n"; - } - if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) - { - c += " };\n"; - } - if (conv_params.AreWeightsBuffer()) - { - if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP) - { - c += GenerateAsyncUpload("weights_cache", "args.biases.GetPtr()", "DST_S", block_size.w); - } - else if (conv_params.weights_upload_type == - ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS) - { - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - c += GenerateUploadByThreads("weights_cache", "args.biases.GetPtr()", "DST_S", "lid", - total_work_items, block_size.w); - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - } - else - { - c += " weights_cache = args.biases.GetPtr() + DST_S;\n"; - } - } - if (late_oob_check) - { - c += " if (" + dst_oob_check + ") {\n"; - c += " return;\n"; - c += " }\n"; - } - - auto generate_dst_check = [&](int 
x, int y, int z) { - std::string check; - const std::vector<Axis> axes{Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH}; - const std::vector<std::string> names{"Width()", "Height()", "Depth()"}; - std::vector<std::string> coords(3); - coords[0] = "DST_X + " + std::to_string(x); - coords[1] = "DST_Y + " + std::to_string(y); - coords[2] = "DST_Z + " + std::to_string(z); - const std::vector<int> ids{x, y, z}; - for (size_t i = 0; i < axes.size(); ++i) - { - const auto &axis = axes[i]; - if (src_def.HasAxis(axis) && ids[i] != 0) - { - if (!check.empty()) - { - check += " && "; - } - check += coords[i] + " < args.dst_tensor." + names[i]; - } - } - return check; - }; - - for (int s = 0; s < block_size.w; ++s) - { - const std::string sind = std::to_string(s); - c += " if (DST_S + " + sind + " >= args.dst_tensor.Slices()) return;\n"; - c += " {\n"; - if (conv_params.AreWeightsBuffer()) - { - c += " FLT4 bias_val = TO_FLT4(weights_cache[" + sind + "]);\n"; - } - else - { - c += " FLT4 bias_val = args.biases.Read(DST_S + " + sind + ");\n"; - } - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - const std::string id = generate_id_full(xind, yind, zind, sind); - const std::string check = generate_dst_check(x, y, z); - std::string coords = "DST_X + " + xind + ", DST_Y + " + yind; - if (src_def.HasAxis(Axis::DEPTH)) - { - coords += ", DST_Z + " + zind; - } - coords += ", DST_S + " + sind; - if (!check.empty()) - { - c += " if (" + check + ") {\n"; - } - else - { - c += " {\n"; - } - c += " FLT4 res = TO_FLT4(r" + id + ") + bias_val;\n"; - c += " args.dst_tensor.Write(res, " + coords + ");\n"; - c += " }\n"; - } - } - } - c += " }\n"; - } - c += "}\n"; - return c; -} - -ConvPowerVR::ConvParams -ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, const 
OperationDef &definition, - int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1, - bool different_weights_for_height, const BHWC *dst_shape) -{ - ConvParams conv_params; - conv_params.linear_spatial = false; - conv_params.weights_data_type = DeduceDataTypeFromPrecision(definition.precision); - conv_params.x_kernel_is_1 = x_kernel_is_1; - conv_params.y_kernel_is_1 = y_kernel_is_1; - conv_params.different_weights_for_height = different_weights_for_height; - if (device_info.IsNvidia()) - { - if (different_weights_for_height) - { - work_group_size_ = int3(32, 1, 1); - work_group_launch_order_ = int3(2, 0, 1); - conv_params.fixed_work_group_size = true; - } - else - { - conv_params.linear_spatial = true; - work_group_size_ = int3(32, 1, 1); - work_group_launch_order_ = int3(1, 0, 2); - conv_params.fixed_work_group_size = true; - } - conv_params.block_size = int4(2, 1, 1, 4); - conv_params.src_depth_loop_size = 1; - conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS; - if (dst_depth % 4 == 0 || dst_depth >= 8) - { - conv_params.block_size.w = 4; - } - else if (dst_depth % 2 == 0 || dst_depth >= 4) - { - conv_params.block_size.w = 2; - } - else - { - conv_params.block_size.w = dst_depth; - } - if (dst_shape) - { - int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth; - float task_size_per_cu = static_cast<float>(task_size) / device_info.compute_units_count; - int block_size = - conv_params.block_size.x * conv_params.block_size.y * conv_params.block_size.w; - float threads_per_cu = task_size_per_cu / block_size; - float warps_per_cu = threads_per_cu / 32 /*warp_size*/; - if (warps_per_cu < 8.0f) - { - conv_params.block_size.x = 1; - } - if (warps_per_cu < 4.0f && conv_params.block_size.w >= 4) - { - conv_params.block_size.w /= 2; - } - if (warps_per_cu < 2.0f && conv_params.block_size.w >= 2) - { - conv_params.block_size.w /= 2; - } - } - if (src_depth % 2 == 0) - { - conv_params.src_depth_loop_size = 2; - } - if 
(src_depth % 4 == 0 && conv_params.block_size.w <= 2) - { - conv_params.src_depth_loop_size = 4; - } - } - else if (device_info.IsPowerVR()) - { - if (different_weights_for_height) - { - work_group_size_ = int3(32, 1, 1); - work_group_launch_order_ = int3(2, 0, 1); - conv_params.fixed_work_group_size = true; - } - else - { - conv_params.linear_spatial = true; - work_group_size_ = int3(32, 1, 1); - work_group_launch_order_ = int3(1, 0, 2); - conv_params.fixed_work_group_size = true; - } - conv_params.weights_data_type = - definition.precision == CalculationsPrecision::F16 ? DataType::FLOAT16 : DataType::FLOAT32; - conv_params.block_size = int4(1, 1, 1, 4); - conv_params.src_depth_loop_size = 1; - conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP; - if (dst_depth % 8 == 0 || dst_depth >= 32) - { - conv_params.block_size.w = 8; - } - else if (dst_depth % 4 == 0 || dst_depth >= 8) - { - conv_params.block_size.w = 4; - } - else if (dst_depth % 2 == 0 || dst_depth >= 4) - { - conv_params.block_size.w = 2; - } - else - { - conv_params.block_size.w = dst_depth; - } - if (definition.precision == CalculationsPrecision::F16) - { - conv_params.block_size.w = std::min(4, conv_params.block_size.w); - if (src_depth % 2 == 0) - { - conv_params.src_depth_loop_size = 2; - } - if (src_depth % 4 == 0 && conv_params.block_size.w <= 2) - { - conv_params.src_depth_loop_size = 4; - } - if (conv_params.block_size.w == 1) - { - if (src_depth % 2 == 0) - { - conv_params.src_depth_loop_size = 2; - } - if (src_depth % 4 == 0) - { - conv_params.src_depth_loop_size = 4; - } - if (src_depth <= 8) - { - conv_params.src_depth_loop_size = src_depth; - } - } - conv_params.block_size.x = 2; - } - } - else if (device_info.IsAMD()) - { - if (different_weights_for_height) - { - work_group_size_ = int3(32, 1, 1); - work_group_launch_order_ = int3(2, 0, 1); - conv_params.fixed_work_group_size = true; - } - else - { - work_group_size_ = int3(8, 4, 1); - work_group_launch_order_ = 
int3(2, 0, 1); - conv_params.fixed_work_group_size = true; - } - - conv_params.block_size = int4(2, 1, 1, 1); - if (x_kernel_is_1 && y_kernel_is_1) - { - conv_params.block_size.y = 2; - } - conv_params.src_depth_loop_size = 1; - conv_params.weights_upload_type = WeightsUploadType::CONSTANT_MEM; - if (dst_depth % 8 == 0 || dst_depth >= 32) - { - conv_params.block_size.w = 8; - } - else if (dst_depth % 4 == 0 || dst_depth >= 8) - { - conv_params.block_size.w = 4; - } - else if (dst_depth % 2 == 0 || dst_depth >= 4) - { - conv_params.block_size.w = 2; - } - else - { - conv_params.block_size.w = 1; - } - if (src_depth % 2 == 0 && src_depth >= 16) - { - conv_params.src_depth_loop_size = 2; - } - } - else if (device_info.IsMali()) - { - int block_size = 2; - if (dst_shape) - { - int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth; - block_size = GetRecommendedBlockSizeForConv(device_info, definition.precision, task_size); - } - if (!x_kernel_is_1 || !y_kernel_is_1) - { - block_size = std::min(block_size, 4); - } - if (block_size == 8) - { - if (dst_depth == 1 || dst_depth == 3) - { - conv_params.block_size = int4(2, 2, 1, 1); - } - else - { - conv_params.block_size = int4(2, 2, 1, 2); - } - } - else if (block_size == 4) - { - if (dst_depth == 1 || dst_depth == 3) - { - conv_params.block_size = int4(2, 2, 1, 1); - } - else - { - conv_params.block_size = int4(2, 1, 1, 2); - } - } - else if (block_size == 2) - { - conv_params.block_size = int4(2, 1, 1, 1); - } - else - { - conv_params.block_size = int4(1, 1, 1, 1); - } - conv_params.src_depth_loop_size = 1; - MaliInfo mali_info = device_info.mali_info; - if (src_depth % 2 == 0 && block_size <= 2 && !mali_info.IsMidgard()) - { - conv_params.src_depth_loop_size = 2; - } - if (src_depth % 4 == 0 && block_size == 1 && !mali_info.IsMidgard() && - definition.precision == CalculationsPrecision::F16) - { - conv_params.src_depth_loop_size = 4; - } - work_group_size_ = int3(4, 4, 1); - work_group_launch_order_ = 
int3(0, 1, 2); - conv_params.fixed_work_group_size = false; - conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM; - } - else if (device_info.IsAdreno()) - { - conv_params.block_size = int4(2, 2, 1, 2); - if (device_info.IsAdreno3xx()) - { - if (definition.precision == CalculationsPrecision::F16) - { - conv_params.block_size = int4(2, 2, 1, 2); - } - else if (definition.precision == CalculationsPrecision::F32_F16) - { - conv_params.block_size = int4(2, 1, 1, 2); - } - else - { // F32 - conv_params.block_size = int4(2, 2, 1, 1); - } - } - work_group_size_ = int3(8, 2, 1); - work_group_launch_order_ = int3(0, 1, 2); - conv_params.fixed_work_group_size = false; - conv_params.src_depth_loop_size = 1; - if (definition.src_tensors.size() == 2) - { - // dynamic weights supported only with buffers. - conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM; - } - else - { - conv_params.weights_upload_type = WeightsUploadType::TEXTURES_MEM_X4; - } - } - else if (device_info.IsIntel()) - { - if (different_weights_for_height) - { - work_group_size_ = int3(16, 1, 1); - work_group_launch_order_ = int3(0, 1, 2); - conv_params.fixed_work_group_size = true; - } - else - { - conv_params.linear_spatial = true; - work_group_size_ = int3(16, 1, 1); - work_group_launch_order_ = int3(0, 1, 2); - conv_params.fixed_work_group_size = true; - } - conv_params.block_size = int4(1, 1, 1, 4); - conv_params.src_depth_loop_size = 1; - int sub_group_size = 16; - const bool supports_subgroups = device_info.SupportsExtension("cl_khr_subgroups") || - device_info.SupportsExtension("cl_intel_subgroups"); - if (definition.precision != CalculationsPrecision::F32_F16 && supports_subgroups && - device_info.SupportsExtension("cl_intel_required_subgroup_size") && - device_info.SupportsSubGroupWithSize(sub_group_size)) - { - conv_params.weights_upload_type = WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST; - conv_params.simd_size = sub_group_size; - } - else - { - 
conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS; - } - if (dst_depth % 4 == 0 || dst_depth >= 8) - { - conv_params.block_size.w = 4; - } - else if (dst_depth % 2 == 0 || dst_depth >= 4) - { - conv_params.block_size.w = 2; - } - else - { - conv_params.block_size.w = dst_depth; - } - if (src_depth % 2 == 0) - { - conv_params.src_depth_loop_size = 2; - } - if (src_depth % 4 == 0 && conv_params.block_size.w <= 2) - { - conv_params.src_depth_loop_size = 4; - } - } - else - { - conv_params.block_size = int4(1, 1, 1, 4); - work_group_size_ = int3(8, 2, 1); - work_group_launch_order_ = int3(0, 1, 2); - conv_params.fixed_work_group_size = false; - conv_params.src_depth_loop_size = 1; - conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM; - if (dst_depth % 4 == 0 || dst_depth >= 8) - { - conv_params.block_size.w = 4; - } - else if (dst_depth % 2 == 0 || dst_depth >= 4) - { - conv_params.block_size.w = 2; - } - else - { - conv_params.block_size.w = dst_depth; - } - if (src_depth % 2 == 0) - { - conv_params.src_depth_loop_size = 2; - } - if (src_depth % 4 == 0 && conv_params.block_size.w <= 2) - { - conv_params.src_depth_loop_size = 4; - } - } - - return conv_params; -} - -ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - const bool x_kernel_is_1 = attr.weights.shape.w == 1 && attr.strides.w == 1 && - attr.dilations.w == 1 && attr.padding.prepended.w == 0 && - attr.padding.appended.w == 0; - const bool y_kernel_is_1 = attr.weights.shape.h == 1 && attr.strides.h == 1 && - attr.dilations.h == 1 && attr.padding.prepended.h == 0 && - attr.padding.appended.h == 0; - return GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1, - y_kernel_is_1, false, dst_shape); 
-} - -ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution3DAttributes &attr, - const BHWDC *dst_shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - const bool x_kernel_is_1 = attr.weights.shape.w == 1 && attr.strides.w == 1 && - attr.dilations.w == 1 && attr.padding.prepended.w == 0 && - attr.padding.appended.w == 0; - const bool y_kernel_is_1 = attr.weights.shape.h == 1 && attr.strides.h == 1 && - attr.dilations.h == 1 && attr.padding.prepended.h == 0 && - attr.padding.appended.h == 0; - const bool z_kernel_is_1 = attr.weights.shape.d == 1 && attr.strides.d == 1 && - attr.dilations.d == 1 && attr.padding.prepended.d == 0 && - attr.padding.appended.d == 0; - - ConvPowerVR::ConvParams result; - BHWC shape; - if (dst_shape) - { - shape.b = dst_shape->b; - shape.h = dst_shape->h * dst_shape->d; - shape.w = dst_shape->w; - shape.c = dst_shape->c; - result = GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1, - y_kernel_is_1, false, &shape); - } - else - { - result = GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1, - y_kernel_is_1, false, nullptr); - } - result.z_kernel_is_1 = z_kernel_is_1; - return result; -} - -ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, - const BHWC *dst_shape) -{ - const int dst_depth = DivideRoundUp(weights_shape.b, 4); - const int src_depth = DivideRoundUp(weights_shape.c, 4); - const bool x_kernel_is_1 = weights_shape.w == 1 && attr.strides.w == 1 && attr.dilations.w == 1 && - attr.padding.prepended.w == 0 && attr.padding.appended.w == 0; - const bool y_kernel_is_1 = weights_shape.h == 1 && attr.strides.h == 1 && attr.dilations.h == 1 && - attr.padding.prepended.h == 0 && 
attr.padding.appended.h == 0; - return GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1, - y_kernel_is_1, false, dst_shape); -} - -ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, - const OperationDef &definition, - const FullyConnectedAttributes &attr, - const BHWC *dst_shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvPowerVR::ConvParams params = - GuessBestParams(device_info, definition, src_depth, dst_depth, true, true, false, dst_shape); - work_group_size_.x *= work_group_size_.y; - work_group_size_.y = 1; - params.block_size.x *= params.block_size.y; - params.block_size.y = 1; - return params; -} - -ConvPowerVR::ConvParams ConvPowerVR::GuessBestParamsWinograd(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvPowerVR::ConvParams params = - GuessBestParams(device_info, definition, src_depth, dst_depth, true, true, true, dst_shape); - params.block_size.x *= params.block_size.y; - params.block_size.y = 1; - return params; -} - -ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *dst_shape) -{ - ConvPowerVR result(definition, attr, device_info, dst_shape); - result.GenerateCode(device_info); - result.UploadData(attr.weights, attr.bias); - return result; -} - -ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition, - const FullyConnectedAttributes &attr, const BHWC *dst_shape) -{ - ConvPowerVR result(definition, attr, device_info, dst_shape); - result.GenerateCode(device_info); - result.UploadData(attr.weights, attr.bias); - return result; -} - -ConvPowerVR 
CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, const BHWC *dst_shape) -{ - ConvPowerVR result(definition, attr, weights_shape, device_info, dst_shape); - result.GenerateCode(device_info); - result.UploadBias(attr.bias); - return result; -} - -ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape) -{ - ConvPowerVR result(definition); - result.conv_params_ = result.GuessBestParamsWinograd(device_info, definition, attr, dst_shape); - result.GenerateCode(device_info); - result.UploadDataForWinograd4x4To6x6(attr.weights); - return result; -} - -ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution3DAttributes &attr, const BHWDC *dst_shape) -{ - ConvPowerVR result(definition, attr, device_info, dst_shape); - result.GenerateCode(device_info); - result.UploadWeights(attr.weights); - result.UploadBias(attr.bias); - return result; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h deleted file mode 100644 index f83f05730..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__ - -#include <cstring> -#include <vector> - -#include "open_cl/Buffer.h" -#include "open_cl/ClDevice.h" -#include "open_cl/kernels/ConvCommon.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/kernels/Util.h" -#include "open_cl/LinearStorage.h" -#include "open_cl/Tensor.h" -#include "open_cl/Texture2d.h" -#include "open_cl/Util.h" -#include "open_cl/DataType.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" -#include "open_cl/WinogradUtil.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class ConvPowerVR : public GPUOperation -{ -public: - ConvPowerVR() = default; - void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const override; - absl::Status BindArguments(ArgumentsBinder *args) override; - int3 GetGridSize() const override; - - ConvWeightsDescription GetConvWeightsDescription() const - { - ConvWeightsDescription desc; - desc.layout = ConvWeightsLayout::kOHWIOGroupI4O4; - desc.output_group_size = conv_params_.block_size.w; - return desc; - } - - // Move only - ConvPowerVR(ConvPowerVR &&operation); - ConvPowerVR &operator=(ConvPowerVR &&operation); - ConvPowerVR(const ConvPowerVR &) = delete; - ConvPowerVR &operator=(const ConvPowerVR &) = delete; - -private: - enum class 
WeightsUploadType - { - LOCAL_MEM_ASYNC_SUBGROUP, // we use it for PowerVR with workgroup size = 32 - LOCAL_MEM_BY_THREADS, - GLOBAL_MEM, - CONSTANT_MEM, - PRIVATE_MEM_SIMD_BROADCAST, - TEXTURES_MEM_X4, // 4 textures for weights - }; - - struct ConvParams - { - // Usually we use this combinations for CalculationPrecision: - // F32: all F32 - // F16: all F16 - // F32_F16: all besides accumulator is F16, including weights - // But for PowerVR we can achieve better performance in F32_F16 with F32 - // weights, so for PowerVR in this kernel we have F32 weights for - // F32_F16 precision mode - DataType weights_data_type; // used for weights and biases - int4 block_size; // WHDS - bool fixed_work_group_size; - bool linear_spatial; // spatial dimensions are Width/Height/Depth - bool different_weights_for_height; - int src_depth_loop_size; - WeightsUploadType weights_upload_type; - bool x_kernel_is_1; - bool y_kernel_is_1; - bool z_kernel_is_1; - - // used only with PRIVATE_MEM_SIMD_BROADCAST - int simd_size = 1; - - bool AreWeightsBuffer() const - { - return weights_upload_type != WeightsUploadType::TEXTURES_MEM_X4; - } - - bool IsPrivateMemBroadcast() const - { - return weights_upload_type == WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST; - } - }; - - ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr, - const DeviceInfo &device_info, const BHWC *dst_shape = nullptr); - ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr, - const BHWC &weights_shape, const DeviceInfo &device_info, - const BHWC *dst_shape = nullptr); - ConvPowerVR(const OperationDef &definition, const FullyConnectedAttributes &attr, - const DeviceInfo &device_info, const BHWC *dst_shape = nullptr); - explicit ConvPowerVR(const OperationDef &definition); - ConvPowerVR(const OperationDef &definition, const Convolution3DAttributes &attr, - const DeviceInfo &device_info, const BHWDC *dst_shape = nullptr); - - void GenerateCode(const DeviceInfo 
&device_info); - - template <DataType T> - void UploadData(const InternalTensor<OHWI, T> &weights, const InternalTensor<Linear, T> &biases); - template <DataType T> void UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights); - - template <DataType T> void UploadWeights(const InternalTensor<OHWI, T> &weights); - - template <DataType T> void UploadWeights(const InternalTensor<OHWDI, T> &weights); - - template <DataType T> void UploadBias(const InternalTensor<Linear, T> &bias); - - friend ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *dst_shape); - - friend ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, - const OperationDef &definition, - const FullyConnectedAttributes &attr, const BHWC *dst_shape); - - friend ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, - const BHWC *dst_shape); - - friend ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape); - - friend ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution3DAttributes &attr, - const BHWDC *dst_shape); - - ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC &weights_shape, - const BHWC *dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition, - const FullyConnectedAttributes &attr, const BHWC *dst_shape = nullptr); - ConvParams GuessBestParamsWinograd(const DeviceInfo 
&device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution3DAttributes &attr, const BHWDC *dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition, - int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1, - bool different_weights_for_height, const BHWC *dst_shape = nullptr); - - std::string GenerateConv(const DeviceInfo &device_info, const OperationDef &op_def, - bool stride_correction, const ConvParams &conv_params); - - int4 stride_; - int4 padding_; - int4 kernel_size_; - int4 dilation_; - ConvParams conv_params_; -}; - -template <DataType T> -void ConvPowerVR::UploadData(const InternalTensor<OHWI, T> &weights, - const InternalTensor<Linear, T> &biases) -{ - UploadWeights(weights); - UploadBias(biases); -} - -template <DataType T> -void ConvPowerVR::UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights) -{ - InternalTensor<OHWI, T> wino_weights; - RearrangeWeightsToWinograd4x4To6x6Weights(weights, &wino_weights); - UploadWeights(wino_weights); - InternalTensor<Linear, DataType::FLOAT32> biases; - biases.shape = Linear(weights.shape.o); - biases.data.resize(weights.shape.o, 0.0f); - UploadBias(biases); -} - -template <DataType T> void ConvPowerVR::UploadBias(const InternalTensor<Linear, T> &bias) -{ - BufferDescriptor desc; - desc.element_type = conv_params_.weights_data_type; - desc.element_size = 4; - desc.memory_type = - conv_params_.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM - ? MemoryType::CONSTANT - : MemoryType::GLOBAL; - const int float_size = sizeof(float); - // TODO - // conv_params_.weights_data_type == DataType::FLOAT32 ? 
sizeof(float) : sizeof(half); - int aligned_channels = AlignByN(bias.shape.v, 4 * conv_params_.block_size.w); - desc.size = float_size * aligned_channels; - desc.data.resize(desc.size); - if (conv_params_.weights_data_type == DataType::FLOAT32) - { - float *gpu_data = reinterpret_cast<float *>(desc.data.data()); - for (int i = 0; i < aligned_channels; ++i) - { - gpu_data[i] = i < bias.shape.v ? bias.data[i] : 0.0f; - } - } - // else - // { - // half *gpu_data = reinterpret_cast<half *>(desc.data.data()); - // for (int i = 0; i < aligned_channels; ++i) - // { - // gpu_data[i] = i < bias.shape.v ? bias.data[i] : 0.0f; - // } - // } - args_.AddObject("biases", absl::make_unique<BufferDescriptor>(std::move(desc))); -} - -template <DataType T> void ConvPowerVR::UploadWeights(const InternalTensor<OHWI, T> &weights) -{ - const int dst_slices = AlignByN(DivideRoundUp(weights.shape.o, 4), conv_params_.block_size.w); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - - const bool f32_weights = conv_params_.weights_data_type == DataType::FLOAT32; - const int float4_size = sizeof(float4); - // TODO - // f32_weights ? 
sizeof(float4) : sizeof(half4); - - const int elements_count = weights.shape.h * weights.shape.w * src_slices * dst_slices * 4; - - std::vector<uint8_t> data(float4_size * elements_count); - - if (f32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(data.data()); - if (conv_params_.AreWeightsBuffer()) - { - RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.w, - absl::MakeSpan(ptr, elements_count)); - } - else - { - RearrangeWeightsToI4HWIOOGroupO4(weights, conv_params_.block_size.w, - absl::MakeSpan(ptr, elements_count)); - } - } - // else - // { - // half4 *ptr = reinterpret_cast<half4 *>(data.data()); - // if (conv_params_.AreWeightsBuffer()) - // { - // RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.w, - // absl::MakeSpan(ptr, elements_count)); - // } - // else - // { - // RearrangeWeightsToI4HWIOOGroupO4(weights, conv_params_.block_size.w, - // absl::MakeSpan(ptr, elements_count)); - // } - // } - if (conv_params_.AreWeightsBuffer()) - { - BufferDescriptor desc; - desc.element_type = conv_params_.weights_data_type; - desc.element_size = 4; - desc.memory_type = - conv_params_.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM - ? 
MemoryType::CONSTANT - : MemoryType::GLOBAL; - desc.size = float4_size * elements_count; - desc.data = std::move(data); - args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc))); - } - else - { - const int texture_width = dst_slices; - const int texture_height = src_slices * weights.shape.h * weights.shape.w; - const int sub_size = float4_size * texture_width * texture_height; - for (int i = 0; i < 4; ++i) - { - Texture2DDescriptor desc; - desc.element_type = conv_params_.weights_data_type; - desc.size = int2(texture_width, texture_height); - desc.data.resize(sub_size); - std::memcpy(desc.data.data(), data.data() + sub_size * i, sub_size); - const std::string name = "weights" + std::to_string(i); - args_.AddObject(name, absl::make_unique<Texture2DDescriptor>(std::move(desc))); - } - } -} - -template <DataType T> void ConvPowerVR::UploadWeights(const InternalTensor<OHWDI, T> &weights) -{ - const int block_size = conv_params_.block_size.w; - const int dst_slices = AlignByN(DivideRoundUp(weights.shape.o, 4), block_size); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - - const int elements_count = - weights.shape.d * weights.shape.h * weights.shape.w * src_slices * dst_slices * 4; - const bool f32_weights = definition_.precision == CalculationsPrecision::F32; - - const int float4_size = f32_weights ? 
16 : 8; - - std::vector<uint8_t> data(float4_size * elements_count); - - if (f32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(data.data()); - if (conv_params_.AreWeightsBuffer()) - { - RearrangeWeightsToODHWIOGroupI4O4(weights, conv_params_.block_size.w, - absl::MakeSpan(ptr, elements_count)); - } - else - { - RearrangeWeightsToI4DHWIOOGroupO4(weights, conv_params_.block_size.w, - absl::MakeSpan(ptr, elements_count)); - } - } - // else - // { - // half4 *ptr = reinterpret_cast<half4 *>(data.data()); - // if (conv_params_.AreWeightsBuffer()) - // { - // RearrangeWeightsToODHWIOGroupI4O4(weights, conv_params_.block_size.w, - // absl::MakeSpan(ptr, elements_count)); - // } - // else - // { - // RearrangeWeightsToI4DHWIOOGroupO4(weights, conv_params_.block_size.w, - // absl::MakeSpan(ptr, elements_count)); - // } - // } - - if (conv_params_.AreWeightsBuffer()) - { - BufferDescriptor desc; - desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 4; - desc.size = float4_size * elements_count; - desc.data = std::move(data); - args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc))); - } - else - { - const int texture_width = dst_slices; - const int texture_height = src_slices * weights.shape.d * weights.shape.h * weights.shape.w; - int sub_size = float4_size * texture_width * texture_height; - for (int i = 0; i < 4; ++i) - { - Texture2DDescriptor desc; - desc.element_type = f32_weights ? 
DataType::FLOAT32 : DataType::FLOAT16; - desc.size = int2(texture_width, texture_height); - desc.data.resize(sub_size); - memcpy(desc.data.data(), data.data() + sub_size * i, sub_size); - const std::string name = "weights" + std::to_string(i); - args_.AddObject(name, absl::make_unique<Texture2DDescriptor>(std::move(desc))); - } - } -} - -ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *dst_shape = nullptr); - -ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition, - const FullyConnectedAttributes &attr, - const BHWC *dst_shape = nullptr); - -ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, - const BHWC *dst_shape = nullptr); - -ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape = nullptr); - -ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution3DAttributes &attr, - const BHWDC *dst_shape = nullptr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc deleted file mode 100644 index 95172bd05..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "open_cl/kernels/ConvWeightsConverter.h" - -#include <string> - -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -ConverterToConvWeights::ConverterToConvWeights(const OperationDef &definition, - const ConvWeightsDescription &conv_weights_desc) - : GPUOperation(definition), conv_weights_desc_(conv_weights_desc) -{ - code_ = GetConverterToConvWeightsCode(definition_, conv_weights_desc_); -} - -ConverterToConvWeights::ConverterToConvWeights(ConverterToConvWeights &&operation) - : GPUOperation(std::move(operation)), conv_weights_desc_(operation.conv_weights_desc_) -{ -} - -ConverterToConvWeights &ConverterToConvWeights::operator=(ConverterToConvWeights &&operation) -{ - if (this != &operation) - { - conv_weights_desc_ = operation.conv_weights_desc_; - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - -std::string ConverterToConvWeights::GetConverterToConvWeightsCode( - const OperationDef &op_def, const ConvWeightsDescription &conv_weights_desc) -{ - AddSrcTensor("src_tensor", op_def.src_tensors[0]); - AddDstTensor("dst_tensor", op_def.dst_tensors[0]); - args_.AddFloat("mask_x"); - args_.AddFloat("mask_y"); - args_.AddFloat("mask_z"); - args_.AddFloat("mask_w"); - - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int GROUP_SIZE = " + std::to_string(conv_weights_desc.output_group_size) + ";\n"; - c += " int O = get_global_id(0) * 4;\n"; - c += " int 
I = get_global_id(1);\n"; - c += " int Z = get_global_id(2);\n"; - c += " int W = Z % args.src_tensor.Width();\n"; - c += " int H = Z / args.src_tensor.Width();\n"; - c += " if (O >= args.src_tensor.Batch() || I >= args.src_tensor.Slices() || " - "H >= args.src_tensor.Height()) return;\n"; - c += " FLT4 v0 = args.src_tensor.Read(W, H, I, O + 0);\n"; - c += " FLT4 v1 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n"; - c += " FLT4 v2 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n"; - c += " FLT4 v3 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n"; - c += " if (O + 1 < args.src_tensor.Batch()) {\n"; - c += " v1 = args.src_tensor.Read(W, H, I, O + 1);\n"; - c += " }\n"; - c += " if (O + 2 < args.src_tensor.Batch()) {\n"; - c += " v2 = args.src_tensor.Read(W, H, I, O + 2);\n"; - c += " }\n"; - c += " if (O + 3 < args.src_tensor.Batch()) {\n"; - c += " v3 = args.src_tensor.Read(W, H, I, O + 3);\n"; - c += " }\n"; - c += " if (I == args.src_tensor.Slices() - 1) {\n"; - c += " FLT4 mask = (FLT4)(args.mask_x, args.mask_y, args.mask_z, " - "args.mask_w);\n"; - c += " v0 *= mask;\n"; - c += " v1 *= mask;\n"; - c += " v2 *= mask;\n"; - c += " v3 *= mask;\n"; - c += " }\n"; - c += " FLT4 r0 = (FLT4)(v0.x, v1.x, v2.x, v3.x);\n"; - c += " FLT4 r1 = (FLT4)(v0.y, v1.y, v2.y, v3.y);\n"; - c += " FLT4 r2 = (FLT4)(v0.z, v1.z, v2.z, v3.z);\n"; - c += " FLT4 r3 = (FLT4)(v0.w, v1.w, v2.w, v3.w);\n"; - c += " int d_index = O / (GROUP_SIZE * 4);\n"; - c += " int k_index = (O % (GROUP_SIZE * 4)) / 4;\n"; - c += " int dst_offset = (((d_index * args.src_tensor.Height() + H) * " - "args.src_tensor.Width() + W) * " - "args.src_tensor.Slices() + I) * GROUP_SIZE + " - "k_index;\n"; - c += " int address0 = dst_offset * 4 + 0;\n"; - c += " int address1 = dst_offset * 4 + 1;\n"; - c += " int address2 = dst_offset * 4 + 2;\n"; - c += " int address3 = dst_offset * 4 + 3;\n"; - c += " args.dst_tensor.WriteLinear(r0, dst_offset * 4 + 0)\n;"; - c += " args.dst_tensor.WriteLinear(r1, dst_offset * 4 + 1)\n;"; - c += " 
args.dst_tensor.WriteLinear(r2, dst_offset * 4 + 2)\n;"; - c += " args.dst_tensor.WriteLinear(r3, dst_offset * 4 + 3)\n;"; - c += "}\n"; - return c; -} - -absl::Status ConverterToConvWeights::BindArguments(ArgumentsBinder *args) -{ - float4 mask = GetMaskForLastPlane(src_[0]->Channels()); - RETURN_IF_ERROR(args->SetFloat("mask_x", mask.x)); - RETURN_IF_ERROR(args->SetFloat("mask_y", mask.y)); - RETURN_IF_ERROR(args->SetFloat("mask_z", mask.z)); - return args->SetFloat("mask_w", mask.w); -} - -int3 ConverterToConvWeights::GetGridSize() const -{ - const int grid_x = - DivideRoundUp(AlignByN(src_[0]->Batch(), 4 * conv_weights_desc_.output_group_size), 4); - const int grid_y = src_[0]->Slices(); - const int grid_z = src_[0]->Width() * src_[0]->Height(); - return int3(grid_x, grid_y, grid_z); -} - -ConverterToConvWeights CreateConverterToConvWeights(const OperationDef &definition, - const ConvWeightsDescription &conv_weights_desc) -{ - return ConverterToConvWeights(definition, conv_weights_desc); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h deleted file mode 100644 index bb68977eb..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__ - -#include "open_cl/ClCommandQueue.h" -#include "open_cl/ClKernel.h" -#include "open_cl/kernels/ConvCommon.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/Status.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class ConverterToConvWeights : public GPUOperation -{ -public: - ConverterToConvWeights(const OperationDef &definition, - const ConvWeightsDescription &conv_weights_desc); - absl::Status BindArguments(ArgumentsBinder *args) override; - int3 GetGridSize() const override; - - // Move only - ConverterToConvWeights(ConverterToConvWeights &&operation); - ConverterToConvWeights &operator=(ConverterToConvWeights &&operation); - ConverterToConvWeights(const ConverterToConvWeights &) = delete; - ConverterToConvWeights &operator=(const ConverterToConvWeights &) = delete; - -private: - std::string GetConverterToConvWeightsCode(const OperationDef &op_def, - const ConvWeightsDescription &conv_weights_desc); - - ConvWeightsDescription conv_weights_desc_; -}; - -// We expect src BHWC tensor and we assume that B is O, H = H, W = W, C is I -// as dst we expect Tensor with storage type BUFFER and -// dst.b * dst.h * dst.w * dst.c = AlignByN(src.b, 4) * src.h * src.w -// AlignByN(src.c, 4) -ConverterToConvWeights -CreateConverterToConvWeights(const OperationDef &definition, - const ConvWeightsDescription &conv_weights_desc); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc deleted file mode 100644 index cc2bc41d4..000000000 
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc +++ /dev/null @@ -1,592 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Converter.h" - -#include <algorithm> -#include <array> -#include <string> - -#include "open_cl/Arguments.h" -#include "open_cl/ClCommandQueue.h" -#include "open_cl/ClErrors.h" -#include "open_cl/kernels/Util.h" -#include "open_cl/Precision.h" -#include "open_cl/InternalTensor.h" -#include "open_cl/TensorType.h" -#include "open_cl/TensorTypeUtil.h" -#include "open_cl/Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -class OpenClConverterImpl : public TensorObjectConverter -{ -public: - virtual absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) = 0; - -protected: - absl::Status DispatchKernel(cl_mem buffer_mem, Tensor *tensor) - { - kernel_.ResetBindingCounter(); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(buffer_mem)); - RETURN_IF_ERROR(args_.SetObjectRef("tensor", tensor)); - RETURN_IF_ERROR(args_.Bind(kernel_.kernel(), kernel_.GetBindingCounter())); - const int3 grid = int3(tensor->Width() * tensor->Batch(), tensor->Height(), tensor->Slices()); - const int3 work_group_size = {16, 8, 1}; - const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size); 
- return queue_->Dispatch(kernel_, work_groups_count, work_group_size); - } - - Arguments args_; - BHWC shape_; - CLKernel kernel_; - TensorDescriptor tensor_descriptor_; - CLCommandQueue *queue_ = nullptr; - const CLContext *context_ = nullptr; -}; - -bool IsSupportedDataType(DataType type) -{ - return type == DataType::FLOAT16 || type == DataType::FLOAT32; -} - -bool IsBHWCOpenCLBuffer(const ObjectDef &def) -{ - return IsSupportedDataType(def.data_type) && def.object_type == ObjectType::OPENCL_BUFFER && - def.data_layout == DataLayout::BHWC; -} - -bool IsOpenCLTensor(const ObjectDef &def) -{ - const bool is_buffer_tensor = - def.object_type == ObjectType::OPENCL_BUFFER && def.data_layout == DataLayout::DHWC4; - const bool is_image2d_tensor = - def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::HDWC4; - const bool is_image2d_array_tensor = - def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::DHWC4; - const bool is_single_image_tensor = - def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::BHWC; - return IsSupportedDataType(def.data_type) && (is_buffer_tensor || is_image2d_tensor || - is_image2d_array_tensor || is_single_image_tensor); -} - -absl::Status GetOpenCLMemory(const TensorObject &obj, cl_mem *memory) -{ - auto texture = absl::get_if<OpenClTexture>(&obj); - auto buffer = absl::get_if<OpenClBuffer>(&obj); - if (texture && texture->memobj) - { - *memory = texture->memobj; - } - else if (buffer && buffer->memobj) - { - *memory = buffer->memobj; - } - else - { - return absl::InvalidArgumentError("Missing OpenCL object."); - } - return absl::OkStatus(); -} - -// Implements conversion from OpenCL tensor to another OpenCL tensor. 
-class TensorToTensorConverter : public OpenClConverterImpl -{ -public: - static bool IsSupported(const ObjectDef &input, const ObjectDef &output) - { - return IsOpenCLTensor(input) && IsOpenCLTensor(output); - } - - absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) final - { - src_tensor_descriptor_.layout = Layout::BHWC; - src_tensor_descriptor_.storage_type = - ToTensorStorageType(input_def.object_def.object_type, input_def.object_def.data_layout); - src_tensor_descriptor_.data_type = input_def.object_def.data_type; - args_.AddObjectRef("src_tensor", AccessType::READ, - absl::make_unique<TensorDescriptor>(src_tensor_descriptor_)); - - dst_tensor_descriptor_.layout = Layout::BHWC; - dst_tensor_descriptor_.storage_type = - ToTensorStorageType(output_def.object_def.object_type, output_def.object_def.data_layout); - dst_tensor_descriptor_.data_type = output_def.object_def.data_type; - args_.AddObjectRef("dst_tensor", AccessType::WRITE, - absl::make_unique<TensorDescriptor>(dst_tensor_descriptor_)); - - const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 || - output_def.object_def.data_type == DataType::FLOAT16; - const std::string out_data_type = ToCLDataType(output_def.object_def.data_type); - std::string shader_src; - if (need_fp16_support) - { - shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; - } - shader_src += - R"(__kernel void tensor_to_tensor($0) { - int linear_id = get_global_id(0); - int x = linear_id / args.dst_tensor.Batch(); - int b = linear_id % args.dst_tensor.Batch(); - int y = get_global_id(1); - int d = get_global_id(2); - if (x >= args.dst_tensor.Width() || y >= args.dst_tensor.Height() || d >= args.dst_tensor.Slices()) return; -)"; - shader_src += - " " + out_data_type + "4 input = args.src_tensor.Read<" + out_data_type + ">(x, y, d, b);\n"; - shader_src += " args.dst_tensor.Write(input, x, y, d, b);\n}"; - queue_ = 
environment->queue(); - context_ = &environment->context(); - shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w, - input_def.dimensions.c); - RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); - return environment->program_cache()->GetOrCreateCLKernel( - shader_src, "tensor_to_tensor", environment->context(), environment->device(), &kernel_); - } - - absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override - { - cl_mem in_memory = nullptr; - RETURN_IF_ERROR(GetOpenCLMemory(input_obj, &in_memory)); - cl_mem out_memory = nullptr; - RETURN_IF_ERROR(GetOpenCLMemory(output_obj, &out_memory)); - - Tensor src_tensor; - RETURN_IF_ERROR( - CreateSharedTensor(*context_, in_memory, shape_, src_tensor_descriptor_, &src_tensor)); - Tensor dst_tensor; - RETURN_IF_ERROR( - CreateSharedTensor(*context_, out_memory, shape_, dst_tensor_descriptor_, &dst_tensor)); - - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", &src_tensor)); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", &dst_tensor)); - - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - const int3 grid = - int3(dst_tensor.Width() * dst_tensor.Batch(), dst_tensor.Height(), dst_tensor.Slices()); - const int3 work_group_size = {16, 8, 1}; - const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size); - return queue_->Dispatch(kernel_, work_groups_count, work_group_size); - } - -private: - TensorDescriptor src_tensor_descriptor_; - TensorDescriptor dst_tensor_descriptor_; -}; - -// Implements conversion from OpenCL-specific tensor layout to BHWC OpenCL -// buffer. 
-class TensorToBHWCBufferConverter : public OpenClConverterImpl -{ -public: - static bool IsSupported(const ObjectDef &input, const ObjectDef &output) - { - return IsOpenCLTensor(input) && IsBHWCOpenCLBuffer(output); - } - - absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) final - { - TensorStorageType src_tensor_type = - ToTensorStorageType(input_def.object_def.object_type, input_def.object_def.data_layout); - tensor_descriptor_.layout = Layout::BHWC; - tensor_descriptor_.storage_type = src_tensor_type; - tensor_descriptor_.data_type = input_def.object_def.data_type; - args_.AddObjectRef("tensor", AccessType::READ, - absl::make_unique<TensorDescriptor>(tensor_descriptor_)); - - const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 || - output_def.object_def.data_type == DataType::FLOAT16; - std::string shader_src; - if (need_fp16_support) - { - shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; - } - const std::string out_data_type = ToCLDataType(output_def.object_def.data_type); - shader_src += "__kernel void tensor_to_bhwc("; - shader_src += "__global " + out_data_type + "* dst, $0) {\n"; - shader_src += R"( int linear_id = get_global_id(0); - int x = linear_id / args.tensor.Batch(); - int b = linear_id % args.tensor.Batch(); - int y = get_global_id(1); - int d = get_global_id(2); - if (x >= args.tensor.Width() || y >= args.tensor.Height() || d >= args.tensor.Slices()) return; -)"; - shader_src += - " " + out_data_type + "4 input = args.tensor.Read<" + out_data_type + ">(x, y, d, b);\n"; - shader_src += R"( int c = d * 4; - int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c; - - dst[index] = input.x; - if (c + 1 < args.tensor.Channels()) { - dst[index + 1] = input.y; - } - if (c + 2 < args.tensor.Channels()) { - dst[index + 2] = input.z; - } - if (c + 3 < args.tensor.Channels()) { - dst[index + 3] = 
input.w; - } -})"; - queue_ = environment->queue(); - context_ = &environment->context(); - shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w, - input_def.dimensions.c); - RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); - return environment->program_cache()->GetOrCreateCLKernel( - shader_src, "tensor_to_bhwc", environment->context(), environment->device(), &kernel_); - } - - absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override - { - auto output = absl::get_if<OpenClBuffer>(&output_obj); - if (!output || !output->memobj) - { - return absl::InvalidArgumentError("Missing output in tensor_to_bhwc converter"); - } - - cl_mem in_memory = nullptr; - RETURN_IF_ERROR(GetOpenCLMemory(input_obj, &in_memory)); - Tensor tensor; - RETURN_IF_ERROR(CreateSharedTensor(*context_, in_memory, shape_, tensor_descriptor_, &tensor)); - return DispatchKernel(output->memobj, &tensor); - } -}; - -// Implements conversion from BHWC OpenCL buffer to OpenCL-specific tensor -// layout. -class BHWCBufferToTensorConverter : public OpenClConverterImpl -{ -public: - static bool IsSupported(const ObjectDef &input, const ObjectDef &output) - { - return IsBHWCOpenCLBuffer(input) && IsOpenCLTensor(output); - } - - std::pair<std::string, std::string> GetFromBhwcKernel(const TensorObjectDef &input_def, - const TensorObjectDef &) const - { - return std::make_pair("__global " + ToCLDataType(input_def.object_def.data_type) + "* src", - R"(int c = d * 4; - int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c; - result.x = src[index]; - result.y = c + 1 < args.tensor.Channels() ? src[index + 1] : 1; - result.z = c + 2 < args.tensor.Channels() ? src[index + 2] : 2; - result.w = c + 3 < args.tensor.Channels() ? 
src[index + 3] : 3; -)"); - } - - absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) final - { - auto params_kernel = GetFromBhwcKernel(input_def, output_def); - - TensorStorageType dst_tensor_type = - ToTensorStorageType(output_def.object_def.object_type, output_def.object_def.data_layout); - tensor_descriptor_.layout = Layout::BHWC; - tensor_descriptor_.storage_type = dst_tensor_type; - tensor_descriptor_.data_type = output_def.object_def.data_type; - args_.AddObjectRef("tensor", AccessType::WRITE, - absl::make_unique<TensorDescriptor>(tensor_descriptor_)); - - const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 || - output_def.object_def.data_type == DataType::FLOAT16; - std::string shader_src; - if (need_fp16_support) - { - shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; - } - const std::string in_data_type = ToCLDataType(input_def.object_def.data_type); - const std::string out_data_type = ToCLDataType(output_def.object_def.data_type); - shader_src += "__kernel void bhwc_to_tensor("; - shader_src += "__global " + in_data_type + "* src, $0) {\n"; - - shader_src += R"( int linear_id = get_global_id(0); - int x = linear_id / args.tensor.Batch(); - int b = linear_id % args.tensor.Batch(); - int y = get_global_id(1); - int d = get_global_id(2); - - if (x >= args.tensor.Width() || y >= args.tensor.Height() || d >= args.tensor.Slices()) return; -)"; - shader_src += " " + out_data_type + "4 result;\n"; - shader_src += R"( int c = d * 4; - int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c; - result.x = src[index]; - result.y = c + 1 < args.tensor.Channels() ? src[index + 1] : 1; - result.z = c + 2 < args.tensor.Channels() ? src[index + 2] : 2; - result.w = c + 3 < args.tensor.Channels() ? 
src[index + 3] : 3; -)"; - shader_src += " args.tensor.Write(result, x, y, d, b);\n}"; - queue_ = environment->queue(); - context_ = &environment->context(); - shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h, output_def.dimensions.w, - output_def.dimensions.c); - RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); - return environment->program_cache()->GetOrCreateCLKernel( - shader_src, "bhwc_to_tensor", environment->context(), environment->device(), &kernel_); - } - - absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override - { - auto input = absl::get_if<OpenClBuffer>(&input_obj); - if (!input || !input->memobj) - { - return absl::InvalidArgumentError("Missing input in bhwc_to_tensor converter"); - } - cl_mem out_memory = nullptr; - RETURN_IF_ERROR(GetOpenCLMemory(output_obj, &out_memory)); - Tensor tensor; - RETURN_IF_ERROR(CreateSharedTensor(*context_, out_memory, shape_, tensor_descriptor_, &tensor)); - return DispatchKernel(input->memobj, &tensor); - } -}; - -std::array<size_t, 3> CalculateTextureRegion(const TensorObjectDef &def) -{ - const auto &dims = def.dimensions; - std::array<size_t, 3> region = {0, 0, 1}; - switch (ToTensorStorageType(def.object_def.object_type, def.object_def.data_layout)) - { - case TensorStorageType::SINGLE_TEXTURE_2D: - region[0] = static_cast<size_t>(dims.w * dims.b); - region[1] = static_cast<size_t>(dims.h); - break; - case TensorStorageType::TEXTURE_2D: - region[0] = static_cast<size_t>(dims.w * dims.b); - region[1] = static_cast<size_t>(dims.h * dims.d()); - break; - case TensorStorageType::TEXTURE_ARRAY: - region[0] = static_cast<size_t>(dims.w * dims.b); - region[1] = static_cast<size_t>(dims.h); - region[2] = static_cast<size_t>(dims.d()); - break; - default: - break; - } - return region; -} - -bool IsOpenClTextureOrBuffer(ObjectType type) -{ - return type == ObjectType::OPENCL_BUFFER || type == ObjectType::OPENCL_TEXTURE; -} - -// 
Copies data from one object of the same type and layout to another object. -class TrivialCopier : public OpenClConverterImpl -{ -public: - static bool IsSupported(const ObjectDef &input, const ObjectDef &output) - { - return IsOpenClTextureOrBuffer(input.object_type) && input.data_type == output.data_type && - input.object_type == output.object_type && input.data_layout == output.data_layout; - } - - absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) final - { - shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w, - input_def.dimensions.c); - data_type_ = input_def.object_def.data_type; - queue_ = environment->queue(); - region_ = CalculateTextureRegion(output_def); - return absl::OkStatus(); - } - - absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override - { - auto texture_input = absl::get_if<OpenClTexture>(&input_obj); - auto texture_output = absl::get_if<OpenClTexture>(&output_obj); - if (texture_input && texture_output) - { - return Copy(*texture_input, *texture_output); - } - auto buffer_input = absl::get_if<OpenClBuffer>(&input_obj); - auto buffer_output = absl::get_if<OpenClBuffer>(&output_obj); - if (buffer_input && buffer_output) - { - return Copy(*buffer_input, *buffer_output); - } - return absl::InternalError("Unexpected object"); - } - - absl::Status Copy(const OpenClBuffer &input, const OpenClBuffer &output) - { - if (input.memobj == output.memobj) - { - return absl::OkStatus(); - } - return GetOpenCLError(clEnqueueCopyBuffer(queue_->queue(), input.memobj, output.memobj, 0, 0, - SizeOf(data_type_) * shape_.w * shape_.h * - AlignByN(shape_.c, 4) * shape_.b, - 0, nullptr, nullptr)); - } - - absl::Status Copy(const OpenClTexture &input, const OpenClTexture &output) - { - if (input.memobj == output.memobj) - { - return absl::OkStatus(); - } - size_t origin[3] = {0, 0, 0}; - return 
GetOpenCLError(clEnqueueCopyImage(queue_->queue(), input.memobj, output.memobj, origin, - origin, region_.data(), 0, nullptr, nullptr)); - } - -private: - DataType data_type_ = DataType::UNKNOWN; - std::array<size_t, 3> region_; -}; - -// Copies data from/to CPU into a tensor. -class CpuCopier : public OpenClConverterImpl -{ -public: - static bool IsSupported(const ObjectDef &input, const ObjectDef &output) - { - return input.data_type == output.data_type && input.data_layout == output.data_layout && - ((input.object_type == ObjectType::CPU_MEMORY && - IsOpenClTextureOrBuffer(output.object_type)) || - (output.object_type == ObjectType::CPU_MEMORY && - IsOpenClTextureOrBuffer(input.object_type))); - } - - absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) final - { - - region_ = CalculateTextureRegion( - input_def.object_def.object_type == ObjectType::CPU_MEMORY ? output_def : input_def); - queue_ = environment->queue(); - return absl::OkStatus(); - } - - absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override - { - auto cpu_input = absl::get_if<CpuMemory>(&input_obj); - auto cpu_output = absl::get_if<CpuMemory>(&output_obj); - - if (cpu_input) - { - auto texture_output = absl::get_if<OpenClTexture>(&output_obj); - if (texture_output) - { - return queue_->EnqueueWriteImage(texture_output->memobj, - int3(region_[0], region_[1], region_[2]), cpu_input->data); - } - auto buffer_output = absl::get_if<OpenClBuffer>(&output_obj); - if (buffer_output) - { - return queue_->EnqueueWriteBuffer(buffer_output->memobj, cpu_input->size_bytes, - cpu_input->data); - } - } - else if (cpu_output) - { - auto texture_input = absl::get_if<OpenClTexture>(&input_obj); - if (texture_input) - { - return queue_->EnqueueReadImage(texture_input->memobj, - int3(region_[0], region_[1], region_[2]), cpu_output->data); - } - auto buffer_input = absl::get_if<OpenClBuffer>(&input_obj); - if 
(buffer_input) - { - return queue_->EnqueueReadBuffer(buffer_input->memobj, cpu_output->size_bytes, - cpu_output->data); - } - } - return absl::InternalError("Unexpected object"); - } - -private: - std::array<size_t, 3> region_; -}; - -class OpenClTensorConverterBuilder : public TensorObjectConverterBuilder -{ -public: - explicit OpenClTensorConverterBuilder(Environment *environment) : environment_(environment) {} - - bool IsSupported(const TensorObjectDef &input, const TensorObjectDef &output) const final - { - const auto &input_def = input.object_def; - const auto &output_def = output.object_def; - return input.dimensions == output.dimensions && - (TrivialCopier::IsSupported(input_def, output_def) || - TensorToTensorConverter::IsSupported(input_def, output_def) || - CpuCopier::IsSupported(input_def, output_def) || - TensorToBHWCBufferConverter::IsSupported(input_def, output_def) || - BHWCBufferToTensorConverter::IsSupported(input_def, output_def)); - } - - absl::Status MakeConverter(const TensorObjectDef &input, const TensorObjectDef &output, - std::unique_ptr<TensorObjectConverter> *converter) final - { - std::unique_ptr<OpenClConverterImpl> impl; - const auto &input_def = input.object_def; - const auto &output_def = output.object_def; - if (TrivialCopier::IsSupported(input_def, output_def)) - { - impl = absl::make_unique<TrivialCopier>(); - } - else if (TensorToTensorConverter::IsSupported(input_def, output_def)) - { - impl = absl::make_unique<TensorToTensorConverter>(); - } - else if (CpuCopier::IsSupported(input_def, output_def)) - { - impl = absl::make_unique<CpuCopier>(); - } - else if (TensorToBHWCBufferConverter::IsSupported(input_def, output_def)) - { - impl = absl::make_unique<TensorToBHWCBufferConverter>(); - } - else if (BHWCBufferToTensorConverter::IsSupported(input_def, output_def)) - { - impl = absl::make_unique<BHWCBufferToTensorConverter>(); - } - else - { - return absl::UnimplementedError("Unsupported conversion"); - } - 
RETURN_IF_ERROR(impl->Init(input, output, environment_)); - *converter = std::move(impl); - return absl::OkStatus(); - } - - Environment *environment_; -}; - -} // namespace - -std::unique_ptr<TensorObjectConverterBuilder> NewConverterBuilder(Environment *environment) -{ - return absl::make_unique<OpenClTensorConverterBuilder>(environment); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h deleted file mode 100644 index d69ec85bb..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__ - -#include <memory> - -#include "open_cl/Environment.h" -#include "open_cl/Spi.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -// Supports conversions from BHWC to internal OpenCL tensor representation and -// back. Also supports F16/F32. 
-std::unique_ptr<TensorObjectConverterBuilder> NewConverterBuilder(Environment *environment); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc deleted file mode 100644 index e409fef47..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "DepthwiseConv.h" - -#include <string> -#include <utility> -#include <vector> - -#include "open_cl/ClDevice.h" -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/LinearStorage.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -bool IsSpecializedCase(int channel_multiplier) -{ - return channel_multiplier == 1 || channel_multiplier == 2 || channel_multiplier == 4; -} - -std::string GetSrcValue(int channel_multiplier, const std::string coords) -{ - std::string c; - if (channel_multiplier == 1) - { - c += " FLT4 src_final = args.src_tensor.Read(" + coords + ", S);\n"; - } - else if (channel_multiplier == 2) - { - c += " int s_layer = S / 2;\n"; - c += " FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n"; - c += " FLT2 t0 = S % 2 == 0 ? src.xy : src.zw;\n"; - c += " FLT4 src_final = (FLT4)(t0.x, t0.x, t0.y, t0.y);\n"; - } - else if (channel_multiplier == 4) - { - c += " int s_layer = S / 4;\n"; - c += " FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n"; - c += " FLT t0 = src.x;\n"; - c += " int reminder = S % 4;\n"; - c += " if (reminder == 1) t0 = src.y;\n"; - c += " if (reminder == 2) t0 = src.z;\n"; - c += " if (reminder == 3) t0 = src.w;\n"; - c += " FLT4 src_final = (FLT4)(t0, t0, t0, t0);\n"; - } - else - { - c += " int s_layer = S / args.ch_multiplier;\n"; - c += " FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n"; - c += " int s_offset = (S % args.ch_multiplier) * 4;\n"; - c += " FLT4 src_final;\n"; - c += " FLT temp_arr[4] = {src.x, src.y, src.z, src.w};\n"; - c += " src_final.x = temp_arr[(s_offset + 0) / args.ch_multiplier];\n"; - c += " src_final.y = temp_arr[(s_offset + 1) / args.ch_multiplier];\n"; - c += " src_final.z = temp_arr[(s_offset + 2) / args.ch_multiplier];\n"; - c += " src_final.w = temp_arr[(s_offset + 3) / args.ch_multiplier];\n"; - } - - return c; -} - -std::string GenerateDepthwiseConvolutionCode(const 
OperationDef &op_def, bool stride_correction, - int channel_multiplier, bool weights_are_buffer, - bool dynamic_weights, GPUOperation *op) -{ - auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); - if (op_def.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddSrcTensor("src_tensor", src_desc); - if (dynamic_weights) - { - op->AddSrcTensor("weights", op_def.src_tensors[1]); - } - - auto dst_desc = op_def.dst_tensors[0]; - if (op_def.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddDstTensor("dst_tensor", dst_desc); - - const auto src_tensor_type = op_def.src_tensors[0].storage_type; - - std::string c = GetCommonDefines(op_def.precision); - - const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || - src_tensor_type == TensorStorageType::IMAGE_BUFFER; - - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0);\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " int linear_id_1 = get_global_id(1);\n"; - c += " int Y = linear_id_1 / args.dst_tensor.Depth();\n"; - c += " int Z = linear_id_1 % args.dst_tensor.Depth();\n"; - } - else - { - c += " int Y = get_global_id(1);\n"; - } - c += " int S = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "S >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - c += " ACCUM_FLT4 r = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n"; - if (stride_correction) - { - c += " int x_offseted = " + - GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") + - ";\n"; - } - else - { - if (op_def.IsBatchSupported()) - { - c += " int x_offseted = X * args.stride_x + args.padding_x * " - "args.src_tensor.Batch();\n"; - } - else - { - c += " int x_offseted = X * args.stride_x + args.padding_x;\n"; - } - } - c += " int y_offseted = Y * args.stride_y + 
args.padding_y;\n"; - if (!dynamic_weights) - { - std::string weights_offset = "args.kernel_size_x * args.kernel_size_y"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " int z_offseted = Z * args.stride_z + args.padding_z;\n"; - weights_offset += " * args.kernel_size_z"; - } - if (weights_are_buffer) - { - c += " int fx_c = S * " + weights_offset + ";\n"; - } - else - { - c += " int fx_c = 0;\n"; - } - } - std::string kernel_size_x = dynamic_weights ? "args.weights.Width()" : "args.kernel_size_x"; - std::string kernel_size_y = dynamic_weights ? "args.weights.Height()" : "args.kernel_size_y"; - std::string kernel_size_z = dynamic_weights ? "args.weights.Depth()" : "args.kernel_size_z"; - - std::string flat_coords = "x_c, y_c"; - if (manual_clamp) - { - std::string check = "!outside_x && !outside_y"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - check += " && !outside_z"; - flat_coords += ", z_c"; - c += " for (int kz = 0; kz < " + kernel_size_z + "; ++kz) {\n"; - c += " int z_c = z_offseted + kz * args.dilation_z;\n"; - c += " bool outside_z = z_c < 0 || z_c >= args.src_tensor.Depth();\n"; - } - c += " for (int ky = 0; ky < " + kernel_size_y + "; ++ky) {\n"; - c += " int y_c = y_offseted + ky * args.dilation_y;\n"; - c += " bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n"; - c += " for (int kx = 0; kx < " + kernel_size_x + "; ++kx) {\n"; - const std::string dilation_x = - op_def.IsBatchSupported() ? 
"args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x"; - c += " int x_c = x_offseted + kx * " + dilation_x + ";\n"; - c += " bool outside_x = x_c < 0 || x_c >= args.src_tensor.Width();\n"; - c += " if (" + check + ") {\n"; - if (dynamic_weights) - { - c += " FLT4 f = args.weights.Read(kx, ky, S);\n"; - } - else - { - if (weights_are_buffer) - { - c += " FLT4 f = args.weights.Read(fx_c);\n"; - } - else - { - c += " FLT4 f = args.weights.Read(fx_c, S);\n"; - } - } - c += GetSrcValue(channel_multiplier, flat_coords); - c += " r += TO_ACCUM_TYPE(src_final * f);\n"; - c += " };\n"; - if (!dynamic_weights) - { - c += " fx_c++;\n"; - } - c += " }\n"; - c += " }\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " }\n"; - } - } - else - { // Texture types with ZERO clamping - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - flat_coords += ", z_c"; - c += " for (int kz = 0; kz < " + kernel_size_z + "; ++kz) {\n"; - c += " int z_c = z_offseted + kz * args.dilation_z;\n"; - if (src_tensor_type != TensorStorageType::TEXTURE_3D) - { // Only TEXTURE_3D supports clamping - // in DEPTH dimension - c += " if (z_c < 0 || z_c >= args.src_tensor.Depth()) {\n"; - c += " fx_c += args.kernel_size_y * args.kernel_size_x;\n"; - c += " continue;\n"; - c += " }\n"; - } - } - c += " for (int ky = 0; ky < " + kernel_size_y + "; ++ky) {\n"; - c += " int y_c = y_offseted + ky * args.dilation_y;\n"; - c += " for (int kx = 0; kx < " + kernel_size_x + "; ++kx) {\n"; - const std::string dilation_x = - op_def.IsBatchSupported() ? 
"args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x"; - c += " int x_c = x_offseted + kx * " + dilation_x + ";\n"; - c += GetSrcValue(channel_multiplier, flat_coords); - if (dynamic_weights) - { - c += " FLT4 f = args.weights.Read(kx, ky, S);\n"; - } - else - { - if (weights_are_buffer) - { - c += " FLT4 f = args.weights.Read(fx_c);\n"; - } - else - { - c += " FLT4 f = args.weights.Read(fx_c, S);\n"; - } - c += " fx_c++;\n"; - } - c += " r += TO_ACCUM_TYPE(src_final * f);\n"; - c += " }\n"; - c += " }\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " }\n"; - } - } - c += " FLT4 res0 = TO_FLT4(r) + args.biases.Read(S);\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " args.dst_tensor.Write(res0, X, Y, Z, S);\n"; - } - else - { - c += " args.dst_tensor.Write(res0, X, Y, S);\n"; - } - c += "}\n"; - - return c; -} -} // namespace - -GPUOperation CreateDepthwiseConvolution2D(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr) -{ - bool weights_are_buffer = device_info.IsMali(); - GPUOperation op(definition); - op.args_.AddInt("kernel_size_x", attr.weights.shape.w); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("dilation_x", attr.dilations.w); - op.args_.AddInt("kernel_size_y", attr.weights.shape.h); - op.args_.AddInt("stride_y", attr.strides.h); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("dilation_y", attr.dilations.h); - if (!IsSpecializedCase(attr.weights.shape.o)) - { - op.args_.AddInt("ch_multiplier", attr.weights.shape.o); - } - const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1; - op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, attr.weights.shape.o, - weights_are_buffer, false, &op); - UploadWeightsForDWConv2D(attr.weights, weights_are_buffer, definition.precision, &op); - op.tensor_to_grid_ 
= TensorToGrid::kWBToX_HDToY_SToZ; - - TensorLinearDescriptor desc; - desc.storage_type = - weights_are_buffer ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D; - desc.element_type = definition.GetDataType(); - desc.UploadLinearData(attr.bias); - op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); - return op; -} - -GPUOperation -CreateDepthwiseConvolution2DDynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr) -{ - GPUOperation op(definition); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("dilation_x", attr.dilations.w); - op.args_.AddInt("stride_y", attr.strides.h); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("dilation_y", attr.dilations.h); - const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1; - op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, 1, false, true, &op); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - - TensorLinearDescriptor desc; - desc.storage_type = - device_info.IsMali() ? 
LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D; - desc.element_type = definition.GetDataType(); - desc.UploadLinearData(attr.bias); - op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); - return op; -} - -GPUOperation CreateDepthwiseConvolution3D(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution3DAttributes &attr) -{ - bool weights_are_buffer = device_info.IsMali(); - GPUOperation op(definition); - op.args_.AddInt("kernel_size_x", attr.weights.shape.w); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("dilation_x", attr.dilations.w); - op.args_.AddInt("kernel_size_y", attr.weights.shape.h); - op.args_.AddInt("stride_y", attr.strides.h); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("dilation_y", attr.dilations.h); - op.args_.AddInt("kernel_size_z", attr.weights.shape.d); - op.args_.AddInt("stride_z", attr.strides.d); - op.args_.AddInt("padding_z", -attr.padding.prepended.d); - op.args_.AddInt("dilation_z", attr.dilations.d); - if (!IsSpecializedCase(attr.weights.shape.o)) - { - op.args_.AddInt("ch_multiplier", attr.weights.shape.o); - } - const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1; - op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, attr.weights.shape.o, - weights_are_buffer, false, &op); - UploadWeightsForDWConv3D(attr.weights, weights_are_buffer, definition.precision, &op); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - - TensorLinearDescriptor desc; - desc.storage_type = - weights_are_buffer ? 
LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D; - desc.element_type = definition.GetDataType(); - desc.UploadLinearData(attr.bias); - op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h deleted file mode 100644 index cbadd9fde..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__ - -#include <vector> - -#include "open_cl/Buffer.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/LinearStorage.h" -#include "open_cl/Tensor.h" -#include "open_cl/Texture2d.h" -#include "open_cl/Util.h" -#include "open_cl/DataType.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <DataType S, typename T> -void RearrangeWeightsForDWConv2D(const InternalTensor<OHWI, S> &weights, absl::Span<T> dst) -{ - const int dst_channels = weights.shape.i * weights.shape.o; - const int dst_depth = DivideRoundUp(dst_channels, 4); - const int kernel_x = weights.shape.w; - const int kernel_y = weights.shape.h; - - int counter = 0; - for (int d = 0; d < dst_depth; ++d) - { - for (int y = 0; y < kernel_y; ++y) - { - for (int x = 0; x < kernel_x; ++x) - { - T filter_val; - for (int i = 0; i < 4; ++i) - { - const int d_ch = d * 4 + i; - if (d_ch < dst_channels) - { - const int f_index = - weights.shape.LinearIndex({d_ch % weights.shape.o, y, x, d_ch / weights.shape.o}); - filter_val[i] = weights.data[f_index]; - } - else - { - filter_val[i] = 0.0f; - } - } - dst[counter++] = filter_val; - } - } - } -} - -template <DataType T> -void UploadWeightsForDWConv2D(const InternalTensor<OHWI, T> &weights, bool weights_are_buffer, - CalculationsPrecision precision, GPUOperation *op) -{ - const int dst_channels = weights.shape.i * weights.shape.o; - const int dst_slices = DivideRoundUp(dst_channels, 4); - const int kernel_x = weights.shape.w; - const int kernel_y = weights.shape.h; - - const int elements_count = kernel_x * kernel_y * dst_slices; - - const bool fp32_weights = precision == CalculationsPrecision::F32; - const int float4_size = fp32_weights ? 
16 : 8; - - std::vector<uint8_t> data(float4_size * elements_count); - - if (fp32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(data.data()); - RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count)); - } - // TODO - // It doesn't support F16 yet. I will try to add it later. - // - // else { - // half4* ptr = reinterpret_cast<half4*>(data.data()); - // RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count)); - // } - - if (weights_are_buffer) - { - BufferDescriptor desc; - desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 4; - desc.size = float4_size * elements_count; - desc.data = std::move(data); - op->args_.AddObject("weights", absl::make_unique<BufferDescriptor>(desc)); - } - else - { - Texture2DDescriptor desc; - desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16; - desc.size = int2(kernel_x * kernel_y, dst_slices); - desc.data = std::move(data); - op->args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(desc)); - } -} - -template <DataType S, typename T> -void RearrangeWeightsForDWConv3D(const InternalTensor<OHWDI, S> &weights, absl::Span<T> dst) -{ - const int dst_channels = weights.shape.i * weights.shape.o; - const int dst_slices = DivideRoundUp(dst_channels, 4); - const int kernel_x = weights.shape.w; - const int kernel_y = weights.shape.h; - const int kernel_z = weights.shape.d; - - int counter = 0; - for (int d = 0; d < dst_slices; ++d) - { - for (int z = 0; z < kernel_z; ++z) - { - for (int y = 0; y < kernel_y; ++y) - { - for (int x = 0; x < kernel_x; ++x) - { - T filter_val; - for (int i = 0; i < 4; ++i) - { - const int d_ch = d * 4 + i; - if (d_ch < dst_channels) - { - const int f_index = weights.shape.LinearIndex( - {d_ch % weights.shape.o, y, x, z, d_ch / weights.shape.o}); - filter_val[i] = weights.data[f_index]; - } - else - { - filter_val[i] = 0.0f; - } - } - dst[counter++] = filter_val; - } - } - } - } -} - -template 
<DataType T> -void UploadWeightsForDWConv3D(const InternalTensor<OHWDI, T> &weights, bool weights_are_buffer, - CalculationsPrecision precision, GPUOperation *op) -{ - const int dst_channels = weights.shape.i * weights.shape.o; - const int dst_slices = DivideRoundUp(dst_channels, 4); - const int kernel_x = weights.shape.w; - const int kernel_y = weights.shape.h; - const int kernel_z = weights.shape.d; - - const int elements_count = kernel_x * kernel_y * kernel_z * dst_slices; - - const bool fp32_weights = precision == CalculationsPrecision::F32; - const int float4_size = fp32_weights ? 16 : 8; - - std::vector<uint8_t> data(float4_size * elements_count); - - if (fp32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(data.data()); - RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count)); - } - // TODO - // It doesn't support F16 yet. I will try to add it later. - // - // else { - // half4* ptr = reinterpret_cast<half4*>(data.data()); - // RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count)); - // } - - if (weights_are_buffer) - { - BufferDescriptor desc; - desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 4; - desc.size = float4_size * elements_count; - desc.data = std::move(data); - op->args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc))); - } - else - { - Texture2DDescriptor desc; - desc.element_type = fp32_weights ? 
DataType::FLOAT32 : DataType::FLOAT16; - desc.size = int2(kernel_x * kernel_y * kernel_z, dst_slices); - desc.data = std::move(data); - op->args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(std::move(desc))); - } -} - -GPUOperation CreateDepthwiseConvolution2D(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr); - -GPUOperation -CreateDepthwiseConvolution2DDynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr); - -GPUOperation CreateDepthwiseConvolution3D(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution3DAttributes &attr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc deleted file mode 100644 index 89a14f14d..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "DepthwiseConv3x3.h" - -#include <string> -#include <utility> - -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/Precision.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef &definition, bool weights_are_buffer, - bool local_mem_uploads, const DeviceInfo &device_info) - : GPUOperation(definition), local_mem_uploads_(local_mem_uploads) -{ - work_group_size_ = int3(8, 4, 1); - code_ = GenerateDepthwiseConvCode(definition_, weights_are_buffer, local_mem_uploads_); - - if (definition_.precision == CalculationsPrecision::F16 && device_info.IsPowerVR()) - { - compiler_options_.push_back(CompilerOptions::POWERVR_FP16); - } -} - -DepthwiseConv3x3::DepthwiseConv3x3(DepthwiseConv3x3 &&operation) - : GPUOperation(std::move(operation)), local_mem_uploads_(operation.local_mem_uploads_) -{ -} - -DepthwiseConv3x3 &DepthwiseConv3x3::operator=(DepthwiseConv3x3 &&operation) -{ - if (this != &operation) - { - std::swap(local_mem_uploads_, operation.local_mem_uploads_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - -std::string DepthwiseConv3x3::GenerateDepthwiseConvCode(const OperationDef &op_def, - bool weights_are_buffer, - bool local_mem_uploads) -{ - auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); - AddSrcTensor("src_tensor", src_desc); - AddDstTensor("dst_tensor", op_def.dst_tensors[0]); - - const auto src_tensor_type = op_def.src_tensors[0].storage_type; - - const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || - src_tensor_type == TensorStorageType::IMAGE_BUFFER; - - std::string c = GetCommonDefines(op_def.precision); - if (local_mem_uploads) - { - c += "__attribute__((reqd_work_group_size(8, 4, 1)))\n"; - } - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - if 
(op_def.dst_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int linear_id = get_global_id(0);\n"; - c += " int X = (linear_id / args.dst_tensor.Batch()) * 2;\n"; - c += " int B = linear_id % args.dst_tensor.Batch();\n"; - c += " args.dst_tensor.SetBatchRef(B);\n"; - c += " args.src_tensor.SetBatchRef(B);\n"; - } - else - { - c += " int X = get_global_id(0) * 2;\n"; - } - c += " int Y = get_global_id(1) * 2;\n"; - c += " int S = get_global_id(2);\n"; - c += " ACCUM_FLT4 r0 = (ACCUM_FLT4)(0.0f);\n"; - c += " ACCUM_FLT4 r1 = (ACCUM_FLT4)(0.0f);\n"; - c += " ACCUM_FLT4 r2 = (ACCUM_FLT4)(0.0f);\n"; - c += " ACCUM_FLT4 r3 = (ACCUM_FLT4)(0.0f);\n"; - if (!local_mem_uploads) - { - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() " - "|| S >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - } - if (local_mem_uploads) - { - c += " __local FLT4 f[10];\n"; - c += " event_t e = async_work_group_copy(f, args.weights.GetPtr() + S * " - "10, 10, 0);\n"; - c += " wait_group_events(1, &e);\n"; - } - else if (weights_are_buffer) - { - c += " __global FLT4* f = args.weights.GetPtr() + S * 10;\n"; - } - c += " FLT4 s0;\n"; - c += " FLT4 s1;\n"; - c += " FLT4 s2;\n"; - c += " FLT4 s3;\n"; - std::string W[9] = {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8"}; - std::string bias = "bias"; - std::string xc[4] = {"X - 1", "X", "X + 1", "X + 2"}; - std::string yc[4] = {"Y - 1", "Y", "Y + 1", "Y + 2"}; - if (!weights_are_buffer) - { - c += " FLT4 f0 = args.weights.Read(0, S);\n"; - c += " FLT4 f1 = args.weights.Read(1, S);\n"; - c += " FLT4 f2 = args.weights.Read(2, S);\n"; - c += " FLT4 f3 = args.weights.Read(3, S);\n"; - c += " FLT4 f4 = args.weights.Read(4, S);\n"; - c += " FLT4 f5 = args.weights.Read(5, S);\n"; - c += " FLT4 f6 = args.weights.Read(6, S);\n"; - c += " FLT4 f7 = args.weights.Read(7, S);\n"; - c += " FLT4 f8 = args.weights.Read(8, S);\n"; - } - if (manual_clamp) - { - c += " int x0 = X - 1;\n"; - c += " int x1 = X;\n"; - c 
+= " int x2 = X + 1;\n"; - c += " int x3 = X + 2;\n"; - c += " int y0 = Y - 1;\n"; - c += " int y1 = Y;\n"; - c += " int y2 = Y + 1;\n"; - c += " int y3 = Y + 2;\n"; - c += " bool x0_in = x0 >= 0 && x0 < args.dst_tensor.Width();\n"; - c += " bool x1_in = x1 >= 0 && x1 < args.dst_tensor.Width();\n"; - c += " bool x2_in = x2 >= 0 && x2 < args.dst_tensor.Width();\n"; - c += " bool x3_in = x3 >= 0 && x3 < args.dst_tensor.Width();\n"; - c += " bool y0_in = y0 >= 0 && y0 < args.dst_tensor.Height();\n"; - c += " bool y1_in = y1 >= 0 && y1 < args.dst_tensor.Height();\n"; - c += " bool y2_in = y2 >= 0 && y2 < args.dst_tensor.Height();\n"; - c += " bool y3_in = y3 >= 0 && y3 < args.dst_tensor.Height();\n"; - c += " x0 = clamp(x0, 0, args.dst_tensor.Width() - 1);\n"; - c += " x1 = clamp(x1, 0, args.dst_tensor.Width() - 1);\n"; - c += " x2 = clamp(x2, 0, args.dst_tensor.Width() - 1);\n"; - c += " x3 = clamp(x3, 0, args.dst_tensor.Width() - 1);\n"; - c += " y0 = clamp(y0, 0, args.dst_tensor.Height() - 1);\n"; - c += " y1 = clamp(y1, 0, args.dst_tensor.Height() - 1);\n"; - c += " y2 = clamp(y2, 0, args.dst_tensor.Height() - 1);\n"; - c += " y3 = clamp(y3, 0, args.dst_tensor.Height() - 1);\n"; - if (src_tensor_type == TensorStorageType::BUFFER) - { - c += " __global FLT4* src_loc = " - "args.src_tensor.GetPtrWithSliceOffset(S);\n"; - } - xc[0] = "x0"; - xc[1] = "x1"; - xc[2] = "x2"; - xc[3] = "x3"; - yc[0] = "y0"; - yc[1] = "y1"; - yc[2] = "y2"; - yc[3] = "y3"; - } - if (local_mem_uploads || weights_are_buffer) - { - W[0] = "f[0]"; - W[1] = "f[1]"; - W[2] = "f[2]"; - W[3] = "f[3]"; - W[4] = "f[4]"; - W[5] = "f[5]"; - W[6] = "f[6]"; - W[7] = "f[7]"; - W[8] = "f[8]"; - bias = "f[9]"; - } - auto read_4x_line = [&](int y) { - if (src_tensor_type == TensorStorageType::BUFFER) - { - const std::string y_in = "y" + std::to_string(y) + "_in"; - c += " s0 = src_loc[args.src_tensor.GetWHOffset(" + xc[0] + ", " + yc[y] + - ")] * (FLT)(x0_in && " + y_in + ");\n"; - c += " s1 = 
src_loc[args.src_tensor.GetWHOffset(" + xc[1] + ", " + yc[y] + - ")] * (FLT)(x1_in && " + y_in + ");\n"; - c += " s2 = src_loc[args.src_tensor.GetWHOffset(" + xc[2] + ", " + yc[y] + - ")] * (FLT)(x2_in && " + y_in + ");\n"; - c += " s3 = src_loc[args.src_tensor.GetWHOffset(" + xc[3] + ", " + yc[y] + - ")] * (FLT)(x3_in && " + y_in + ");\n"; - } - else if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) - { - const std::string y_in = "y" + std::to_string(y) + "_in"; - c += " s0 = args.src_tensor.Read(" + xc[0] + ", " + yc[y] + ", S) * (FLT)(x0_in && " + - y_in + ");\n"; - c += " s1 = args.src_tensor.Read(" + xc[1] + ", " + yc[y] + ", S) * (FLT)(x1_in && " + - y_in + ");\n"; - c += " s2 = args.src_tensor.Read(" + xc[2] + ", " + yc[y] + ", S) * (FLT)(x2_in && " + - y_in + ");\n"; - c += " s3 = args.src_tensor.Read(" + xc[3] + ", " + yc[y] + ", S) * (FLT)(x3_in && " + - y_in + ");\n"; - } - else - { - c += " s0 = args.src_tensor.Read(" + xc[0] + ", " + yc[y] + ", S);\n"; - c += " s1 = args.src_tensor.Read(" + xc[1] + ", " + yc[y] + ", S);\n"; - c += " s2 = args.src_tensor.Read(" + xc[2] + ", " + yc[y] + ", S);\n"; - c += " s3 = args.src_tensor.Read(" + xc[3] + ", " + yc[y] + ", S);\n"; - } - }; - c += " {\n"; - read_4x_line(0); - c += " r0 += TO_ACCUM_TYPE(" + W[0] + " * s0);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[1] + " * s1);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[0] + " * s1);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[2] + " * s2);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[1] + " * s2);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[2] + " * s3);\n"; - c += " }\n"; - c += " {\n"; - read_4x_line(1); - c += " r0 += TO_ACCUM_TYPE(" + W[3] + " * s0);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[0] + " * s0);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[4] + " * s1);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[3] + " * s1);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[1] + " * s1);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[0] + " * s1);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[5] + " * s2);\n"; - c += " r1 
+= TO_ACCUM_TYPE(" + W[4] + " * s2);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[2] + " * s2);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[1] + " * s2);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[5] + " * s3);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[2] + " * s3);\n"; - c += " }\n"; - c += " {\n"; - read_4x_line(2); - c += " r0 += TO_ACCUM_TYPE(" + W[6] + " * s0);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[3] + " * s0);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[7] + " * s1);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[6] + " * s1);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[4] + " * s1);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[3] + " * s1);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[8] + " * s2);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[7] + " * s2);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[5] + " * s2);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[4] + " * s2);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[8] + " * s3);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[5] + " * s3);\n"; - c += " }\n"; - c += " {\n"; - read_4x_line(3); - c += " r2 += TO_ACCUM_TYPE(" + W[6] + " * s0);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[7] + " * s1);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[6] + " * s1);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[8] + " * s2);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[7] + " * s2);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[8] + " * s3);\n"; - c += " }\n"; - if (!weights_are_buffer) - { - c += " FLT4 bias = args.weights.Read(9, S);\n"; - } - c += " r0 += TO_ACCUM_TYPE(" + bias + ");\n"; - c += " r1 += TO_ACCUM_TYPE(" + bias + ");\n"; - c += " r2 += TO_ACCUM_TYPE(" + bias + ");\n"; - c += " r3 += TO_ACCUM_TYPE(" + bias + ");\n"; - if (local_mem_uploads) - { - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() " - "|| S >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - } - c += " if(X + 0 < args.dst_tensor.Width() && Y + 0 < " - "args.dst_tensor.Height()) {\n"; - c += " FLT4 result = TO_FLT4(r0);\n"; - c += " args.dst_tensor.Write(result, X + 0, Y + 0, S)\n"; - c += " }\n"; - c 
+= " if(X + 1 < args.dst_tensor.Width() && Y + 0 < " - "args.dst_tensor.Height()) {\n"; - c += " FLT4 result = TO_FLT4(r1);\n"; - c += " args.dst_tensor.Write(result, X + 1, Y + 0, S)\n"; - c += " }\n"; - c += " if(X + 0 < args.dst_tensor.Width() && Y + 1 < " - "args.dst_tensor.Height()) {\n"; - c += " FLT4 result = TO_FLT4(r2);\n"; - c += " args.dst_tensor.Write(result, X + 0, Y + 1, S)\n"; - c += " }\n"; - c += " if(X + 1 < args.dst_tensor.Width() && Y + 1 < " - "args.dst_tensor.Height()) {\n"; - c += " FLT4 result = TO_FLT4(r3);\n"; - c += " args.dst_tensor.Write(result, X + 1, Y + 1, S)\n"; - c += " }\n"; - c += "}\n"; - - return c; -} - -int3 DepthwiseConv3x3::GetGridSize() const -{ - const int grid_x = DivideRoundUp(dst_[0]->Width(), 2) * dst_[0]->Batch(); - const int grid_y = DivideRoundUp(dst_[0]->Height(), 2); - const int grid_z = dst_[0]->Slices(); - return int3(grid_x, grid_y, grid_z); -} - -void DepthwiseConv3x3::GetPossibleKernelWorkGroups(TuningType tuning_type, - const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const -{ - if (local_mem_uploads_) - { - work_groups->push_back(work_group_size_); - } - else - { - GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, work_groups); - } -} - -bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes &attr) -{ - return attr.weights.shape.o == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 && - attr.weights.shape.w == 3 && attr.weights.shape.h == 3 && attr.strides.w == 1 && - attr.strides.h == 1 && attr.padding.prepended.w == 1 && attr.padding.prepended.h == 1 && - attr.padding.appended.w == 1 && attr.padding.appended.h == 1; -} - -DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr) -{ - bool weights_are_buffer = device_info.IsPowerVR() || device_info.IsMali(); - bool local_mem_uploads = weights_are_buffer && 
device_info.IsPowerVR(); - DepthwiseConv3x3 result(definition, weights_are_buffer, local_mem_uploads, device_info); - result.UploadWeightsAndBiases(attr.weights, attr.bias, weights_are_buffer); - return result; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h deleted file mode 100644 index 8c571105a..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__ - -#include <memory> -#include <vector> - -#include "open_cl/Buffer.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/Tensor.h" -#include "open_cl/Texture2d.h" -#include "open_cl/Util.h" -#include "open_cl/DataType.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class DepthwiseConv3x3 : public GPUOperation -{ -public: - DepthwiseConv3x3() = default; - void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const override; - int3 GetGridSize() const override; - - // Move only - DepthwiseConv3x3(DepthwiseConv3x3 &&operation); - DepthwiseConv3x3 &operator=(DepthwiseConv3x3 &&operation); - DepthwiseConv3x3(const DepthwiseConv3x3 &) = delete; - DepthwiseConv3x3 &operator=(const DepthwiseConv3x3 &) = delete; - -private: - explicit DepthwiseConv3x3(const OperationDef &definition, bool weights_are_buffer, - bool local_mem_uploads, const DeviceInfo &device_info); - template <DataType T> - void UploadWeightsAndBiases(const InternalTensor<OHWI, T> &weights, - const InternalTensor<Linear, T> &biases, bool weights_are_buffer); - - friend DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr); - - template <DataType S, typename T> - void RearrangeWeightsAndBiasesData(const InternalTensor<OHWI, S> &weights, - const InternalTensor<Linear, S> &biases, absl::Span<T> dst); - - std::string GenerateDepthwiseConvCode(const OperationDef &op_def, bool weights_are_buffer, - bool local_mem_uploads); - - bool local_mem_uploads_; -}; - -template <DataType T> -void 
DepthwiseConv3x3::UploadWeightsAndBiases(const InternalTensor<OHWI, T> &weights, - const InternalTensor<Linear, T> &biases, - bool weights_are_buffer) -{ - const int src_depth = DivideRoundUp(weights.shape.i, 4); - int texture_width = 10; // 3x3 kernel + 1 bias - int texture_height = src_depth; - const int elements_count = texture_width * texture_height; - const bool fp32_weights = definition_.precision == CalculationsPrecision::F32; - const int float4_size = fp32_weights ? 16 : 8; - - std::vector<uint8_t> data(float4_size * elements_count); - if (fp32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(data.data()); - RearrangeWeightsAndBiasesData(weights, biases, absl::MakeSpan(ptr, elements_count)); - } - // TODO - // It doesn't support F16 yet. I will try to add it later. - // - // else { - // half4* ptr = reinterpret_cast<half4*>(data.data()); - // RearrangeWeightsAndBiasesData(weights, biases, - // absl::MakeSpan(ptr, elements_count)); - // } - - if (weights_are_buffer) - { - BufferDescriptor desc; - desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 4; - desc.size = float4_size * elements_count; - desc.data = std::move(data); - args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc))); - } - else - { - Texture2DDescriptor desc; - desc.element_type = fp32_weights ? 
DataType::FLOAT32 : DataType::FLOAT16; - desc.size = int2(texture_width, texture_height); - desc.data = std::move(data); - args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(std::move(desc))); - } -} - -template <DataType S, typename T> -void DepthwiseConv3x3::RearrangeWeightsAndBiasesData(const InternalTensor<OHWI, S> &weights, - const InternalTensor<Linear, S> &biases, - absl::Span<T> dst) -{ - const int src_depth = DivideRoundUp(weights.shape.i, 4); - - int counter = 0; - for (int s = 0; s < src_depth; ++s) - { - for (int y = 0; y < 3; ++y) - { - for (int x = 0; x < 3; ++x) - { - T filter_val; - for (int i = 0; i < 4; ++i) - { - const int s_ch = s * 4 + i; - if (s_ch < weights.shape.i) - { - const int f_index = weights.shape.LinearIndex({0, y, x, s_ch}); - filter_val[i] = weights.data[f_index]; - } - else - { - filter_val[i] = 0.0f; - } - } - dst[counter++] = filter_val; - } - } - - T bias_val; - for (int i = 0; i < 4; ++i) - { - const int dst_ch = s * 4 + i; - bias_val[i] = dst_ch >= biases.shape.v ? 0.0f : biases.data[dst_ch]; - } - dst[counter++] = bias_val; - } -} - -bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes &attr); - -DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc deleted file mode 100644 index 8839d9687..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc +++ /dev/null @@ -1,385 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GpuOperation.h" - -#include "Util.h" -#include "WorkGroupPicking.h" -#include "open_cl/AccessType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::string GetElementWiseCode(const OperationDef &op_def, bool check_src_slices) -{ - std::string c = GetCommonDefines(op_def.precision); - - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0);\n"; - c += " int Y = get_global_id(1);\n"; - c += " int Z = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) return; \n"; - if (check_src_slices) - { - c += " FLT4 src = (FLT4)(0.0f);\n"; - c += " if (Z < args.src_tensor.Slices()) {\n"; - c += " src = args.src_tensor.Read(X, Y, Z);\n"; - c += " }\n"; - } - else - { - c += " FLT4 src = args.src_tensor.Read(X, Y, Z);\n"; - } - c += " args.dst_tensor.Write(src, X, Y, Z);\n"; - c += "} \n"; - return c; -} - -int3 GetWorkGroupsCount(int grid_dimension, const int3 &grid_size, const int3 &work_group_size, - const int3 &work_group_launch_order) -{ - int3 work_groups_count; - if (grid_dimension == 1) - { - work_groups_count.x = DivideRoundUp(grid_size.x, work_group_size.x); - work_groups_count.y = 1; - work_groups_count.z = 1; - } - else if (grid_dimension == 2) - { - int3 wgs; - wgs.x = DivideRoundUp(grid_size.x, work_group_size.x); - wgs.y = 
DivideRoundUp(grid_size.y, work_group_size.y); - work_groups_count.x = wgs[work_group_launch_order[0]]; - work_groups_count.y = wgs[work_group_launch_order[1]]; - work_groups_count.z = 1; - } - else - { // grid_dimension == 3 - int3 wgs; - wgs.x = DivideRoundUp(grid_size.x, work_group_size.x); - wgs.y = DivideRoundUp(grid_size.y, work_group_size.y); - wgs.z = DivideRoundUp(grid_size.z, work_group_size.z); - work_groups_count.x = wgs[work_group_launch_order[0]]; - work_groups_count.y = wgs[work_group_launch_order[1]]; - work_groups_count.z = wgs[work_group_launch_order[2]]; - } - return work_groups_count; -} - -} // namespace - -DataType OperationDef::GetDataType() const { return DeduceDataTypeFromPrecision(precision); } - -DataType OperationDef::GetPrimaryDataType() const { return src_tensors[0].data_type; } -TensorStorageType OperationDef::GetPrimaryStorageType() const -{ - return src_tensors[0].storage_type; -} - -bool OperationDef::IsBatchSupported() const -{ - for (const auto &src : src_tensors) - { - if (HasAxis(src.layout, Axis::BATCH)) - { - return true; - } - } - for (const auto &dst : dst_tensors) - { - if (HasAxis(dst.layout, Axis::BATCH)) - { - return true; - } - } - return false; -} - -GPUOperation::GPUOperation(const OperationDef &definition) : definition_(definition) {} - -void GPUOperation::SetSrc(Tensor *ptr, int index) -{ - if (index >= (int)src_.size()) - { - src_.resize(index + 1, nullptr); - } - src_[index] = ptr; -} - -void GPUOperation::SetDst(Tensor *ptr, int index) -{ - if (index >= (int)dst_.size()) - { - dst_.resize(index + 1, nullptr); - } - dst_[index] = ptr; -} - -GPUOperation::GPUOperation(GPUOperation &&operation) - : args_(std::move(operation.args_)), code_(std::move(operation.code_)), - work_group_size_(operation.work_group_size_), - compiler_options_(std::move(operation.compiler_options_)), - tensor_to_grid_(operation.tensor_to_grid_), elementwise_(operation.elementwise_), - linkable_(operation.linkable_), 
check_src_channels_size_(operation.check_src_channels_size_), - definition_(std::move(operation.definition_)), src_(std::move(operation.src_)), - dst_(std::move(operation.dst_)), kernel_(std::move(operation.kernel_)), - grid_dimension_(operation.grid_dimension_), - work_group_launch_order_(operation.work_group_launch_order_), grid_size_(operation.grid_size_), - src_tensors_names_(std::move(operation.src_tensors_names_)), - dst_tensors_names_(std::move(operation.dst_tensors_names_)), - work_groups_count_(operation.work_groups_count_), linkable_count_(operation.linkable_count_), - elementwise_code_(std::move(operation.elementwise_code_)) -{ -} - -GPUOperation &GPUOperation::operator=(GPUOperation &&operation) -{ - if (this != &operation) - { - args_ = std::move(operation.args_); - code_ = std::move(operation.code_); - std::swap(work_group_size_, operation.work_group_size_); - compiler_options_ = std::move(operation.compiler_options_); - tensor_to_grid_ = operation.tensor_to_grid_; - elementwise_ = operation.elementwise_; - linkable_ = operation.linkable_; - check_src_channels_size_ = operation.check_src_channels_size_; - definition_ = std::move(operation.definition_); - src_ = std::move(operation.src_); - dst_ = std::move(operation.dst_); - kernel_ = std::move(operation.kernel_); - std::swap(grid_dimension_, operation.grid_dimension_); - std::swap(work_group_launch_order_, operation.work_group_launch_order_); - std::swap(grid_size_, operation.grid_size_); - src_tensors_names_ = std::move(operation.src_tensors_names_); - dst_tensors_names_ = std::move(operation.dst_tensors_names_); - std::swap(work_groups_count_, operation.work_groups_count_); - std::swap(linkable_count_, operation.linkable_count_); - elementwise_code_ = std::move(operation.elementwise_code_); - } - return *this; -} - -absl::Status GPUOperation::AddOperation(GPUOperation *operation) -{ - linkable_count_ += 1; - std::string code = operation->code_; - std::string unique_postfix = absl::StrCat("_link", 
linkable_count_); - operation->args_.RenameArgs(unique_postfix, &code); - elementwise_code_ += "{\n" + code + "\n}\n"; - RETURN_IF_ERROR(args_.Merge(std::move(operation->args_), unique_postfix)); - for (size_t i = 0; i < operation->src_tensors_names_.size(); ++i) - { - definition_.src_tensors.push_back(operation->definition_.src_tensors[i + 1]); - src_tensors_names_.push_back(operation->src_tensors_names_[i] + unique_postfix); - } - for (size_t i = 0; i < operation->dst_tensors_names_.size(); ++i) - { - dst_tensors_names_.push_back(operation->dst_tensors_names_[i] + unique_postfix); - } - return absl::OkStatus(); -} - -void GPUOperation::AddSrcTensor(const std::string &tensor_name, const TensorDescriptor &desc) -{ - src_tensors_names_.push_back(tensor_name); - auto desc_new = std::make_unique<TensorDescriptor>(desc); - args_.AddObjectRef(tensor_name, AccessType::READ, std::move(desc_new)); -} - -void GPUOperation::AddSrcBuffer(const std::string &buffer_name, const BufferDescriptor &desc) -{ - src_tensors_names_.push_back(buffer_name); - auto desc_new = std::make_unique<BufferDescriptor>(desc); - args_.AddObjectRef(buffer_name, AccessType::READ, std::move(desc_new)); -} - -void GPUOperation::AddDstTensor(const std::string &tensor_name, const TensorDescriptor &desc) -{ - dst_tensors_names_.push_back(tensor_name); - auto desc_new = std::make_unique<TensorDescriptor>(desc); - args_.AddObjectRef(tensor_name, AccessType::WRITE, std::move(desc_new)); -} - -absl::Status GPUOperation::UpdateParams() -{ - for (size_t i = 0; i < src_tensors_names_.size(); ++i) - { - RETURN_IF_ERROR(args_.SetObjectRef(src_tensors_names_[i], src_[i])); - } - for (size_t i = 0; i < dst_tensors_names_.size(); ++i) - { - RETURN_IF_ERROR(args_.SetObjectRef(dst_tensors_names_[i], dst_[i])); - } - RETURN_IF_ERROR(BindArguments(&args_)); - grid_size_ = GetGridSize(); - work_groups_count_ = - GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_); - return 
absl::OkStatus(); -} - -absl::Status GPUOperation::AssembleCode(const DeviceInfo &device_info, CLContext *context) -{ - if (elementwise_) - { - auto src_desc = absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]); - if (definition_.IsBatchSupported()) - { - src_desc->SetStateVar("BatchedWidth", "true"); - } - src_tensors_names_.insert(src_tensors_names_.begin(), "src_tensor"); - args_.AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - - auto dst_desc = absl::make_unique<TensorDescriptor>(definition_.dst_tensors[0]); - if (definition_.IsBatchSupported()) - { - dst_desc->SetStateVar("BatchedWidth", "true"); - } - dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor"); - args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); - - elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_; - code_ = GetElementWiseCode(definition_, check_src_channels_size_); - RETURN_IF_ERROR(args_.AllocateObjects(context)); - RETURN_IF_ERROR( - args_.TransformToCLCode(device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_)); - } - else - { - RETURN_IF_ERROR(args_.AllocateObjects(context)); - RETURN_IF_ERROR( - args_.TransformToCLCode(device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_)); - } - return absl::OkStatus(); -} - -absl::Status GPUOperation::Compile(const CreationContext &creation_context) -{ - RETURN_IF_ERROR(AssembleCode(creation_context.GetDeviceInfo(), creation_context.context)); - RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( - code_, "main_function", compiler_options_, *creation_context.context, *creation_context.device, - &kernel_)); - return PostCompileCheck(creation_context.device->info_, kernel_.info_); -} - -absl::Status GPUOperation::CompileDeserialized(const CreationContext &creation_context) -{ - return creation_context.cache->GetOrCreateCLKernel(code_, "main_function", compiler_options_, - *creation_context.context, - *creation_context.device, &kernel_); -} - 
-void GPUOperation::GetPossibleKernelWorkGroups(TuningType tuning_type, - const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const -{ - GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, work_groups); -} - -absl::Status GPUOperation::Tune(const TuningParameters &params) -{ - std::vector<int3> possible_work_groups; - GetPossibleKernelWorkGroups(params.tuning_type, *params.info, kernel_.info_, - &possible_work_groups); - if (possible_work_groups.empty()) - { - return absl::NotFoundError("Can not found work_group size to launch kernel"); - } - if (possible_work_groups.size() == 1) - { - work_group_size_ = possible_work_groups[0]; - work_groups_count_ = - GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_); - return absl::OkStatus(); - } - else - { - std::vector<int3> work_groups_count(possible_work_groups.size()); - for (size_t i = 0; i < work_groups_count.size(); ++i) - { - work_groups_count[i] = GetWorkGroupsCount(grid_dimension_, grid_size_, - possible_work_groups[i], work_group_launch_order_); - } - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - int best_work_group_index; - RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( - kernel_, *params.info, work_groups_count, possible_work_groups, &best_work_group_index)); - work_group_size_ = possible_work_groups[best_work_group_index]; - work_groups_count_ = - GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_); - return absl::OkStatus(); - } -} - -int3 GPUOperation::GetGridSize() const -{ - if (elementwise_ || tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_SToZ) - { - const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); - const int grid_y = dst_[0]->Height() * dst_[0]->Depth(); - const int grid_z = dst_[0]->Slices(); - return int3(grid_x, grid_y, grid_z); - } - if (tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_ZIs1) - { - const int grid_x = dst_[0]->Width() *
dst_[0]->Batch(); - const int grid_y = dst_[0]->Height() * dst_[0]->Depth(); - const int grid_z = 1; - return int3(grid_x, grid_y, grid_z); - } - if (tensor_to_grid_ == TensorToGrid::kWBToX_HToY_DToZ) - { - const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); - const int grid_y = dst_[0]->Height(); - const int grid_z = dst_[0]->Depth(); - return int3(grid_x, grid_y, grid_z); - } - if (tensor_to_grid_ == TensorToGrid::kBToX_YIs1_ZIs1) - { - const int grid_x = dst_[0]->Batch(); - const int grid_y = 1; - const int grid_z = 1; - return int3(grid_x, grid_y, grid_z); - } - return grid_size_; -} - -void GPUOperation::AddUniquePostfix(const std::string &unique_postfix) -{ - for (uint32_t i = 0; i < src_tensors_names_.size(); ++i) - { - src_tensors_names_[i] += unique_postfix; - } - for (uint32_t i = 0; i < dst_tensors_names_.size(); ++i) - { - dst_tensors_names_[i] += unique_postfix; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h deleted file mode 100644 index 4f531c629..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__ - -#include <string> -#include <vector> - -#include "TuningParameters.h" - -#include "open_cl/Arguments.h" -#include "open_cl/Buffer.h" -#include "open_cl/ClCommandQueue.h" -#include "open_cl/ClContext.h" -#include "open_cl/ClDevice.h" -#include "open_cl/ClKernel.h" -#include "open_cl/ClProgram.h" -#include "open_cl/DataType.h" -#include "open_cl/DeviceInfo.h" -#include "open_cl/Precision.h" -#include "open_cl/ProgramCache.h" -#include "open_cl/Tensor.h" -#include "open_cl/TensorType.h" -#include "open_cl/Types.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// kCustom: default value -// GPUOperation::GetGridSize must be overloaded -// kWBToX_HDToY_SToZ: -// grid_x = dst_[0]->Width() * dst_[0]->Batch(); -// grid_y = dst_[0]->Height() * dst_[0]->Depth(); -// grid_z = dst_[0]->Slices(); -// kWBToX_HDToY_ZIs1: -// grid_x = dst_[0]->Width() * dst_[0]->Batch(); -// grid_y = dst_[0]->Height() * dst_[0]->Depth(); -// grid_z = 1; -// kWBToX_HToY_DToZ: -// grid_x = dst_[0]->Width() * dst_[0]->Batch(); -// grid_y = dst_[0]->Height(); -// grid_z = dst_[0]->Depth(); -// kBToX_YIs1_ZIs1: -// grid_x = dst_[0]->Batch(); -// grid_y = 1; -// grid_z = 1; -enum class TensorToGrid -{ - kCustom, - kWBToX_HDToY_SToZ, - kWBToX_HDToY_ZIs1, - kWBToX_HToY_DToZ, - kBToX_YIs1_ZIs1 -}; - -struct CreationContext -{ - const CLDevice *device; - CLContext *context; - CLCommandQueue *queue; - ProgramCache *cache; - - const DeviceInfo &GetDeviceInfo() const { return device->info_; } -}; - -struct OperationDef -{ - CalculationsPrecision precision; - std::vector<TensorDescriptor> src_tensors; - std::vector<TensorDescriptor> dst_tensors; - - // returns FLOAT32 for F32 precision and FLOAT16 for F16 precision - DataType GetDataType() const; - // Primary means the first src tensor, because first tensor usually defines - 
// the structure of kernel, all other resources(biases) types and etc. - DataType GetPrimaryDataType() const; - TensorStorageType GetPrimaryStorageType() const; - bool IsBatchSupported() const; -}; - -// GPUOperation represents some implementation of neural network operation on -// GPU. GPUOperation can contain another GPU operations with flag elementwise_. -// When GPUOperation contains another GPU ops, this GPUoperation replaces -// some sequence of operations Op + op0 + op1 + ... -// Because of this abilities of GPUOperation, usage scenario is next: -// Create instance of GPUOperation. -// Create all instances of GPUOperations that we will(probably) attach -// to GPUOperation. Attach all GPUOperations to GPUOperation. Call -// GPUOperation.Compile(). Don't call GPUOperations.Compile() if it -// attached, it useless(and may be error) -class GPUOperation -{ -public: - GPUOperation() = default; - explicit GPUOperation(const OperationDef &definition); - virtual ~GPUOperation() = default; - // Move only - GPUOperation(GPUOperation &&operation); - GPUOperation &operator=(GPUOperation &&operation); - GPUOperation(const GPUOperation &) = delete; - GPUOperation &operator=(const GPUOperation &) = delete; - - absl::Status AddOperation(GPUOperation *operation); - - void SetSrc(Tensor *ptr, int index = 0); - void SetDst(Tensor *ptr, int index = 0); - - // should be called after changes of inputs/outputs. 
- absl::Status UpdateParams(); - - absl::Status AddToQueue(CLCommandQueue *queue) - { - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return queue->Dispatch(kernel_, work_groups_count_, work_group_size_); - } - - virtual void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const; - - absl::Status Tune(const TuningParameters &params); - - absl::Status AssembleCode(const DeviceInfo &device_info, CLContext *context); - - absl::Status Compile(const CreationContext &creation_context); - - absl::Status CompileDeserialized(const CreationContext &creation_context); - - virtual absl::Status PostCompileCheck(const DeviceInfo &, const KernelInfo &) - { - return absl::OkStatus(); - } - - const OperationDef &GetDefinition() const { return definition_; } - - void AddSrcTensor(const std::string &tensor_name, const TensorDescriptor &desc); - void AddSrcBuffer(const std::string &buffer_name, const BufferDescriptor &desc); - void AddDstTensor(const std::string &tensor_name, const TensorDescriptor &desc); - - bool IsLinkable() const { return elementwise_ && linkable_; } - - // for linking - void AddUniquePostfix(const std::string &unique_postfix); - - Arguments args_; - std::string code_; - int3 work_group_size_ = int3(8, 4, 1); - std::vector<CompilerOptions> compiler_options_; - // not applicable to elementwise - TensorToGrid tensor_to_grid_ = TensorToGrid::kCustom; - - bool elementwise_ = false; - // applicable only with elementwise_ = true; - bool linkable_ = true; // by default every elementwise is linkable - // applicable only with elementwise_ = true; - bool check_src_channels_size_ = false; - -protected: - virtual absl::Status BindArguments(ArgumentsBinder *) { return absl::OkStatus(); } - virtual int3 GetGridSize() const; - - // Defines operation calculation precision and format of src/dst tensors.
- OperationDef definition_; - std::vector<Tensor *> src_; - std::vector<Tensor *> dst_; - CLKernel kernel_; - int grid_dimension_ = 3; // can be 1, 2 or 3 - int3 work_group_launch_order_ = int3(0, 1, 2); - int3 grid_size_ = int3(0, 0, 0); - std::vector<std::string> src_tensors_names_; - std::vector<std::string> dst_tensors_names_; - -private: - int3 work_groups_count_ = int3(0, 0, 0); - int linkable_count_ = 0; - std::string elementwise_code_; // temporary, used during op construction -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc deleted file mode 100644 index ceeab2f39..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc +++ /dev/null @@ -1,400 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Pooling.h" - -#include <string> - -#include "Util.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::string GetAveragePoolingKernelCode(const OperationDef &op_def, bool stride_correction, - GPUOperation *op) -{ - auto src_desc = op_def.src_tensors[0]; - - src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); - - if (op_def.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddSrcTensor("src_tensor", src_desc); - auto dst_desc = op_def.dst_tensors[0]; - if (op_def.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddDstTensor("dst_tensor", dst_desc); - - std::map<Axis, std::string> axis_to_src_coord = { - {Axis::WIDTH, "x_c"}, {Axis::HEIGHT, "y_c"}, {Axis::DEPTH, "d_c"}, - {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"}, - }; - - std::map<Axis, std::string> axis_to_dst_coord = { - {Axis::WIDTH, "X"}, {Axis::HEIGHT, "Y"}, {Axis::DEPTH, "D"}, - {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"}, - }; - - std::vector<std::string> src_coords; - std::vector<std::string> dst_coords; - for (auto axis : {Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH, Axis::CHANNELS}) - { - if (op_def.dst_tensors[0].HasAxis(axis)) - { - dst_coords.push_back(axis_to_dst_coord[axis]); - } - if (op_def.src_tensors[0].HasAxis(axis)) - { - src_coords.push_back(axis_to_src_coord[axis]); - } - } - std::string src_coord = src_coords[0]; - for (size_t i = 1; i < src_coords.size(); ++i) - { - src_coord += ", " + src_coords[i]; - } - std::string dst_coord = dst_coords[0]; - for (size_t i = 1; i < dst_coords.size(); ++i) - { - dst_coord += ", " + dst_coords[i]; - } - - const bool manual_clamp = op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER || - op_def.src_tensors[0].storage_type == TensorStorageType::IMAGE_BUFFER; - - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = 
get_global_id(0);\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " int linear_id_1 = get_global_id(1);\n"; - c += " int Y = linear_id_1 / args.dst_tensor.Depth();\n"; - c += " int D = linear_id_1 % args.dst_tensor.Depth();\n"; - } - else - { - c += " int Y = get_global_id(1);\n"; - } - c += " int Z = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - c += " float4 r = (float4)(0.0f);\n"; - c += " float window_size = 0.0;\n"; - if (stride_correction) - { - c += " int xs = " + - GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") + - ";\n"; - } - else - { - if (op_def.IsBatchSupported()) - { - c += " int xs = X * args.stride_x + args.padding_x * " - "args.src_tensor.Batch();\n"; - } - else - { - c += " int xs = X * args.stride_x + args.padding_x;\n"; - } - } - c += " int ys = Y * args.stride_y + args.padding_y;\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " int ds = D * args.stride_z + args.padding_z;\n"; - c += " for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n"; - c += " int d_c = ds + kz;\n"; - c += " if (d_c < 0 || d_c >= args.src_tensor.Depth()) continue;\n"; - } - c += " for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n"; - c += " int y_c = ys + ky;\n"; - c += " bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n"; - c += " for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n"; - if (op_def.IsBatchSupported()) - { - c += " int x_c = xs + kx * args.src_tensor.Batch();\n"; - } - else - { - c += " int x_c = xs + kx;\n"; - } - c += " bool outside = outside_y || x_c < 0 || x_c >= " - "args.src_tensor.Width();\n"; - if (manual_clamp) - { - c += " r += !outside ? 
args.src_tensor.Read<float>(" + src_coord + - ") : " - "(float4)(0.0f);\n"; - } - else - { - c += " r += args.src_tensor.Read<float>(" + src_coord + ");\n"; - } - c += " window_size += !outside ? 1.0 : 0.0;\n"; - c += " }\n"; - c += " }\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " } // Depth\n"; - } - // If window_size==0, window covered nothing. This situation is a sign of - // incorrectly constructed operation. NaNs are expected as output. - c += " FLT4 result = TO_FLT4(r / window_size);\n"; - c += " args.dst_tensor.Write(result, " + dst_coord + ");\n"; - c += "}\n"; - - return c; -} - -std::string GetMaxPoolingKernelCode(const OperationDef &op_def, bool stride_correction, - bool output_indices, GPUOperation *op) -{ - auto src_desc = op_def.src_tensors[0]; - if (op_def.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddSrcTensor("src_tensor", src_desc); - auto dst_desc = op_def.dst_tensors[0]; - if (op_def.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddDstTensor("dst_tensor", dst_desc); - if (output_indices) - { - auto dst_ind_desc = op_def.dst_tensors[1]; - if (op_def.IsBatchSupported()) - { - dst_ind_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddDstTensor("dst_indices", dst_ind_desc); - } - - std::map<Axis, std::string> axis_to_src_coord = { - {Axis::WIDTH, "x_c"}, {Axis::HEIGHT, "y_c"}, {Axis::DEPTH, "d_c"}, - {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"}, - }; - - std::map<Axis, std::string> axis_to_dst_coord = { - {Axis::WIDTH, "X"}, {Axis::HEIGHT, "Y"}, {Axis::DEPTH, "D"}, - {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"}, - }; - - std::vector<std::string> src_coords; - std::vector<std::string> dst_coords; - for (auto axis : {Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH, Axis::CHANNELS}) - { - if (op_def.dst_tensors[0].HasAxis(axis)) - { - dst_coords.push_back(axis_to_dst_coord[axis]); - } - if (op_def.src_tensors[0].HasAxis(axis)) - { - 
src_coords.push_back(axis_to_src_coord[axis]); - } - } - std::string src_coord = src_coords[0]; - for (size_t i = 1; i < src_coords.size(); ++i) - { - src_coord += ", " + src_coords[i]; - } - std::string dst_coord = dst_coords[0]; - for (size_t i = 1; i < dst_coords.size(); ++i) - { - dst_coord += ", " + dst_coords[i]; - } - - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0);\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " int linear_id_1 = get_global_id(1);\n"; - c += " int Y = linear_id_1 / args.dst_tensor.Depth();\n"; - c += " int D = linear_id_1 % args.dst_tensor.Depth();\n"; - } - else - { - c += " int Y = get_global_id(1);\n"; - } - c += " int Z = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - c += " FLT4 maximum = (FLT4)(-10000.0f);\n"; - if (output_indices) - { - c += " FLT4 indexes = (FLT4)(0.0f);\n"; - } - if (stride_correction) - { - c += " int xs = " + - GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") + - ";\n"; - } - else - { - if (op_def.IsBatchSupported()) - { - c += " int xs = X * args.stride_x + args.padding_x * " - "args.src_tensor.Batch();\n"; - } - else - { - c += " int xs = X * args.stride_x + args.padding_x;\n"; - } - } - c += " int ys = Y * args.stride_y + args.padding_y;\n"; - c += " for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n"; - c += " int y_c = ys + ky;\n"; - c += " if (y_c < 0 || y_c >= args.src_tensor.Height()) continue;\n"; - c += " for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n"; - if (op_def.IsBatchSupported()) - { - c += " int x_c = xs + kx * args.src_tensor.Batch();\n"; - } - else - { - c += " int x_c = xs + kx;\n"; - } - c += " if (x_c < 0 || x_c >= args.src_tensor.Width()) continue;\n"; - if 
(op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " int ds = D * args.stride_z + args.padding_z;\n"; - c += " for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n"; - c += " int d_c = ds + kz;\n"; - c += " if (d_c < 0 || d_c >= args.src_tensor.Depth()) continue;\n"; - } - c += " FLT4 src = args.src_tensor.Read(" + src_coord + ");\n"; - if (output_indices) - { - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " FLT index_counter = (FLT)((ky * args.kernel_size_x + kx) * " - "args.kernel_size_z + kz) + (FLT)(0.1f);\n"; - } - else - { - c += " FLT index_counter = (FLT)(ky * args.kernel_size_x + kx) + " - "(FLT)(0.1f);\n"; - } - c += " if (src.x > maximum.x) {\n"; - c += " indexes.x = index_counter;\n"; - c += " maximum.x = src.x;\n"; - c += " }\n"; - c += " if (src.y > maximum.y) {\n"; - c += " indexes.y = index_counter;\n"; - c += " maximum.y = src.y;\n"; - c += " }\n"; - c += " if (src.z > maximum.z) {\n"; - c += " indexes.z = index_counter;\n"; - c += " maximum.z = src.z;\n"; - c += " }\n"; - c += " if (src.w > maximum.w) {\n"; - c += " indexes.w = index_counter;\n"; - c += " maximum.w = src.w;\n"; - c += " }\n"; - } - else - { - c += " maximum = max(src, maximum);\n"; - } - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " } // Depth\n"; - } - c += " }\n"; - c += " }\n"; - c += " args.dst_tensor.Write(maximum, " + dst_coord + ");\n"; - if (output_indices) - { - c += " args.dst_indices.Write(indexes, " + dst_coord + ");\n"; - } - c += "}\n"; - - return c; -} -} // namespace - -GPUOperation CreatePooling(const OperationDef &definition, const Pooling2DAttributes &attr) -{ - GPUOperation op(definition); - op.args_.AddInt("kernel_size_x", attr.kernel.w); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("kernel_size_y", attr.kernel.h); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("stride_y", attr.strides.h); - - const bool 
stride_correction = definition.IsBatchSupported() && attr.strides.w != 1; - if (attr.type == PoolingType::AVERAGE) - { - op.code_ = GetAveragePoolingKernelCode(definition, stride_correction, &op); - } - else if (attr.type == PoolingType::MAX) - { - op.code_ = GetMaxPoolingKernelCode(definition, stride_correction, attr.output_indices, &op); - } - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - return op; -} - -GPUOperation CreatePooling(const OperationDef &definition, const Pooling3DAttributes &attr) -{ - GPUOperation op(definition); - op.args_.AddInt("kernel_size_x", attr.kernel.w); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("kernel_size_y", attr.kernel.h); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("stride_y", attr.strides.h); - op.args_.AddInt("kernel_size_z", attr.kernel.d); - op.args_.AddInt("padding_z", -attr.padding.prepended.d); - op.args_.AddInt("stride_z", attr.strides.d); - const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1; - if (attr.type == PoolingType::AVERAGE) - { - op.code_ = GetAveragePoolingKernelCode(definition, stride_correction, &op); - } - else if (attr.type == PoolingType::MAX) - { - op.code_ = GetMaxPoolingKernelCode(definition, stride_correction, attr.output_indices, &op); - } - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h deleted file mode 100644 index 166d81591..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_POOLING_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_POOLING_H__ - -#include "GpuOperation.h" - -#include "open_cl/Operations.h" -#include "open_cl/Precision.h" -#include "open_cl/ClKernel.h" -#include "open_cl/Tensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -GPUOperation CreatePooling(const OperationDef &definition, const Pooling2DAttributes &attr); - -GPUOperation CreatePooling(const OperationDef &definition, const Pooling3DAttributes &attr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc deleted file mode 100644 index 37f87e599..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Relu.h" - -#include <string> -#include "Util.h" -#include "GpuOperation.h" -#include "absl/strings/str_cat.h" -#include "open_cl/Precision.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -GPUOperation CreateReLU(const OperationDef &definition, const ReLUAttributes &attr) -{ - GPUOperation op(definition); - op.elementwise_ = true; - - std::string min_func; - if (attr.alpha != 0.0f) - { - min_func = "min(in_out_value * args.alpha, (FLT)(0.0f))"; - if (definition.precision == CalculationsPrecision::F32) - { - op.args_.AddFloat("alpha", attr.alpha); - } - else - { -#ifdef FIXME_PORTING_HALF_REQIRED - op.args_.AddHalf("alpha", half(attr.alpha)); -#endif - } - } - else - { - min_func = "(FLT)(0.0f)"; - } - if (attr.clip != 0.0f) - { - if (definition.precision == CalculationsPrecision::F32) - { - op.args_.AddFloat("clip", attr.clip); - } - else - { -#ifdef FIXME_PORTING_HALF_REQIRED - op.args_.AddHalf("clip", half(attr.clip)); -#endif - } - op.code_ = absl::StrCat("in_out_value = clamp(in_out_value, " + min_func + ", args.clip);"); - } - else - { - op.code_ = absl::StrCat("in_out_value = max(in_out_value, ", min_func, ");"); - } - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h deleted file mode 100644 index eb6b1ad1d..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics 
Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__ -#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__ - -#include "open_cl/ClKernel.h" -#include "GpuOperation.h" -#include "open_cl/Precision.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" -#include "open_cl/Operations.h" -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -GPUOperation CreateReLU(const OperationDef &definition, const ReLUAttributes &attr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc deleted file mode 100644 index cdd3e8364..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Reshape.h" - -#include <string> - -#include "Util.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ -std::string GetReshapeCode(const OperationDef &op_def) -{ - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int linear_id = get_global_id(0);\n"; - c += " int X = linear_id / args.dst_tensor.Batch();\n"; - c += " int B = linear_id % args.dst_tensor.Batch();\n"; - c += " args.dst_tensor.SetBatchRef(B);\n"; - } - else - { - c += " int X = get_global_id(0);\n"; - } - c += " int Y = get_global_id(1);\n"; - c += " int Z = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - c += " FLT temps[4];\n"; - c += " temps[0] = (FLT)(0.0f);\n"; - c += " temps[1] = (FLT)(0.0f);\n"; - c += " temps[2] = (FLT)(0.0f);\n"; - c += " temps[3] = (FLT)(0.0f);\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int base = B;\n"; - } - else - { - c += " int base = 0;\n"; - } - c += " base = ((base * args.dst_tensor.Height() + Y) * " - "args.dst_tensor.Width() + X) * args.dst_tensor.Channels() + Z * 4;\n"; - c += " for (int i = 0; i < 4; ++i) {\n"; - c += " int dst_channel = Z * 4 + i;\n"; - c += " if (dst_channel < args.dst_tensor.Channels()) {;\n"; - c += " int p = base + i;\n"; - c += " int src_c = p % 
args.src_tensor.Channels();\n"; - c += " p = p / args.src_tensor.Channels();\n"; - c += " int src_x = p % args.src_tensor.Width();\n"; - c += " p = p / args.src_tensor.Width();\n"; - c += " int src_y = p % args.src_tensor.Height();\n"; - if (op_def.src_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int src_b = p / args.src_tensor.Height();\n"; - c += " args.src_tensor.SetBatchRef(src_b);\n"; - } - c += " int src_z = src_c / 4;\n"; - c += " int src_sub_ch = src_c % 4;\n"; - c += " FLT4 t = args.src_tensor.Read(src_x, src_y, src_z);\n"; - c += " FLT t_ar[4] = {t.x, t.y, t.z, t.w};\n"; - c += " temps[i] = t_ar[src_sub_ch];\n"; - c += " }\n"; - c += " }\n"; - c += " FLT4 result = (FLT4)(temps[0], temps[1], temps[2], temps[3]);\n"; - c += " args.dst_tensor.Write(result, X, Y, Z);\n"; - c += "}\n"; - return c; -} - -} // namespace - -GPUOperation CreateReshape(const OperationDef &definition) -{ - GPUOperation op(definition); - op.AddSrcTensor("src_tensor", definition.src_tensors[0]); - op.AddDstTensor("dst_tensor", definition.dst_tensors[0]); - op.code_ = GetReshapeCode(definition); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h deleted file mode 100644 index 4f7c5ea38..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__ - -#include "GpuOperation.h" - -#include "open_cl/Operations.h" -#include "open_cl/Precision.h" -#include "open_cl/ClKernel.h" -#include "open_cl/Tensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -GPUOperation CreateReshape(const OperationDef &definition); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc deleted file mode 100644 index 13010e791..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Reshape.h" - -#include <string> - -#include "Util.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::string GetReshapeCode(const OperationDef &op_def) -{ - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int linear_id = get_global_id(0);\n"; - c += " int X = linear_id / args.dst_tensor.Batch();\n"; - c += " int B = linear_id % args.dst_tensor.Batch();\n"; - c += " args.dst_tensor.SetBatchRef(B);\n"; - } - else - { - c += " int X = get_global_id(0);\n"; - } - c += " int Y = get_global_id(1);\n"; - c += " int Z = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int dst_bhwc4 = B;\n"; - } - else - { - c += " int dst_bhwc4 = 0;\n"; - } - c += " dst_bhwc4 = ((dst_bhwc4 * args.dst_tensor.Height() + Y) * " - "args.dst_tensor.Width() + X) * args.dst_tensor.Slices() + Z;\n"; - c += " int src_z = dst_bhwc4 % args.src_tensor.Slices();\n"; - c += " dst_bhwc4 = dst_bhwc4 / args.src_tensor.Slices();\n"; - c += " int src_x = dst_bhwc4 % args.src_tensor.Width();\n"; - c += " dst_bhwc4 = dst_bhwc4 / args.src_tensor.Width();\n"; - c += " int src_y = dst_bhwc4 % args.src_tensor.Height();\n"; - if (op_def.src_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int src_b = dst_bhwc4 / args.src_tensor.Height();\n"; - c += " args.src_tensor.SetBatchRef(src_b);\n"; - } - c += " FLT4 result = args.src_tensor.Read(src_x, src_y, src_z);\n"; - c += " args.dst_tensor.Write(result, X, Y, Z);\n"; - c += "}\n"; - return c; -} - -} // namespace - -GPUOperation CreateReshapex4(const OperationDef &definition) -{ - GPUOperation op(definition); - op.AddSrcTensor("src_tensor", definition.src_tensors[0]); - 
op.AddDstTensor("dst_tensor", definition.dst_tensors[0]); - op.code_ = GetReshapeCode(definition); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h deleted file mode 100644 index 8988e8bd4..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__ -#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__ - -#include "GpuOperation.h" - -#include "open_cl/Operations.h" -#include "open_cl/Precision.h" -#include "open_cl/ClKernel.h" -#include "open_cl/Tensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// More optimized, but require src_channels % 4 == 0 and dst_channels % 4 == 0 -GPUOperation CreateReshapex4(const OperationDef &definition); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc deleted file mode 100644 index 4ee164d82..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Softmax.h" - -#include <string> - -#include "Util.h" -#include "WorkGroupPicking.h" -#include "GpuOperation.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -namespace -{ -std::string GetSoftmaxKernelCode(const OperationDef &op_def) -{ - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0);\n"; - c += " int Y = get_global_id(1);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) " - "return; \n"; - c += " float sum = 0.0f;\n"; - c += " for (int d = 0; d < args.dst_tensor.Slices(); ++d) {\n"; - c += " float4 t = args.src_tensor.Read<float>(X, Y, d);\n"; - c += " sum += exp(t.x);\n"; - c += " if (d * 4 + 1 < args.dst_tensor.Channels()) sum += exp(t.y);\n"; - c += " if (d * 4 + 2 < args.dst_tensor.Channels()) sum += exp(t.z);\n"; - c += " if (d * 4 + 3 < args.dst_tensor.Channels()) sum += exp(t.w);\n"; - c += " }\n"; - c += " for (int d = 0; d < args.dst_tensor.Slices(); ++d) {\n"; - c += " float4 t = args.src_tensor.Read<float>(X, Y, d);\n"; - c += " t = exp(t) / sum;\n"; - c += " FLT4 result = TO_FLT4(t);\n"; - c += " args.dst_tensor.Write(result, X, Y, d);\n"; - c += " }\n"; - c += "}\n"; - return c; -} -} // namespace - -GPUOperation CreateSoftmax(const OperationDef &definition) -{ - GPUOperation op(definition); - auto src_desc = definition.src_tensors[0]; - if (definition.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - op.AddSrcTensor("src_tensor", src_desc); - auto dst_desc = definition.dst_tensors[0]; - if (definition.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - op.AddDstTensor("dst_tensor", dst_desc); - op.code_ = GetSoftmaxKernelCode(definition); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_ZIs1; - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git 
a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h deleted file mode 100644 index 594bab042..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__ -#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__ - -#include "open_cl/ClKernel.h" -#include "GpuOperation.h" -#include "open_cl/Precision.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -GPUOperation CreateSoftmax(const OperationDef &definition); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc deleted file mode 100644 index 590952dca..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Softmax1x1.h" - -#include <string> - -#include "Util.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -Softmax1x1::Softmax1x1(const OperationDef &definition) : GPUOperation(definition) -{ - work_group_size_ = int3(32, 1, 1); - code_ = GetSoftmaxKernelCode(definition_); -} - -Softmax1x1::Softmax1x1(Softmax1x1 &&kernel) : GPUOperation(std::move(kernel)) {} - -Softmax1x1 &Softmax1x1::operator=(Softmax1x1 &&kernel) -{ - if (this != &kernel) - { - GPUOperation::operator=(std::move(kernel)); - } - return *this; -} - -std::string Softmax1x1::GetSoftmaxKernelCode(const OperationDef &op_def) -{ - AddSrcTensor("src_tensor", op_def.src_tensors[0]); - AddDstTensor("dst_tensor", op_def.dst_tensors[0]); - args_.AddFloat("mask_x"); - args_.AddFloat("mask_y"); - args_.AddFloat("mask_z"); - args_.AddFloat("mask_w"); - args_.AddInt("slices_x32"); - - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - if (op_def.IsBatchSupported()) - { - c += " int batch_id = get_global_id(1);\n"; - c += " if (batch_id >= args.dst_tensor.Batch()) return;\n"; - c += " args.dst_tensor.SetBatchRef(batch_id);\n"; - c += " args.src_tensor.SetBatchRef(batch_id);\n"; - } - c += " float4 mask = (float4)(args.mask_x, args.mask_y, args.mask_z, " - "args.mask_w);\n"; - c += " int offset = 0;\n"; - c += " float sum = 0.0f;\n"; - 
c += " int s = 0;\n"; - c += " int tid = get_local_id(0);\n"; - c += " do {\n"; - c += " int z = offset + tid;\n"; - c += " if (z < args.dst_tensor.Slices()) {\n"; - c += " float4 mask_temp = z == args.dst_tensor.Slices() - 1 ? mask : " - "(float4)(1.0f);\n"; - c += " float4 src = args.src_tensor.Read<float>(0, 0, z);\n"; - c += " sum += dot(mask_temp, exp(src));\n"; - c += " offset += 32;\n"; - c += " }\n"; - c += " s++;\n"; - c += " } while (s < args.slices_x32);\n"; - c += "\n"; - c += " __local float4 tmp[8];\n"; - c += " __local float* tmpx1 = (__local float*)tmp;\n"; - c += " tmpx1[tid] = sum;\n"; - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - c += " if (tid == 0) {\n"; - c += " sum = dot((float4)(1.0f), tmp[0]);\n"; - c += " sum += dot((float4)(1.0f), tmp[1]);\n"; - c += " sum += dot((float4)(1.0f), tmp[2]);\n"; - c += " sum += dot((float4)(1.0f), tmp[3]);\n"; - c += " sum += dot((float4)(1.0f), tmp[4]);\n"; - c += " sum += dot((float4)(1.0f), tmp[5]);\n"; - c += " sum += dot((float4)(1.0f), tmp[6]);\n"; - c += " sum += dot((float4)(1.0f), tmp[7]);\n"; - c += " tmpx1[0] = 1.0f / sum;\n"; - c += " }\n"; - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - c += " sum = tmpx1[0];\n"; - c += "\n"; - c += " offset = 0;\n"; - c += " s = 0;\n"; - c += " do {\n"; - c += " int z = offset + tid;\n"; - c += " if (z < args.dst_tensor.Slices()) {\n"; - c += " FLT4 res = TO_FLT4(exp(args.src_tensor.Read<float>(0, 0, " - "z))*sum);\n"; - c += " args.dst_tensor.Write(res, 0, 0, z);\n"; - c += " offset += 32;\n"; - c += " }\n"; - c += " s++;\n"; - c += " } while (s < args.slices_x32);\n"; - c += "}\n"; - return c; -} - -absl::Status Softmax1x1::BindArguments(ArgumentsBinder *args) -{ - float4 mask = GetMaskForLastPlane(src_[0]->Channels()); - RETURN_IF_ERROR(args->SetFloat("mask_x", mask.x)); - RETURN_IF_ERROR(args->SetFloat("mask_y", mask.y)); - RETURN_IF_ERROR(args->SetFloat("mask_z", mask.z)); - RETURN_IF_ERROR(args->SetFloat("mask_w", mask.w)); - 
RETURN_IF_ERROR(args->SetInt("slices_x32", DivideRoundUp(src_[0]->Slices(), 32))); - return absl::OkStatus(); -} - -int3 Softmax1x1::GetGridSize() const { return int3(32, dst_[0]->Batch(), 1); } - -Softmax1x1 CreateSoftmax1x1(const OperationDef &definition) { return Softmax1x1(definition); } - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h deleted file mode 100644 index da375d457..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__ -#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__ - -#include "GpuOperation.h" - -#include "open_cl/Precision.h" -#include "open_cl/ClKernel.h" -#include "open_cl/Tensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class Softmax1x1 : public GPUOperation -{ -public: - Softmax1x1() = default; - explicit Softmax1x1(const OperationDef &definition); - - absl::Status BindArguments(ArgumentsBinder *args) override; - int3 GetGridSize() const override; - - // Move only - Softmax1x1(Softmax1x1 &&kernel); - Softmax1x1 &operator=(Softmax1x1 &&kernel); - Softmax1x1(const Softmax1x1 &) = delete; - Softmax1x1 &operator=(const Softmax1x1 &) = delete; - - friend Softmax1x1 CreateSoftmax1x1(); - -private: - std::string GetSoftmaxKernelCode(const OperationDef &op_def); -}; - -Softmax1x1 CreateSoftmax1x1(const OperationDef &definition); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h deleted file mode 100644 index 3d99b4fda..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__ -#define __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__ - -#include "open_cl/ClCommandQueue.h" -#include "open_cl/DeviceInfo.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class TuningType -{ - EXHAUSTIVE, - FAST -}; - -struct TuningParameters -{ - ProfilingCommandQueue *queue; - const DeviceInfo *info; - TuningType tuning_type = TuningType::EXHAUSTIVE; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc deleted file mode 100644 index df42c66e8..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Util.h" - -#include <cfloat> -#include <cmath> -#include <string> -#include <vector> - -#include "absl/strings/str_cat.h" -#include "absl/strings/substitute.h" -#include "open_cl/Precision.h" -#include "open_cl/DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -std::string GetCommonDefines(CalculationsPrecision precision) -{ - std::string result; - - switch (precision) - { - case CalculationsPrecision::F32: - result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"; - result += "#define ACCUM_FLT4 float4\n"; - result += "#define FLT float\n"; - result += "#define FLT2 float2\n"; - result += "#define FLT3 float3\n"; - result += "#define FLT4 float4\n"; - result += "#define TO_FLT4 convert_float4\n"; - result += "#define TO_ACCUM_TYPE convert_float4\n"; - result += "#define TO_ACCUM_FLT convert_float\n"; - break; - case CalculationsPrecision::F16: - result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"; - result += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; - result += "#define ACCUM_FLT4 half4\n"; - result += "#define FLT half\n"; - result += "#define FLT2 half2\n"; - result += "#define FLT3 half3\n"; - result += "#define FLT4 half4\n"; - result += "#define TO_FLT4 convert_half4\n"; - result += "#define TO_ACCUM_TYPE convert_half4\n"; - result += "#define TO_ACCUM_FLT convert_half\n"; - break; - case CalculationsPrecision::F32_F16: - result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"; - result += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; - result += "#define ACCUM_FLT4 float4\n"; - result += "#define FLT half\n"; - result += "#define FLT2 half2\n"; - result += "#define FLT3 half3\n"; - result += "#define FLT4 half4\n"; - result += "#define TO_FLT4 convert_half4\n"; - result += "#define TO_ACCUM_TYPE convert_float4\n"; - result += "#define TO_ACCUM_FLT convert_float\n"; - break; - } - return result; -} - -std::string GetXStrideCorrectedV2(const 
std::string &src_x, const std::string &batch_size, - const std::string &stride_x, const std::string &padding_x) -{ - // int p0 = src_x / batch_size;\n"; - // int b0 = src_x % batch_size;\n"; - // return (p0 * stride_x + padding_x) * batch_size + b0;\n"; - return absl::Substitute("(((($0) / $1) * $2 + $3) * $1 + ($0) % $1)", src_x, batch_size, stride_x, - padding_x); -} - -float4 GetMaskForLastPlane(int channels) -{ - float4 mask = float4(0.0f); - const int reminder = channels % 4 == 0 ? 4 : channels % 4; - for (int i = 0; i < reminder; ++i) - { - mask[i] = 1.0f; - } - return mask; -} - -int3 GetFirstSuitableWorkGroup(const std::vector<int3> &wgs, int max_wg_size) -{ - for (const auto &wg : wgs) - { - const int wg_size = wg.x * wg.y * wg.z; - if (wg_size <= max_wg_size) - { - return wg; - } - } - return {1, 1, 1}; -} - -int GetRecommendedBlockSizeForConv(const DeviceInfo &device_info, CalculationsPrecision precision, - int task_size) -{ - const float task_size_per_cu = task_size / static_cast<float>(device_info.compute_units_count); - int block_size = 1; - float threshold_1 = FLT_MAX; - float threshold_2 = FLT_MAX; - float threshold_4 = FLT_MAX; - if (!device_info.IsMali()) - { - return 1; - } - MaliInfo mali_info = device_info.mali_info; - switch (precision) - { - case CalculationsPrecision::F16: - if (mali_info.IsBifrostGen1()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 4.0f; - threshold_4 = 256.0f * 8.0f; - } - else if (mali_info.IsBifrostGen2()) - { - threshold_1 = 256.0f * 2.0f; - threshold_2 = 256.0f * 8.0f; - threshold_4 = 256.0f * 16.0f; - } - else if (mali_info.IsBifrostGen3() || mali_info.IsValhall()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 6.0f; - threshold_4 = 256.0f * 16.0f; - } - else if (mali_info.IsMidgard()) - { - threshold_1 = 256.0f * 4.0f; - threshold_2 = 256.0f * 16.0f; - } - break; - case CalculationsPrecision::F32_F16: - if (mali_info.IsBifrostGen1()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 3.0f; - 
threshold_4 = 256.0f * 32.0f; - } - else if (mali_info.IsBifrostGen2()) - { - threshold_1 = 256.0f * 2.0f; - threshold_2 = 256.0f * 8.0f; - } - else if (mali_info.IsBifrostGen3() || mali_info.IsValhall()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 8.0f; - } - else if (mali_info.IsMidgard()) - { - threshold_1 = 256.0f * 4.0f; - } - break; - case CalculationsPrecision::F32: - if (mali_info.IsBifrostGen1()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 4.0f; - } - else if (mali_info.IsBifrostGen2()) - { - threshold_1 = 128.0f; - threshold_2 = 256.0f * 4.0f; - } - else if (mali_info.IsBifrostGen3() || mali_info.IsValhall()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 12.0f; - } - else if (mali_info.IsMidgard()) - { - threshold_1 = 256.0f * 16.0f; - } - break; - } - if (task_size_per_cu <= threshold_1) - { - block_size = 1; - } - else if (task_size_per_cu <= threshold_2) - { - block_size = 2; - } - else if (task_size_per_cu <= threshold_4) - { - block_size = 4; - } - else - { - block_size = 8; - } - return block_size; -} - -int3 GetWorkGroupsCount(const int3 &grid_size, const int3 &work_group_size) -{ - int3 work_groups_count; - work_groups_count.x = DivideRoundUp(grid_size.x, work_group_size.x); - work_groups_count.y = DivideRoundUp(grid_size.y, work_group_size.y); - work_groups_count.z = DivideRoundUp(grid_size.z, work_group_size.z); - return work_groups_count; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h deleted file mode 100644 index 8363862c1..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__ - -#include <string> -#include <vector> - -#include "open_cl/DeviceInfo.h" -#include "open_cl/Precision.h" -#include "open_cl/DataType.h" -#include "open_cl/Shape.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" -#include "open_cl/Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -std::string GetCommonDefines(CalculationsPrecision precision); - -// Calculates correct X coordinate when stride != 1 and batch != 1 for layouts -// with B after W (for example HWBC4) and WB stored in one axis of GPU -// resources. -std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size, - const std::string &stride_x, const std::string &padding_x); - -// Calculates correct X coordinate when stride != 1 and batch != 1 for layouts -// with B after W (for example HWBC4) and WB stored in one axis of GPU -// resources. -std::string GetXStrideCorrectedV2(const std::string &src_x, const std::string &batch_size, - const std::string &stride_x, const std::string &padding_x); - -// Returns float4 mask for last plane(batch of 4 channels) -// assumes that plane size is 4; -// for example we have 7 channels, in our data structures we align it to 8 -// but 8s-channel will be empty, then last plane (batch of 4 channels) will -// have this mask (1, 1, 1, 0). 
-float4 GetMaskForLastPlane(int channels); - -// returns first work group from wgs that has size not bigger than max_wg_size -// if no suitable groups among wgs, returns {1, 1, 1} -int3 GetFirstSuitableWorkGroup(const std::vector<int3> &wgs, int max_wg_size); - -// task_size as amount of FLT4 processed elements. -int GetRecommendedBlockSizeForConv(const DeviceInfo &device, CalculationsPrecision precision, - int task_size); - -int3 GetWorkGroupsCount(const int3 &grid_size, const int3 &work_group_size); -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc deleted file mode 100644 index 214fec271..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc +++ /dev/null @@ -1,348 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "WorkGroupPicking.h" - -#include <algorithm> -#include <limits> -#include <set> -#include <vector> - -#include "open_cl/Util.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -namespace -{ - -std::vector<int2> Get2DWorkgroupsEqualTo128() -{ - return {{128, 1}, {64, 2}, {32, 4}, {16, 8}, {8, 16}, {4, 32}, {2, 64}, {1, 128}}; -} - -std::vector<int3> GenerateWorkGroupSizesXYMultipleOf(int multiplier, int3 grid, - const KernelInfo &kernel_info, - const DeviceInfo &device_info, - WorkGroupSizeAlignment z_alignment) -{ - std::vector<int3> work_groups; - work_groups.reserve(32); - - std::vector<int> possible_z_sizes = GetPossibleSizes(grid.z, z_alignment); - - for (int x = 1; x <= kernel_info.max_work_group_size; x *= 2) - { - for (int y = 1; y <= kernel_info.max_work_group_size; y *= 2) - { - int work_group_size_xy = x * y; - if (work_group_size_xy % multiplier != 0 || - work_group_size_xy > kernel_info.max_work_group_size) - { - continue; - } - for (auto z : possible_z_sizes) - { - if (work_group_size_xy * z > kernel_info.max_work_group_size) - { - continue; - } - if (x <= device_info.max_work_group_size_x && y <= device_info.max_work_group_size_y && - z <= device_info.max_work_group_size_z) - { - work_groups.push_back({x, y, z}); - } - } - } - } - return work_groups; -} - -std::vector<int3> GenerateWorkGroupSizesXMultipleOf(int multiplier, int3 grid, - const KernelInfo &kernel_info, - const DeviceInfo &device_info, - WorkGroupSizeAlignment z_alignment) -{ - std::vector<int3> work_groups; - work_groups.reserve(32); - - std::vector<int> possible_z_sizes = GetPossibleSizes(grid.z, z_alignment); - std::vector<int> possible_y_sizes = GetPossibleSizes(grid.y, WorkGroupSizeAlignment::PRECISE); - - for (int x = multiplier; x <= kernel_info.max_work_group_size && x < grid.x + multiplier; - x += multiplier) - { - for (auto y : possible_y_sizes) - { - for (auto z : possible_z_sizes) - { - if (x <= 
device_info.max_work_group_size_x && y <= device_info.max_work_group_size_y && - z <= device_info.max_work_group_size_z && x * y * z <= kernel_info.max_work_group_size) - { - work_groups.push_back({x, y, z}); - } - } - } - } - return work_groups; -} - -void GetWorkGroupsAlignedToGrid(const DeviceInfo &device_info, const KernelInfo &kernel_info, - const int3 &grid, std::vector<int3> *work_groups) -{ - int3 max_wg_size; - max_wg_size.x = device_info.max_work_group_size_x; - max_wg_size.y = device_info.max_work_group_size_y; - max_wg_size.z = device_info.max_work_group_size_z; - GenerateWorkGroupSizesAlignedToGrid(grid, max_wg_size, kernel_info.max_work_group_size, - work_groups); -} - -int GetPenalty(int grid_size, int group_size) -{ - const int reminder = grid_size % group_size; - return reminder == 0 ? 0 : group_size - reminder; -} - -int GetPenalty(int2 grid_size, int2 group_size) -{ - const int p_x = GetPenalty(grid_size.x, group_size.x); - const int p_y = GetPenalty(grid_size.y, group_size.y); - return p_x * grid_size.y + p_y * grid_size.x + p_x * p_y; -} - -int GetMaxSizeWithMinPenalty(int size, int max_size) -{ - int best_size = 128; - int min_penalty = GetPenalty(size, best_size); - for (int i = 2; i * 128 <= max_size; ++i) - { - if (GetPenalty(size, i * 128) == min_penalty) - { - best_size = i * 128; - } - } - return best_size; -} - -int2 GetMaxSizeWithMinPenalty(int2 size, int max_size) -{ - std::vector<int2> base_groups = Get2DWorkgroupsEqualTo128(); - int min_penalty = std::numeric_limits<int>::max(); - for (const auto &group : base_groups) - { - min_penalty = std::min(GetPenalty(size, group), min_penalty); - } - for (const auto &group : base_groups) - { - for (int y = 1; y * group.y <= max_size; ++y) - { - int new_group_y = y * group.y; - for (int x = 1; x * group.x <= max_size; ++x) - { - int new_group_x = x * group.x; - if (new_group_x * new_group_y > max_size) - { - break; - } - if (GetPenalty(size, int2(new_group_x, new_group_y)) == min_penalty) - { 
- return int2(new_group_x, new_group_y); - } - } - } - } - return int2(0, 0); -} - -int GetBiggestDividerWithPriority(int number, int max_divider) -{ - if (number % 8 == 0 && 8 <= max_divider) - { - return 8; - } - if (number % 4 == 0 && 4 <= max_divider) - { - return 4; - } - if (number % 2 == 0 && 2 <= max_divider) - { - return 2; - } - for (int i = max_divider; i != 0; i--) - { - if (number % i == 0) - { - return i; - } - } - return 1; -} - -int GetBiggestDivider(int number, int max_divider) -{ - for (int i = max_divider; i != 0; i--) - { - if (number % i == 0) - { - return i; - } - } - return 1; -} - -} // namespace - -int3 GetWorkGroupXY128ConvLinear(const int3 &grid) -{ - int grid_z = GetBiggestDividerWithPriority(grid.z, 4); - if (grid.x <= 128) - { - return int3(128, 1, grid_z); - } - int grid_x = GetMaxSizeWithMinPenalty(grid.x, 512 / grid_z); - return {grid_x, 1, grid_z}; -} - -int3 GetWorkGroupXY128Conv(const int3 &grid) -{ - int grid_z = GetBiggestDividerWithPriority(grid.z, 4); - if (grid.x <= 16 && grid.y <= 8) - { - return int3(16, 8, grid_z); - } - int2 grid_xy = GetMaxSizeWithMinPenalty(int2(grid.x, grid.y), 512 / grid_z); - return int3(grid_xy.x, grid_xy.y, grid_z); -} - -// int3 GetWorkGroupXY128Simple(const int3& grid) { return int3(16, 8, 1); } - -int3 GetWorkGroup(const int3 &grid, int max_size) -{ - int wg_z = GetBiggestDividerWithPriority(grid.z, 8); - int wg_xy_size = max_size / wg_z; - int wg_x = std::min(DivideRoundUp(grid.x, 2), wg_xy_size); - int wg_y = std::min(wg_xy_size / wg_x, grid.y); - return int3(wg_x, wg_y, wg_z); -} - -int3 GetWorkGroupConv(const int3 &grid, int max_size, int max_z_size) -{ - int wg_z = GetBiggestDivider(grid.z, max_z_size); - int wg_xy_size = std::min(256, max_size) / wg_z; - int wg_x = std::min(grid.x, wg_xy_size); - int wg_y = std::min(wg_xy_size / wg_x, grid.y); - if (wg_y == grid.y && grid.y % 2 == 0) - { - wg_y = grid.y / 2; - } - return int3(wg_x, wg_y, wg_z); -} - -void 
GetPossibleWorkGroupsXYMultipleOf(int multiplier, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - WorkGroupSizeAlignment z_alignment, - std::vector<int3> *work_groups) -{ - *work_groups = - GenerateWorkGroupSizesXYMultipleOf(multiplier, grid, kernel_info, device_info, z_alignment); -} - -void GetPossibleWorkGroupsXMultipleOf(int multiplier, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - WorkGroupSizeAlignment z_alignment, - std::vector<int3> *work_groups) -{ - *work_groups = - GenerateWorkGroupSizesXMultipleOf(multiplier, grid, kernel_info, device_info, z_alignment); -} - -bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height) -{ - int planar_work_groups = DivideRoundUp(width * height, 128); - auto base_work_groups = Get2DWorkgroupsEqualTo128(); - bool have_equal_work_groups = false; - for (auto &work_group : base_work_groups) - { - int x_groups = DivideRoundUp(width, work_group.x); - int y_groups = DivideRoundUp(height, work_group.y); - int xy_groups = x_groups * y_groups; - if (xy_groups == planar_work_groups) - { - have_equal_work_groups = true; - break; - } - } - return !have_equal_work_groups; -} - -void GetPossibleWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - std::vector<int3> *work_groups) -{ - switch (tuning_type) - { - case TuningType::FAST: - work_groups->push_back(GetWorkGroup(grid, kernel_info.max_work_group_size)); - return; - case TuningType::EXHAUSTIVE: - { - GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups); - return; - } - default: - work_groups->push_back({8, 4, 1}); - return; - } -} - -void GetPossibleWorkGroupsConv(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - std::vector<int3> *work_groups) -{ - switch (tuning_type) - { - case TuningType::FAST: - { - int max_z_size = 16; - if (device_info.IsAdreno()) - 
{ - max_z_size = device_info.IsAdreno3xx() ? 16 : 64; - } - max_z_size = std::min(max_z_size, device_info.max_work_group_size_z); - work_groups->push_back(GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size)); - return; - } - case TuningType::EXHAUSTIVE: - { - GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups); - return; - } - default: - work_groups->push_back({8, 4, 1}); - return; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h deleted file mode 100644 index c19890de1..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WROK_GROUP_PICKING_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WROK_GROUP_PICKING_H__ - -#include <vector> - -#include "TuningParameters.h" - -#include "open_cl/ClKernel.h" -#include "open_cl/DeviceInfo.h" -#include "open_cl/Types.h" -#include "open_cl/WorkgroupSelection.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// multiplier can be power of two only -void GetPossibleWorkGroupsXYMultipleOf(int multiplier, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - WorkGroupSizeAlignment z_alignment, - std::vector<int3> *work_groups); - -void GetPossibleWorkGroupsXMultipleOf(int multiplier, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - WorkGroupSizeAlignment z_alignment, - std::vector<int3> *work_groups); - -int3 GetWorkGroupXY128ConvLinear(const int3 &grid); - -int3 GetWorkGroupXY128Simple(const int3 &grid); -int3 GetWorkGroupXY128Conv(const int3 &grid); - -bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height); - -void GetPossibleWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - std::vector<int3> *work_groups); - -void GetPossibleWorkGroupsConv(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - std::vector<int3> *work_groups); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WROK_GROUP_PICKING_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc deleted file mode 100644 index eac6f3270..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "ConvolutionSelector.h" - -#include "absl/memory/memory.h" -#include "open_cl/kernels/ConvBuffer1x1.h" -#include "open_cl/kernels/ConvConstants.h" -#include "open_cl/kernels/ConvPowervr.h" -#include "open_cl/kernels/ConvWeightsConverter.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/TensorType.h" -#include "open_cl/Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::unique_ptr<GPUOperation> SelectConvolutionAdreno(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints) -{ - if (IsConvConstantsSupported(device_info, op_def, attr)) - { - GPUOperation conv = CreateConvConstants(device_info, op_def, attr); - return absl::make_unique<GPUOperation>(std::move(conv)); - } - else - { - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -std::unique_ptr<GPUOperation> SelectConvolutionWinogradAdreno(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, - ModelHints) -{ - ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); -} - 
-std::unique_ptr<GPUOperation> -SelectConvolutionDynamicWeightsAdreno(const Convolution2DAttributes &attr, - const BHWC &weights_shape, const BHWC &dst_shape, - const DeviceInfo &device_info, const OperationDef &op_def, - ModelHints, ConvWeightsDescription *weights_desc) -{ - ConvPowerVR conv = - CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape); - *weights_desc = conv.GetConvWeightsDescription(); - return absl::make_unique<ConvPowerVR>(std::move(conv)); -} - -std::unique_ptr<GPUOperation> SelectConvolutionNVidia(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - if (IsConvConstantsSupported(device_info, op_def, attr)) - { - GPUOperation conv = CreateConvConstants(device_info, op_def, attr); - return absl::make_unique<GPUOperation>(std::move(conv)); - } - else - { - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -std::unique_ptr<GPUOperation> SelectConvolutionPowerVR(const Convolution2DAttributes &attr, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr); - return absl::make_unique<ConvPowerVR>(std::move(conv)); -} - -std::unique_ptr<GPUOperation> SelectConvolutionMali(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER && - IsConvBuffer1x1Supported(op_def, attr)) - { - ConvBuffer1x1 conv = CreateConvBuffer1x1(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvBuffer1x1>(std::move(conv)); - } - else - { - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -std::unique_ptr<GPUOperation> SelectConvolutionWinogradMali(const 
Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER) - { - ConvBuffer1x1 conv = CreateConvBuffer1x1Wino4x4To6x6(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvBuffer1x1>(std::move(conv)); - } - else - { - ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -std::unique_ptr<GPUOperation> -SelectConvolutionDynamicWeightsMali(const Convolution2DAttributes &attr, const BHWC &weights_shape, - const BHWC &dst_shape, const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints, - ConvWeightsDescription *weights_desc) -{ - if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER && - IsConvBuffer1x1Supported(op_def, weights_shape, attr)) - { - ConvBuffer1x1 conv = - CreateConvBuffer1x1DynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape); - *weights_desc = conv.GetConvWeightsDescription(); - return absl::make_unique<ConvBuffer1x1>(std::move(conv)); - } - else - { - ConvPowerVR conv = - CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape); - *weights_desc = conv.GetConvWeightsDescription(); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -} // namespace - -std::unique_ptr<GPUOperation> SelectConvolution(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints hints) -{ - if (device_info.IsAdreno()) - { - return SelectConvolutionAdreno(attr, dst_shape, device_info, op_def, hints); - } - else if (device_info.IsPowerVR() || device_info.IsAMD() || device_info.IsIntel()) - { - return SelectConvolutionPowerVR(attr, device_info, op_def); - } - else if (device_info.IsNvidia()) - { - return SelectConvolutionNVidia(attr, dst_shape, device_info, op_def); - 
} - else if (device_info.IsMali()) - { - return SelectConvolutionMali(attr, dst_shape, device_info, op_def); - } - else - { - return SelectConvolutionAdreno(attr, dst_shape, device_info, op_def, hints); - } -} - -std::unique_ptr<GPUOperation> SelectConvolutionForWinograd(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, - ModelHints hints) -{ - if (device_info.IsAdreno()) - { - return SelectConvolutionWinogradAdreno(attr, dst_shape, device_info, op_def, hints); - } - else if (device_info.IsPowerVR() || device_info.IsAMD() || device_info.IsNvidia() || - device_info.IsIntel()) - { - ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } - else if (device_info.IsMali()) - { - return SelectConvolutionWinogradMali(attr, dst_shape, device_info, op_def); - } - else - { - return SelectConvolutionWinogradAdreno(attr, dst_shape, device_info, op_def, hints); - } -} - -std::unique_ptr<GPUOperation> -SelectConvolutionWithDynamicWeights(const Convolution2DAttributes &attr, const BHWC &weights_shape, - const BHWC &dst_shape, const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints hints, - ConvWeightsDescription *weights_desc) -{ - if (device_info.IsAdreno()) - { - return SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape, device_info, - op_def, hints, weights_desc); - } - else if (device_info.IsMali()) - { - return SelectConvolutionDynamicWeightsMali(attr, weights_shape, dst_shape, device_info, op_def, - hints, weights_desc); - } - else - { - ConvPowerVR conv = - CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape); - *weights_desc = conv.GetConvWeightsDescription(); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -std::unique_ptr<GPUOperation> -SelectConverterToConvWeights(const ConvWeightsDescription &weights_desc, const 
OperationDef &op_def, - ModelHints) -{ - ConverterToConvWeights converter = ConverterToConvWeights(op_def, weights_desc); - return absl::make_unique<ConverterToConvWeights>(std::move(converter)); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h deleted file mode 100644 index d45eea8bd..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__ - -#include <memory> - -#include "open_cl/kernels/ConvCommon.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/ModelHints.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -std::unique_ptr<GPUOperation> SelectConvolution(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints hints); - -std::unique_ptr<GPUOperation> SelectConvolutionForWinograd(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, - ModelHints hints); - -std::unique_ptr<GPUOperation> -SelectConvolutionWithDynamicWeights(const Convolution2DAttributes &attr, const BHWC &weights_shape, - const BHWC &dst_shape, const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints hints, - ConvWeightsDescription *weights_desc); - -std::unique_ptr<GPUOperation> -SelectConverterToConvWeights(const ConvWeightsDescription &weights_desc, const OperationDef &op_def, - ModelHints hints); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc deleted file mode 100644 index f07eef689..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "DwConvolutionSelector.h" - -#include "absl/memory/memory.h" -#include "open_cl/ClDevice.h" -#include "open_cl/kernels/DepthwiseConv.h" -#include "open_cl/kernels/DepthwiseConv3x3.h" -#include "open_cl/Precision.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::unique_ptr<GPUOperation> -SelectDWConvolutionAdreno(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, const OperationDef &op_def) -{ - if (IsDepthwiseConv3x3Supported(attr)) - { - return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr)); - } - else - { - return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr)); - } -} - -std::unique_ptr<GPUOperation> -SelectDWConvolutionPowerVR(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, const OperationDef &op_def) -{ - if (IsDepthwiseConv3x3Supported(attr)) - { - return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr)); - } - else - { - return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr)); - } -} - -std::unique_ptr<GPUOperation> SelectDWConvolutionMali(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - const auto storage_type = op_def.src_tensors[0].storage_type; - bool buffer_type = - 
storage_type == TensorStorageType::BUFFER || storage_type == TensorStorageType::IMAGE_BUFFER; - const MaliInfo mali_info = device_info.mali_info; - if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() && !buffer_type && - op_def.precision != CalculationsPrecision::F32) - { - return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr)); - } - else - { - return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr)); - } -} -} // namespace - -std::unique_ptr<GPUOperation> SelectDWConvolution(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - if (device_info.IsAdreno()) - { - return SelectDWConvolutionAdreno(attr, device_info, op_def); - } - else if (device_info.IsPowerVR()) - { - return SelectDWConvolutionPowerVR(attr, device_info, op_def); - } - else if (device_info.IsMali()) - { - return SelectDWConvolutionMali(attr, device_info, op_def); - } - else - { - return SelectDWConvolutionAdreno(attr, device_info, op_def); - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h deleted file mode 100644 index 2fa40c5c3..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__ - -#include <memory> - -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/Operations.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -std::unique_ptr<GPUOperation> SelectDWConvolution(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, - const OperationDef &op_def); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc deleted file mode 100644 index ac514b26c..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "SimpleSelectors.h" - -#include <memory> -#include <set> - -#include "open_cl/kernels/Add.h" -#include "open_cl/kernels/DepthwiseConv.h" -#include "open_cl/kernels/Pooling.h" -#include "open_cl/kernels/Relu.h" -#include "open_cl/kernels/Reshape.h" -#include "open_cl/kernels/Reshapex4.h" -#include "open_cl/kernels/Softmax.h" -#include "open_cl/kernels/Softmax1x1.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -void SelectAdd(const OperationDef &op_def, const std::vector<int> &channels, int dst_channels, - std::unique_ptr<GPUOperation> *ptr) -{ - GPUOperation operation = CreateAdd(op_def, channels, dst_channels); - *ptr = std::make_unique<GPUOperation>(std::move(operation)); -} - -std::unique_ptr<GPUOperation> -SelectDWConvolutionDynamicWeights(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, const OperationDef &op_def) -{ - return absl::make_unique<GPUOperation>( - CreateDepthwiseConvolution2DDynamicWeights(device_info, op_def, attr)); -} - -std::unique_ptr<GPUOperation> SelectPooling(const Pooling2DAttributes &attr, - const OperationDef &op_def) -{ - GPUOperation operation = CreatePooling(op_def, attr); - return absl::make_unique<GPUOperation>(std::move(operation)); -} - -std::unique_ptr<GPUOperation> SelectReLU(const ReLUAttributes &attr, const OperationDef &op_def) -{ - return absl::make_unique<GPUOperation>(CreateReLU(op_def, attr)); -} - -void SelectReshape(int src_channels, int dst_channels, const OperationDef &op_def, - std::unique_ptr<GPUOperation> *ptr) -{ - if (src_channels % 4 == 0 && dst_channels % 4 == 0) - { - GPUOperation operation = CreateReshapex4(op_def); - *ptr = std::make_unique<GPUOperation>(std::move(operation)); - } - else - { - GPUOperation operation = CreateReshape(op_def); - *ptr = std::make_unique<GPUOperation>(std::move(operation)); - } -} - -void 
SelectSoftmax(const BHWC &shape, const OperationDef &op_def, - std::unique_ptr<GPUOperation> *ptr) -{ - if (shape.w == 1 && shape.h == 1) - { - Softmax1x1 operation = CreateSoftmax1x1(op_def); - *ptr = absl::make_unique<Softmax1x1>(std::move(operation)); - } - else - { - GPUOperation operation = CreateSoftmax(op_def); - *ptr = absl::make_unique<GPUOperation>(std::move(operation)); - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h deleted file mode 100644 index 2c5837a1d..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__ - -#include <memory> - -#include "open_cl/ClDevice.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -void SelectAdd(const OperationDef &op_def, const std::vector<int> &channels, int dst_channels, - std::unique_ptr<GPUOperation> *ptr); - -std::unique_ptr<GPUOperation> -SelectDWConvolutionDynamicWeights(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, const OperationDef &op_def); - -std::unique_ptr<GPUOperation> SelectPooling(const Pooling2DAttributes &attr, - const OperationDef &op_def); - -std::unique_ptr<GPUOperation> SelectReLU(const ReLUAttributes &attr, const OperationDef &op_def); - -void SelectReshape(int src_channels, int dst_channels, const OperationDef &op_def, - std::unique_ptr<GPUOperation> *ptr); - -void SelectSoftmax(const BHWC &shape, const OperationDef &op_def, - std::unique_ptr<GPUOperation> *ptr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__ diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.cc b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc index 6dd9bd252..d3ed102a1 100644 --- a/runtime/onert/backend/gpu_cl/operand/CLTensor.cc +++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc @@ -16,10 +16,12 @@ #include "CLTensor.h" -#include "open_cl/Buffer.h" -#include "open_cl/ClContext.h" -#include "open_cl/Tensor.h" -#include "open_cl/TensorType.h" +#include "tensorflow/lite/delegates/gpu/cl/buffer.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_context.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" + +using namespace tflite::gpu::cl; namespace onert { @@ -30,16 +32,15 @@ 
namespace gpu_cl namespace operand { -CLTensor::CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment) - : ICLTensor{rank, shape, environment}, _tensor(std::make_shared<Tensor>()) +CLTensor::CLTensor(size_t rank, ir::Shape shape, + std::shared_ptr<tflite::gpu::cl::Environment> environment, TensorType type) + : ICLTensor{rank, shape, environment, type}, _tensor(std::make_shared<Tensor>()) { } -const Tensor *CLTensor::handle() const { return _tensor.get(); } - -Tensor *CLTensor::handle() { return _tensor.get(); } +const tflite::gpu::cl::Tensor *CLTensor::handle() const { return _tensor.get(); } -void CLTensor::setBuffer(void *host_ptr) { (void)host_ptr; } +tflite::gpu::cl::Tensor *CLTensor::handle() { return _tensor.get(); } } // namespace operand } // namespace gpu_cl diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.h b/runtime/onert/backend/gpu_cl/operand/CLTensor.h index 7d2e70a99..f2153f430 100644 --- a/runtime/onert/backend/gpu_cl/operand/CLTensor.h +++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.h @@ -19,9 +19,9 @@ #include "ICLTensor.h" -#include "open_cl/Buffer.h" -#include "open_cl/ClContext.h" -#include "open_cl/Tensor.h" +#include "tensorflow/lite/delegates/gpu/cl/buffer.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_context.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor.h" namespace onert { @@ -38,11 +38,12 @@ public: CLTensor() = delete; public: - CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment); + CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment, + TensorType type); public: - const Tensor *handle() const override; - Tensor *handle() override; + const tflite::gpu::cl::Tensor *handle() const override; + tflite::gpu::cl::Tensor *handle() override; public: /** Set given buffer as the buffer of the tensor @@ -55,7 +56,7 @@ public: void setBuffer(void *host_ptr); private: - std::shared_ptr<Tensor> _tensor; + 
std::shared_ptr<tflite::gpu::cl::Tensor> _tensor; }; } // namespace operand diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc index 3f070be0c..a95f78056 100644 --- a/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc +++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc @@ -16,11 +16,11 @@ #include "ICLTensor.h" -#include "open_cl/Api.h" -#include "open_cl/Spi.h" -#include "open_cl/OpenclWrapper.h" -#include "open_cl/TensorTypeUtil.h" -#include "open_cl/kernels/Converter.h" +#include "tensorflow/lite/delegates/gpu/api.h" +#include "tensorflow/lite/delegates/gpu/spi.h" +#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h" namespace onert { @@ -31,6 +31,10 @@ namespace gpu_cl namespace operand { +using namespace tflite::gpu; +using namespace tflite::gpu::cl; +using namespace tflite::gpu::internal_tensor; + void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn) { if (total_size() == 0) @@ -39,100 +43,133 @@ void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn) fn(*this); } -void ICLTensor::enqueueWriteBuffer(const void *ptr, bool) +void ICLTensor::writeConvertInit() { - const float *arr = (float *)ptr; - TensorObject input_obj = MakeReadableCpuMemory(absl::MakeSpan(arr, total_size() / 4)); + TensorObjectDef input_def; + input_def.dimensions.b = handle()->Batch(); + input_def.dimensions.h = handle()->Height(); + input_def.dimensions.w = handle()->Width(); + input_def.dimensions.c = handle()->Channels(); + input_def.object_def.data_layout = DataLayout::BHWC; + input_def.object_def.data_type = DataType::FLOAT32; + input_def.object_def.object_type = ObjectType::CPU_MEMORY; + input_def.object_def.user_provided = true; - TensorObject output_obj; + TensorObjectDef permute_def = input_def; + permute_def.object_def.object_type = 
ToObjectType(handle()->GetStorageType()); - if (handle()->GetStorageType() == TensorStorageType::BUFFER) + auto dims = permute_def.dimensions; + const BHWC shape(dims.b, dims.h, dims.w, dims.c); + const TensorDescriptor desc{ + permute_def.object_def.data_type, + ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout), + Layout::BHWC}; + if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok()) { - output_obj = OpenClBuffer{handle()->GetMemoryPtr()}; + throw std::runtime_error("Failed to AllocateTensorMemory"); } - else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER) + + TensorObjectDef output_def = permute_def; + output_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType()); + output_def.object_def.data_type = handle()->GetDataType(); + input_def.object_def.user_provided = false; + + _converter_builder = NewConverterBuilder(_environment.get()); + if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok()) { - output_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()}; + throw std::runtime_error("Failed to make converter_to"); } - else + if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok()) { - output_obj = OpenClTexture{handle()->GetMemoryPtr()}; + throw std::runtime_error("Failed to make converter_from"); } +} + +void ICLTensor::readConvertInit() +{ + _converter_builder = NewConverterBuilder(_environment.get()); TensorObjectDef input_def; input_def.dimensions.b = handle()->Batch(); input_def.dimensions.h = handle()->Height(); input_def.dimensions.w = handle()->Width(); input_def.dimensions.c = handle()->Channels(); - input_def.object_def.data_layout = DataLayout::BHWC; - input_def.object_def.data_type = DataType::FLOAT32; - input_def.object_def.object_type = ObjectType::CPU_MEMORY; - input_def.object_def.user_provided = true; + input_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType()); + 
input_def.object_def.data_type = handle()->GetDataType(); + input_def.object_def.object_type = ToObjectType(handle()->GetStorageType()); + input_def.object_def.user_provided = false; - TensorObjectDef tmp_def; - tmp_def.dimensions.b = handle()->Batch(); - tmp_def.dimensions.h = handle()->Height(); - tmp_def.dimensions.w = handle()->Width(); - tmp_def.dimensions.c = handle()->Channels(); - tmp_def.object_def.data_layout = DataLayout::BHWC; - tmp_def.object_def.data_type = DataType::FLOAT32; - tmp_def.object_def.object_type = ToObjectType(handle()->GetStorageType()); - tmp_def.object_def.user_provided = true; - - auto dims = tmp_def.dimensions; + TensorObjectDef permute_def = input_def; + permute_def.object_def.data_layout = DataLayout::BHWC; + permute_def.object_def.data_type = DataType::FLOAT32; + permute_def.object_def.user_provided = true; + + auto dims = permute_def.dimensions; const BHWC shape(dims.b, dims.h, dims.w, dims.c); const TensorDescriptor desc{ - tmp_def.object_def.data_type, - ToTensorStorageType(tmp_def.object_def.object_type, tmp_def.object_def.data_layout), + permute_def.object_def.data_type, + ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout), Layout::BHWC}; if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok()) { - throw std::runtime_error("AllocateTensorMemory error."); + throw std::runtime_error("Failed to AllocateTensorMemory"); } - TensorObject tmp_obj; - if (tmp_def.object_def.object_type == ObjectType::OPENCL_TEXTURE) + + TensorObjectDef output_def = permute_def; + output_def.object_def.object_type = ObjectType::CPU_MEMORY; + + if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok()) { - tmp_obj = OpenClTexture{_cl_memory.memory()}; + throw std::runtime_error("Failed to make converter_from"); } - else + if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok()) { - tmp_obj = OpenClBuffer{_cl_memory.memory()}; + 
throw std::runtime_error("Failed to make converter_to"); } +} - TensorObjectDef output_def = input_def; - output_def.dimensions.b = handle()->Batch(); - output_def.dimensions.h = handle()->Height(); - output_def.dimensions.w = handle()->Width(); - output_def.dimensions.c = handle()->Channels(); - output_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType()); - output_def.object_def.data_type = handle()->GetDataType(); - output_def.object_def.object_type = ToObjectType(handle()->GetStorageType()); +void ICLTensor::enqueueWriteBuffer(const void *ptr, bool) +{ + TensorObject input_obj = + MakeReadableCpuMemory(absl::MakeSpan(static_cast<const float *>(ptr), _shape.num_elements())); - _converter_builder = NewConverterBuilder(_environment.get()); - if (!_converter_builder->MakeConverter(input_def, tmp_def, &_converter_cpu).ok()) + TensorObject output_obj; + + TensorObject permute_obj; + if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE) { - throw std::runtime_error("MakeConverter<_converter_cpu> error."); + permute_obj = OpenClTexture{_cl_memory.memory()}; } - if (!_converter_builder->MakeConverter(tmp_def, output_def, &_converter_bhwc).ok()) + else { - throw std::runtime_error("MakeConverter<_converter_bhwc> error."); + permute_obj = OpenClBuffer{_cl_memory.memory()}; } - if (!_converter_cpu->Convert(input_obj, tmp_obj).ok()) + if (handle()->GetStorageType() == TensorStorageType::BUFFER) { - throw std::runtime_error("[w] _converter_cpu Convert error."); + output_obj = OpenClBuffer{handle()->GetMemoryPtr()}; } - if (!_converter_bhwc->Convert(tmp_obj, output_obj).ok()) + else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER) { - throw std::runtime_error("[w] _converter_bhwc Convert error."); + output_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()}; + } + else + { + output_obj = OpenClTexture{handle()->GetMemoryPtr()}; + } + + if (!_converter_to->Convert(input_obj, permute_obj).ok()) + { + throw 
std::runtime_error("Failed to write cl buffer from cpu memory"); + } + if (!_converter_from->Convert(permute_obj, output_obj).ok()) + { + throw std::runtime_error("Failed to change layout"); } } void ICLTensor::enqueueReadBuffer(void *ptr, bool) { - float *arr = (float *)ptr; - TensorObject output_obj = MakeCpuMemory(absl::MakeSpan(arr, total_size() / 4)); - TensorObject input_obj; if (handle()->GetStorageType() == TensorStorageType::BUFFER) @@ -148,72 +185,26 @@ void ICLTensor::enqueueReadBuffer(void *ptr, bool) input_obj = OpenClTexture{handle()->GetMemoryPtr()}; } - TensorObjectDef input_def; - input_def.dimensions.b = handle()->Batch(); - input_def.dimensions.h = handle()->Height(); - input_def.dimensions.w = handle()->Width(); - input_def.dimensions.c = handle()->Channels(); - input_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType()); - input_def.object_def.data_type = handle()->GetDataType(); - input_def.object_def.object_type = ToObjectType(handle()->GetStorageType()); - input_def.object_def.user_provided = false; - - TensorObjectDef tmp_def; - tmp_def.dimensions.b = handle()->Batch(); - tmp_def.dimensions.h = handle()->Height(); - tmp_def.dimensions.w = handle()->Width(); - tmp_def.dimensions.c = handle()->Channels(); - tmp_def.object_def.data_layout = DataLayout::BHWC; - tmp_def.object_def.data_type = DataType::FLOAT32; - tmp_def.object_def.object_type = ToObjectType(handle()->GetStorageType()); - tmp_def.object_def.user_provided = true; - - auto dims = tmp_def.dimensions; - const BHWC shape(dims.b, dims.h, dims.w, dims.c); - const TensorDescriptor desc{ - tmp_def.object_def.data_type, - ToTensorStorageType(tmp_def.object_def.object_type, tmp_def.object_def.data_layout), - Layout::BHWC}; - if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok()) + TensorObject permute_obj; + if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE) { - throw std::runtime_error("AllocateTensorMemory error."); - } 
- TensorObject tmp_obj; - if (tmp_def.object_def.object_type == ObjectType::OPENCL_TEXTURE) - { - tmp_obj = OpenClTexture{_cl_memory.memory()}; + permute_obj = OpenClTexture{_cl_memory.memory()}; } else { - tmp_obj = OpenClBuffer{_cl_memory.memory()}; + permute_obj = OpenClBuffer{_cl_memory.memory()}; } - TensorObjectDef output_def = input_def; - output_def.dimensions.b = handle()->Batch(); - output_def.dimensions.h = handle()->Height(); - output_def.dimensions.w = handle()->Width(); - output_def.dimensions.c = handle()->Channels(); - output_def.object_def.data_layout = DataLayout::BHWC; - output_def.object_def.data_type = DataType::FLOAT32; - output_def.object_def.object_type = ObjectType::CPU_MEMORY; - output_def.object_def.user_provided = true; - _converter_builder = NewConverterBuilder(_environment.get()); - if (!_converter_builder->MakeConverter(input_def, tmp_def, &_converter_bhwc).ok()) - { - throw std::runtime_error("MakeConverter<_converter_bhwc> error."); - } - if (!_converter_builder->MakeConverter(tmp_def, output_def, &_converter_cpu).ok()) - { - throw std::runtime_error("MakeConverter<_converter_cpu> error."); - } + TensorObject output_obj = + MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _shape.num_elements())); - if (!_converter_bhwc->Convert(input_obj, tmp_obj).ok()) + if (!_converter_from->Convert(input_obj, permute_obj).ok()) { - throw std::runtime_error("[r] _converter_bhwc Convert error."); + throw std::runtime_error("Failed to change layout"); } - if (!_converter_cpu->Convert(tmp_obj, output_obj).ok()) + if (!_converter_to->Convert(permute_obj, output_obj).ok()) { - throw std::runtime_error("[r] _converter_cpu Convert error."); + throw std::runtime_error("Failed to read cl buffer"); } } diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.h b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h index 28e905d48..b8ad4469f 100644 --- a/runtime/onert/backend/gpu_cl/operand/ICLTensor.h +++ 
b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h @@ -19,11 +19,14 @@ #include <backend/ITensor.h> -#include "open_cl/Api.h" -#include "open_cl/Spi.h" -#include "open_cl/ClCommandQueue.h" -#include "open_cl/kernels/Converter.h" -#include "open_cl/Tensor.h" +#include "tensorflow/lite/delegates/gpu/api.h" +#include "tensorflow/lite/delegates/gpu/spi.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor.h" +#include "tensorflow/lite/delegates/gpu/cl/environment.h" + +#include "TensorBuilderHelper.h" namespace onert { @@ -43,19 +46,18 @@ public: ICLTensor(ICLTensor &&) = default; ICLTensor &operator=(ICLTensor &&) = default; - ICLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment) - : _rank{rank}, _shape{shape}, _environment(environment) + ICLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment, + TensorType type) + : _rank{rank}, _shape{shape}, _environment(environment), _type(type) { } public: uint8_t *buffer() const final { return reinterpret_cast<uint8_t *>(handle()->GetMemoryPtr()); } size_t total_size() const final { return _shape.num_elements() * sizeof(float); } - size_t calcOffset(const ir::Coordinates &coords) const final + size_t calcOffset(const ir::Coordinates &) const final { - // NYI - (void)coords; - return 0; + throw std::runtime_error("ICLTensor::calcOffset() is not supported."); } ir::Layout layout() const final { return ir::Layout::NHWC; } ir::DataType data_type() const final { return ir::DataType::FLOAT32; } @@ -83,19 +85,24 @@ public: void enqueueWriteBuffer(const void *ptr, bool blocking = true) final; void enqueueReadBuffer(void *ptr, bool blocking = true) final; + void writeConvertInit(); + void readConvertInit(); + TensorType get_type() { return _type; } + public: - virtual const Tensor *handle() const = 0; - virtual Tensor *handle() = 0; + virtual 
const tflite::gpu::cl::Tensor *handle() const = 0; + virtual tflite::gpu::cl::Tensor *handle() = 0; private: protected: size_t _rank; // Actual rank (reflects extended rank) ir::Shape _shape; - std::shared_ptr<Environment> _environment; - std::unique_ptr<TensorObjectConverterBuilder> _converter_builder; - CLMemory _cl_memory; - std::unique_ptr<TensorObjectConverter> _converter_cpu; - std::unique_ptr<TensorObjectConverter> _converter_bhwc; + std::shared_ptr<tflite::gpu::cl::Environment> _environment; + TensorType _type; + std::unique_ptr<tflite::gpu::TensorObjectConverterBuilder> _converter_builder; + tflite::gpu::cl::CLMemory _cl_memory; + std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_to; + std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_from; }; } // namespace operand diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h index 5dfdc7ec5..716400c1f 100644 --- a/runtime/onert/backend/ruy/ops/OperationUtils.h +++ b/runtime/onert/backend/ruy/ops/OperationUtils.h @@ -18,17 +18,17 @@ #define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ #include <backend/IPortableTensor.h> +#include <ir/DataType.h> +#include <ir/Padding.h> +#include <util/CalculateActivationRange.h> #include <ruy/Shape.h> #include <ruy/Types.h> -#include <iostream> -#include <ir/DataType.h> -#include <ir/InternalType.h> -#include <ir/Padding.h> #include <limits> using OperandType = onert::ir::DataType; +using namespace onert::util; namespace onert { @@ -79,40 +79,6 @@ inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Ac } } -template <typename T> -void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) -{ - if (activation == ir::Activation::RELU) - { - *activation_min = 0; - *activation_max = std::numeric_limits<T>::max(); - } - else if (activation == ir::Activation::RELU6) - { - *activation_min = 0; - *activation_max = 6; - } - else if (activation == 
ir::Activation::RELU1) - { - *activation_min = -1; - *activation_max = 1; - } - else if (activation == ir::Activation::SIGMOID) - { - *activation_min = 0; - *activation_max = 1; - } - else if (activation == ir::Activation::NONE) - { - *activation_min = std::numeric_limits<T>::lowest(); - *activation_max = std::numeric_limits<T>::max(); - } - else - { - std::cout << "Unsupported fused activation function." << std::endl; - } -} - nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type); } // namespace ops diff --git a/runtime/onert/backend/trix/Backend.h b/runtime/onert/backend/trix/Backend.h new file mode 100644 index 000000000..a63839720 --- /dev/null +++ b/runtime/onert/backend/trix/Backend.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_TRIX_BACKEND_H__ +#define __ONERT_BACKEND_TRIX_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "KernelGenerator.h" + +#include <backend/Backend.h> + +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override + { + auto &graph = *data.graph; + auto context = std::make_unique<BackendContext>(this, std::move(data)); + auto tr = std::make_shared<basic::TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, context->dev_context()); + return context; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_BACKEND_H__ diff --git a/runtime/onert/backend/trix/BackendContext.cc b/runtime/onert/backend/trix/BackendContext.cc new file mode 100644 index 000000000..e46b11d20 --- /dev/null +++ b/runtime/onert/backend/trix/BackendContext.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/basic/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); } + +FunctionMap BackendContext::genKernels() +{ + FunctionMap ret; + + for (auto op_ind : _data.op_order) + { + auto fn_seq = kernel_gen->generate(op_ind); + ret.emplace_back(op_ind, std::move(fn_seq)); + } + + basic::initConsts(*this); + + // NOTE For memory optimization, we want to free some operand data + const_cast<ir::Graph &>(*_data.graph) + .operands() + .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace trix +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/trix/BackendContext.h b/runtime/onert/backend/trix/BackendContext.h new file mode 100644 index 000000000..c0734c46d --- /dev/null +++ b/runtime/onert/backend/trix/BackendContext.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "DevContext.h" + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, ContextData &&data, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, std::move(data), tensor_registry), + tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _dev_context(new DevContext) + { + } + + ITensorRegistry *genTensors() override; + FunctionMap genKernels() override; + + std::shared_ptr<DevContext> dev_context() { return _dev_context; } + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: + std::shared_ptr<DevContext> _dev_context; +}; + +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/trix/CMakeLists.txt b/runtime/onert/backend/trix/CMakeLists.txt new file mode 100644 index 000000000..5455757ca --- /dev/null +++ b/runtime/onert/backend/trix/CMakeLists.txt @@ -0,0 +1,24 @@ +set(LIB_ONERT_BACKEND_TRIX onert_backend_trix) + +nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET) +if(NOT TRIXEngine_FOUND) + return() +endif(NOT TRIXEngine_FOUND) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_ONERT_BACKEND_TRIX} SHARED ${SOURCES}) + +target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE trix_engine) 
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE nnfw_common) +target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE nnfw_coverage) + +set_target_properties(${LIB_ONERT_BACKEND_TRIX} PROPERTIES OUTPUT_NAME backend_trix) + +if(CMAKE_BUILD_TYPE_LC STREQUAL "release") + add_custom_command(TARGET ${LIB_ONERT_BACKEND_TRIX} POST_BUILD + COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_TRIX}>) +endif() + +install(TARGETS ${LIB_ONERT_BACKEND_TRIX} DESTINATION lib) diff --git a/runtime/onert/backend/gpu_cl/open_cl/AccessType.h b/runtime/onert/backend/trix/Config.cc index 81efd666f..c23326423 100644 --- a/runtime/onert/backend/gpu_cl/open_cl/AccessType.h +++ b/runtime/onert/backend/trix/Config.cc @@ -1,12 +1,11 @@ /* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,25 +14,19 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__ +#include "Config.h" namespace onert { namespace backend { -namespace gpu_cl +namespace trix { -enum class AccessType -{ - UNKNOWN, - READ, - WRITE, - READ_WRITE, -}; -} // namespace gpu_cl +bool Config::initialize() { return true; } + +ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; } + +} // namespace trix } // namespace backend } // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__ diff --git a/runtime/onert/backend/trix/Config.h b/runtime/onert/backend/trix/Config.h new file mode 100644 index 000000000..799047d6f --- /dev/null +++ b/runtime/onert/backend/trix/Config.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_TRIX_CONFIG_H__ +#define __ONERT_BACKEND_TRIX_CONFIG_H__ + +#include <backend/IConfig.h> +#include <memory> +#include <util/ITimer.h> + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +class Config : public IConfig +{ +public: + std::string id() override { return "trix"; } + bool initialize() override; + ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + bool supportPermutation() override { return true; } + bool supportDynamicTensor() override { return false; } + bool supportFP16() override { return false; } + + std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); } +}; + +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_CONFIG_H__ diff --git a/runtime/onert/backend/trix/DevContext.h b/runtime/onert/backend/trix/DevContext.h new file mode 100644 index 000000000..482932fd4 --- /dev/null +++ b/runtime/onert/backend/trix/DevContext.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__ +#define __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__ + +#include <libnpuhost.h> + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +class DevContext +{ +public: + DevContext() + { + auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP); + if (device_count <= 0) + { + throw std::runtime_error("Unable to find TRIV2 NPU device"); + } + + // Use NPU 0 device + if (getNPUdeviceByType(&_dev_handle, NPUCOND_TRIV2_CONN_SOCIP, 0) < 0) + { + throw std::runtime_error("Failed to get TRIV2 NPU device handle"); + } + } + + ~DevContext() + { + if (_dev_handle != nullptr) + { + unregisterNPUmodel_all(_dev_handle); + putNPUdevice(_dev_handle); + } + } + + npudev_h getDev() { return _dev_handle; } + + template <typename T> void setDataInfo(tensors_data_info *info, std::vector<T *> &tensors) + { + info->num_info = static_cast<uint32_t>(tensors.size()); + + for (uint32_t idx = 0; idx < info->num_info; ++idx) + { + info->info[idx].layout = convertDataLayout(tensors[idx]->layout()); + info->info[idx].type = convertDataType(tensors[idx]->data_type()); + } + } + + template <typename T> void setBuffer(generic_buffers *buf, std::vector<T *> &tensors) + { + buf->num_buffers = static_cast<uint32_t>(tensors.size()); + + for (uint32_t idx = 0; idx < buf->num_buffers; ++idx) + { + buf->bufs[idx].addr = tensors[idx]->buffer(); + buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size()); + buf->bufs[idx].type = BUFFER_MAPPED; + } + } + +private: + data_layout convertDataLayout(const ir::Layout layout) + { + switch (layout) + { + case ir::Layout::NCHW: + return DATA_LAYOUT_NCHW; + case ir::Layout::NHWC: + return DATA_LAYOUT_NHWC; + default: + throw std::runtime_error("Unknown Layout"); + } + } + + data_type convertDataType(const ir::DataType type) + { + switch (type) + { + case ir::DataType::QUANT_UINT8_ASYMM: + return DATA_TYPE_QASYMM8; + case ir::DataType::QUANT_INT16_SYMM: + return DATA_TYPE_QSYMM16; + 
default: + throw std::runtime_error("Unsupported data type"); + } + } + +private: + // NPU device handle + // TODO Support multicore npu device + npudev_h _dev_handle; +}; + +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__ diff --git a/runtime/onert/backend/trix/KernelGenerator.cc b/runtime/onert/backend/trix/KernelGenerator.cc new file mode 100644 index 000000000..68e6840dd --- /dev/null +++ b/runtime/onert/backend/trix/KernelGenerator.cc @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "KernelGenerator.h" + +#include "ops/BulkLayer.h" + +#include <backend/Backend.h> +#include <backend/IConfig.h> +#include <memory> +#include <util/Utils.h> +#include <util/logging.h> +#include <exec/DynamicShapeInferer.h> + +#include <stdexcept> + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +KernelGenerator::KernelGenerator(const ir::Graph &graph, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<basic::TensorRegistry> &tensor_reg, + const std::shared_ptr<DevContext> &dev_context) + : basic::KernelGeneratorBase{graph}, + _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()}, + _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _dev_context{dev_context} +{ + // DO NOTHING +} + +std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind) +{ + auto ret = std::make_unique<exec::FunctionSequence>(); + ret->enableDynamicShapeInferer(false); + + const auto &op = _graph.operations().at(ind); + op.accept(*this); + ret->append(releaseFunction()); + return ret; +} + +void KernelGenerator::visit(const ir::operation::Bulk &node) +{ + using ir::operation::Bulk; + + std::vector<IPortableTensor *> output_tensors; + for (auto &ofm_idx : node.getOutputs()) + output_tensors.emplace_back(_tensor_reg->getPortableTensor(ofm_idx)); + + std::vector<const IPortableTensor *> input_tensors; + for (auto &ifm_idx : node.getInputs()) + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx)); + + // parameters + const auto binary_path = node.param().binary_path; + + auto fn = std::make_unique<ops::BulkLayer>(); + + fn->configure(input_tensors, output_tensors, binary_path, _dev_context); + + _return_fn = std::move(fn); +} + +} // namespace trix +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/trix/KernelGenerator.h b/runtime/onert/backend/trix/KernelGenerator.h new file mode 100644 index 000000000..d87dc6952 --- 
/dev/null +++ b/runtime/onert/backend/trix/KernelGenerator.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__ + +#include "TensorBuilder.h" +#include "backend/basic/TensorRegistry.h" +#include "Tensor.h" +#include "DevContext.h" + +#include <backend/basic/KernelGeneratorBase.h> +#include <ir/Operands.h> +#include <ir/Operations.h> + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +class KernelGenerator : public basic::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<basic::TensorRegistry> &tensor_reg, + const std::shared_ptr<DevContext> &dev_context); + + std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex op_ind) override; + +private: + void visit(const ir::operation::Bulk &node) override; + +private: + const ir::Operands &_ctx; + const ir::Operations &_operations_ctx; + ir::Layout _current_layout; + std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<basic::TensorRegistry> _tensor_reg; + const std::shared_ptr<DevContext> _dev_context; +}; + +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__ diff --git 
a/runtime/onert/backend/acl_common/ParentInfo.h b/runtime/onert/backend/trix/Tensor.h index 708436327..5138cee71 100644 --- a/runtime/onert/backend/acl_common/ParentInfo.h +++ b/runtime/onert/backend/trix/Tensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,31 +14,24 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__ -#define __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__ +#ifndef __ONERT_BACKEND_TRIX_TENSOR_H__ +#define __ONERT_BACKEND_TRIX_TENSOR_H__ -#include <ir/Index.h> -#include <ir/Coordinates.h> +#include <backend/basic/Tensor.h> +#include <ir/Data.h> namespace onert { namespace backend { -namespace acl_common +namespace trix { -/** - * @brief Struct to represent parent operand in child operand - */ -struct ParentInfo -{ - ir::OperandIndex parent; - ir::Layout frontend_layout; - ir::Coordinates coordinates; -}; +using Tensor = basic::Tensor; +using ExternalTensor = basic::ExternalTensor; -} // namespace acl_common +} // namespace trix } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__ +#endif // __ONERT_BACKEND_TRIX_TENSOR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.cc b/runtime/onert/backend/trix/TensorBuilder.h index 774f8151f..ac6ca0f9a 100644 --- a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.cc +++ b/runtime/onert/backend/trix/TensorBuilder.h @@ -1,12 +1,11 @@ /* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,30 +14,22 @@ * limitations under the License. */ -#include "GpuObject.h" +#ifndef __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__ +#define __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__ + +#include <backend/basic/TensorBuilder.h> namespace onert { namespace backend { -namespace gpu_cl +namespace trix { -std::string MemoryTypeToCLType(MemoryType type) -{ - switch (type) - { - case MemoryType::GLOBAL: - return "__global"; - case MemoryType::CONSTANT: - return "__constant"; - break; - case MemoryType::LOCAL: - return "__local"; - } - return ""; -} +using TensorBuilder = basic::TensorBuilder; -} // namespace gpu_cl +} // namespace trix } // namespace backend } // namespace onert + +#endif // __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/trix/ops/BulkLayer.cc b/runtime/onert/backend/trix/ops/BulkLayer.cc new file mode 100644 index 000000000..71fdf3f0d --- /dev/null +++ b/runtime/onert/backend/trix/ops/BulkLayer.cc @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BulkLayer.h" +#include <util/logging.h> + +#include <libnpuhost.h> + +namespace onert +{ +namespace backend +{ +namespace trix +{ +namespace ops +{ + +BulkLayer::BulkLayer() : _inputs(), _outputs(), _model_id(0), _meta(nullptr), _dev_context(nullptr) +{ + // DO NOTHING +} + +BulkLayer::~BulkLayer() { free(_meta); } + +void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs, + std::vector<IPortableTensor *> &outputs, std::string binary_path, + const std::shared_ptr<DevContext> &dev_context) +{ + _inputs = inputs; + _outputs = outputs; + _dev_context = dev_context; + + _meta = getNPUmodel_metadata(binary_path.c_str(), false); + if (_meta == nullptr) + { + throw std::runtime_error("Unable to extract the model metadata"); + } + + generic_buffer model_file; + model_file.type = BUFFER_FILE; + model_file.filepath = binary_path.c_str(); + model_file.size = _meta->size; + + if (registerNPUmodel(dev_context->getDev(), &model_file, &_model_id) < 0) + { + throw std::runtime_error("Failed to register npu model"); + } +} + +void BulkLayer::run() +{ + int req_id; + if (createNPU_request(_dev_context->getDev(), _model_id, &req_id)) + { + throw std::runtime_error("Unable to create NPU request with model id (" + + std::to_string(_model_id) + ")"); + } + + if (_meta->input_seg_num != _inputs.size()) + { + throw std::runtime_error("input size does not match to model input seg num"); + } + + if (_meta->output_seg_num != _outputs.size()) + { + throw std::runtime_error("output size does not match to model output seg num"); + } + + tensors_data_info in_info; + tensors_data_info out_info; + _dev_context->setDataInfo<const IPortableTensor>(&in_info, _inputs); + _dev_context->setDataInfo<IPortableTensor>(&out_info, _outputs); + + input_buffers input_buf; + output_buffers output_buf; + _dev_context->setBuffer<const IPortableTensor>(&input_buf, 
_inputs); + _dev_context->setBuffer<IPortableTensor>(&output_buf, _outputs); + + if (setNPU_requestData(_dev_context->getDev(), req_id, &input_buf, &in_info, &output_buf, + &out_info)) + { + throw std::runtime_error("Unable to create NPU request for model id (" + + std::to_string(_model_id) + ")"); + } + + if (submitNPU_request(_dev_context->getDev(), req_id)) + { + throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) + + ")"); + } + + if (removeNPU_request(_dev_context->getDev(), req_id)) + { + throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) + + ")"); + } +} + +void BulkLayer::prepare() +{ + // DO NOTHING +} + +} // namespace ops +} // namespace trix +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/trix/ops/BulkLayer.h b/runtime/onert/backend/trix/ops/BulkLayer.h new file mode 100644 index 000000000..f7080ccad --- /dev/null +++ b/runtime/onert/backend/trix/ops/BulkLayer.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__ +#define __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__ + +#include <backend/IPortableTensor.h> +#include "../DevContext.h" + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace trix +{ +namespace ops +{ + +class BulkLayer : public ::onert::exec::IFunction +{ +public: + BulkLayer(); + ~BulkLayer(); + +public: + void configure(const std::vector<const IPortableTensor *> &inputs, + std::vector<IPortableTensor *> &outputs, std::string binary_path, + const std::shared_ptr<DevContext> &dev_context); + + void run() override; + + void prepare() override; + +private: + std::vector<const IPortableTensor *> _inputs; + std::vector<IPortableTensor *> _outputs; + + uint32_t _model_id; + npubin_meta *_meta; + std::shared_ptr<DevContext> _dev_context; +}; + +} // namespace ops +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__ diff --git a/runtime/onert/backend/trix/trix.cc b/runtime/onert/backend/trix/trix.cc new file mode 100644 index 000000000..816fb4406 --- /dev/null +++ b/runtime/onert/backend/trix/trix.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Backend.h" + +extern "C" { + +onert::backend::Backend *onert_backend_create() { return new onert::backend::trix::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } +} diff --git a/runtime/onert/backend/xnnpack/ops/OperationUtils.h b/runtime/onert/backend/xnnpack/ops/OperationUtils.h index 5102e32dd..fe93fccc0 100644 --- a/runtime/onert/backend/xnnpack/ops/OperationUtils.h +++ b/runtime/onert/backend/xnnpack/ops/OperationUtils.h @@ -17,10 +17,10 @@ #ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__ #define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__ -// duplicated from cpu/ops/OperationUtils.h +#include <ir/DataType.h> #include <ir/InternalType.h> #include <ir/Padding.h> -#include <ir/DataType.h> +#include <util/CalculateActivationRange.h> namespace onert { @@ -32,40 +32,7 @@ namespace ops { using OperandType = ir::DataType; - -template <typename T> -void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) -{ - if (activation == ir::Activation::RELU) - { - *activation_min = 0; - *activation_max = std::numeric_limits<T>::max(); - } - else if (activation == ir::Activation::RELU6) - { - *activation_min = 0; - *activation_max = 6; - } - else if (activation == ir::Activation::RELU1) - { - *activation_min = -1; - *activation_max = 1; - } - else if (activation == ir::Activation::SIGMOID) - { - *activation_min = 0; - *activation_max = 1; - } - else if (activation == ir::Activation::NONE) - { - *activation_min = std::numeric_limits<T>::lowest(); - *activation_max = std::numeric_limits<T>::max(); - } - else - { - throw std::runtime_error{"Unsupported fused activation function"}; - } -} +using namespace onert::util; // CalculateActivationRange } // namespace ops } // namespace xnnpack diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h index d3ef6d4af..10ca8e9fc 100644 --- 
a/runtime/onert/core/include/compiler/LoweredGraph.h +++ b/runtime/onert/core/include/compiler/LoweredGraph.h @@ -60,6 +60,7 @@ public: private: void makeLowerInfo(const compiler::BackendResolver &backend_resolver); void dumpLowerInfo(); + void lowerGraph(const ir::Graph &graph, const compiler::CompilerOptions &options); private: ir::Graph _graph; diff --git a/runtime/onert/core/include/ir/DataType.h b/runtime/onert/core/include/ir/DataType.h index e77c308ea..0ec0e0711 100644 --- a/runtime/onert/core/include/ir/DataType.h +++ b/runtime/onert/core/include/ir/DataType.h @@ -38,6 +38,7 @@ enum class DataType QUANT_INT8_ASYMM = 9, QUANT_INT16_ASYMM = 10, QUANT_INT8_SYMM_PER_CHANNEL = 11, + QUANT_INT16_SYMM = 12, }; size_t sizeOfDataType(DataType data_type); diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h index 0eb45e1ee..4602fafec 100644 --- a/runtime/onert/core/include/ir/Operations.Include.h +++ b/runtime/onert/core/include/ir/Operations.Include.h @@ -24,6 +24,7 @@ #include "ir/operation/BCQGather.h" #include "ir/operation/BinaryArithmetic.h" #include "ir/operation/BroadcastTo.h" +#include "ir/operation/Bulk.h" #include "ir/operation/Comparison.h" #include "ir/operation/Concat.h" #include "ir/operation/Conv2D.h" diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst index f17fdfdd7..f37d89505 100644 --- a/runtime/onert/core/include/ir/Operations.lst +++ b/runtime/onert/core/include/ir/Operations.lst @@ -27,6 +27,7 @@ OP(BCQFullyConnected) OP(BCQGather) OP(BinaryArithmetic) OP(BroadcastTo) +OP(Bulk) OP(Comparison) OP(Concat) OP(Conv2D) diff --git a/runtime/onert/core/include/ir/operation/Bulk.h b/runtime/onert/core/include/ir/operation/Bulk.h new file mode 100644 index 000000000..1825f7fad --- /dev/null +++ b/runtime/onert/core/include/ir/operation/Bulk.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_IR_OPERATION_BULK_H__ +#define __ONERT_IR_OPERATION_BULK_H__ + +#include "ir/Operation.h" + +namespace onert +{ +namespace ir +{ +namespace operation +{ + +class Bulk : public Operation +{ +public: + struct Param + { + std::string binary_path; + }; + +public: + Bulk(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m); + +public: + void accept(OperationVisitor &v) const override; + OpCode opcode() const final { return OpCode::Bulk; } + const Param ¶m() const { return _param; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace onert + +#endif // __ONERT_IR_OPERATION_BULK_H__ diff --git a/runtime/onert/core/include/util/CalculateActivationRange.h b/runtime/onert/core/include/util/CalculateActivationRange.h new file mode 100644 index 000000000..db76f9dde --- /dev/null +++ b/runtime/onert/core/include/util/CalculateActivationRange.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__ +#define __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__ + +#include "ir/InternalType.h" + +namespace onert +{ +namespace util +{ + +template <typename T> +void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::SIGMOID) + { + *activation_min = 0; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + } + else + { + throw std::runtime_error{"Unsupported fused activation function."}; + } +} + +} // namespace util +} // namespace onert + +#endif // __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__ diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc index 93792dd1c..6a1d8fcec 100644 --- a/runtime/onert/core/src/compiler/Compiler.cc +++ b/runtime/onert/core/src/compiler/Compiler.cc @@ -64,6 +64,52 @@ std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to return opbackends; } +void verboseOptions(compiler::CompilerOptions &options) +{ + VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options 
====" << std::endl; + VERBOSE(Compiler) << "backend_list : " + << nnfw::misc::join(options.backend_list.begin(), options.backend_list.end(), + "/") + << std::endl; + VERBOSE(Compiler) << "trace_filepath : " << options.trace_filepath << std::endl; + VERBOSE(Compiler) << "graph_dump_level : " << options.graph_dump_level << std::endl; + VERBOSE(Compiler) << "executor : " << options.executor << std::endl; + VERBOSE(Compiler) << "manual backend_for_all : " + << options.manual_scheduler_options.backend_for_all << std::endl; + VERBOSE(Compiler) << "manual_scheduler_options : " + << getOpBackends(options.manual_scheduler_options.opcode_to_backend) + << std::endl; + VERBOSE(Compiler) << "he_scheduler : " << options.he_scheduler << std::endl; + VERBOSE(Compiler) << "he_profiling_mode : " << options.he_profiling_mode << std::endl; + VERBOSE(Compiler) << "disable_compile : " << options.disable_compile << std::endl; + VERBOSE(Compiler) << "fp16_enable : " << options.fp16_enable << std::endl + << std::noboolalpha; +} + +void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgraphs &subgs, + const std::string &str) +{ + // TODO Support multiple subgraphs for manual scheduling + auto key_val_list = nnfw::misc::split(str, ';'); + for (const auto &key_val_str : key_val_list) + { + if (key_val_str.empty()) + { + continue; + } + + auto key_val = nnfw::misc::split(key_val_str, '='); + const auto &key_str = key_val.at(0); + const auto &val = key_val.at(1); + auto key = static_cast<uint32_t>(std::stoi(key_str)); + + subgs.at(ir::SubgraphIndex{0}) + ->operations() + .at(ir::OperationIndex{key}); // Check if exist, or this wil throw + ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val); + } +} + } // namespace namespace onert @@ -104,26 +150,8 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) #undef OP // Index to Backend - // TODO Support multiple subgraphs for manual scheduling auto map_str = 
util::getConfigString(util::config::OP_BACKEND_MAP); - auto key_val_list = nnfw::misc::split(map_str, ';'); - for (const auto &key_val_str : key_val_list) - { - if (key_val_str.empty()) - { - continue; - } - - auto key_val = nnfw::misc::split(key_val_str, '='); - const auto &key_str = key_val.at(0); - const auto &val = key_val.at(1); - auto key = static_cast<uint32_t>(std::stoi(key_str)); - - subgs.at(ir::SubgraphIndex{0}) - ->operations() - .at(ir::OperationIndex{key}); // Check if exist, or this wil throw - ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val); - } + setBackendMap(ms_options, subgs, map_str); } return options; } @@ -143,22 +171,10 @@ void Compiler::enableToFp16() { _options.fp16_enable = true; } void Compiler::set_backend_from_str(const char *backend_settings) { + assert(_subgraphs != nullptr); // Backend for all auto &ms_options = _options.manual_scheduler_options; - auto key_val_list = nnfw::misc::split(backend_settings, ';'); - for (const auto &key_val_str : key_val_list) - { - if (key_val_str.empty()) - { - continue; - } - - auto key_val = nnfw::misc::split(key_val_str, '='); - const auto &key_str = key_val.at(0); - const auto &val = key_val.at(1); - auto key = static_cast<uint32_t>(std::stoi(key_str)); - ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val); - } + setBackendMap(ms_options, *_subgraphs, std::string{backend_settings}); } void Compiler::checkProfilerConditions() @@ -344,26 +360,7 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; } - { - VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl; - VERBOSE(Compiler) << "backend_list : " - << nnfw::misc::join(_options.backend_list.begin(), - _options.backend_list.end(), "/") - << std::endl; - VERBOSE(Compiler) << "trace_filepath : " << _options.trace_filepath << std::endl; - VERBOSE(Compiler) << "graph_dump_level : " << 
_options.graph_dump_level << std::endl; - VERBOSE(Compiler) << "executor : " << _options.executor << std::endl; - VERBOSE(Compiler) << "manual backend_for_all : " - << _options.manual_scheduler_options.backend_for_all << std::endl; - VERBOSE(Compiler) << "manual_scheduler_options : " - << getOpBackends(_options.manual_scheduler_options.opcode_to_backend) - << std::endl; - VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl; - VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl; - VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl; - VERBOSE(Compiler) << "fp16_enable : " << _options.fp16_enable << std::endl - << std::noboolalpha; - } + verboseOptions(_options); _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { // Mandatory passes @@ -544,26 +541,7 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa _options.tracing_ctx = nullptr; } - { - VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl; - VERBOSE(Compiler) << "backend_list : " - << nnfw::misc::join(_options.backend_list.begin(), - _options.backend_list.end(), "/") - << std::endl; - VERBOSE(Compiler) << "trace_filepath : " << _options.trace_filepath << std::endl; - VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl; - VERBOSE(Compiler) << "executor : " << _options.executor << std::endl; - VERBOSE(Compiler) << "manual backend_for_all : " - << _options.manual_scheduler_options.backend_for_all << std::endl; - VERBOSE(Compiler) << "manual_scheduler_options : " - << getOpBackends(_options.manual_scheduler_options.opcode_to_backend) - << std::endl; - VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl; - VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl; - VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl; - 
VERBOSE(Compiler) << "fp16_enable : " << _options.fp16_enable << std::endl - << std::noboolalpha; - } + verboseOptions(_options); _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { // Mandatory passes diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc index ba038e935..f9db1ca89 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.cc +++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc @@ -16,26 +16,25 @@ #include "ExecutorFactory.h" -#include <deque> -#include <functional> -#include "ir/OperationCloner.h" -#include "exec/ExecutionObservers.h" -#include "exec/LinearExecutor.h" -#include "exec/DataflowExecutor.h" -#include "exec/ParallelExecutor.h" -#include "compiler/BackendManager.h" -#include "compiler/ExecutionBuilder.h" -#include "exec/ExecTime.h" -#include "compiler/Linear.h" -#include "compiler/BackendManager.h" -#include "backend/IPortableTensor.h" #include "backend/builtin/Config.h" #include "backend/builtin/KernelGenerator.h" -#include "backend/builtin/UserTensor.h" #include "backend/builtin/TensorBuilder.h" -#include "util/TracingCtx.h" +#include "backend/builtin/UserTensor.h" +#include "backend/IPortableTensor.h" +#include "compiler/BackendManager.h" +#include "compiler/BackendManager.h" +#include "compiler/ExecutionBuilder.h" +#include "compiler/Linear.h" #include "dumper/text/GraphDumper.h" +#include "exec/DataflowExecutor.h" +#include "exec/ExecTime.h" +#include "exec/ExecutionObservers.h" +#include "exec/LinearExecutor.h" +#include "exec/ParallelExecutor.h" +#include "ir/OperationCloner.h" +#include "util/TracingCtx.h" +#include <functional> #include <memory> namespace onert @@ -282,6 +281,42 @@ void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_grap }); } +void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs, + const std::shared_ptr<exec::ExecutorMap> &executor_map, + const backend::BackendContexts 
&backend_contexts) +{ + for (auto &pair : backend_contexts) + { + auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get()); + if (builtin_context != nullptr) + { + auto builtin_kernel_gen = builtin_context->kernel_gen; + builtin_kernel_gen->setTensorRegistries(tensor_regs); + builtin_kernel_gen->setExecutorMap(executor_map); + } + } +} + +std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> +ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_contexts) +{ + std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts; + + for (auto &pair : backend_contexts) + { + // NOTE builtin backend must be processed lastly. + // This is because of Permute layer's specialty which is the only operation that could have + // different ITensor objects for the input and the output. And it requires all other backends' + // tensors are ready to use. + if (pair.first->config()->id() == "builtin") + ordered_contexts.emplace_back(pair.first, pair.second.get()); + else + ordered_contexts.emplace_front(pair.first, pair.second.get()); + } + + return ordered_contexts; +} + exec::IExecutor * ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options, @@ -311,32 +346,12 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo prepareMigrantTensors(*lowered_graph, backend_contexts); // Give some runtime objects to builtin KernelGenerator - for (auto &pair : backend_contexts) - { - auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get()); - if (builtin_context != nullptr) - { - auto builtin_kernel_gen = builtin_context->kernel_gen; - builtin_kernel_gen->setTensorRegistries(tensor_regs); - builtin_kernel_gen->setExecutorMap(executor_map); - } - } + prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts); ExecutionBuilder builder; // Adjust 
the order of backends for the upcoming iteration - std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts; - for (auto &pair : backend_contexts) - { - // NOTE builtin backend must be processed lastly. - // This is because of Permute layer's specialty which is the only operation that could have - // different ITensor objects for the input and the output. And it requires all other backends' - // tensors are ready to use. - if (pair.first->config()->id() == "builtin") - ordered_contexts.emplace_back(pair.first, pair.second.get()); - else - ordered_contexts.emplace_front(pair.first, pair.second.get()); - } + auto ordered_contexts = orderBackendContext(backend_contexts); // Simulate the execution for deallocation of tensors std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map; @@ -447,32 +462,12 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( prepareMigrantTensors(*lowered_graph, backend_contexts); // Give some runtime objects to builtin KernelGenerator - for (auto &pair : backend_contexts) - { - auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get()); - if (builtin_context != nullptr) - { - auto builtin_kernel_gen = builtin_context->kernel_gen; - builtin_kernel_gen->setTensorRegistries(tensor_regs); - builtin_kernel_gen->setExecutorMap(executor_map); - } - } + prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts); ExecutionBuilder builder; // Adjust the order of backends for the upcoming iteration - std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts; - for (auto &pair : backend_contexts) - { - // NOTE builtin backend must be processed lastly. - // This is because of Permute layer's specialty which is the only operation that could have - // different ITensor objects for the input and the output. And it requires all other backends' - // tensors are ready to use. 
- if (pair.first->config()->id() == "builtin") - ordered_contexts.emplace_back(pair.first, pair.second.get()); - else - ordered_contexts.emplace_front(pair.first, pair.second.get()); - } + auto ordered_contexts = orderBackendContext(backend_contexts); // Generate kernels for (auto &pair : ordered_contexts) diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h index 5fe1617a6..2ee05fae3 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.h +++ b/runtime/onert/core/src/compiler/ExecutorFactory.h @@ -17,12 +17,14 @@ #ifndef __ONERT_COMPILER_EXECUTOR_FACTORY_H__ #define __ONERT_COMPILER_EXECUTOR_FACTORY_H__ -#include <unordered_map> +#include "TensorRegistries.h" #include "backend/ITensor.h" -#include "exec/IExecutor.h" #include "compiler/LoweredGraph.h" -#include "TensorRegistries.h" +#include "exec/IExecutor.h" + +#include <deque> +#include <unordered_map> namespace onert { @@ -45,6 +47,12 @@ private: private: static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph, const backend::BackendContexts &backend_contexts); + static void prepareBuiltinBackend(const TensorRegistries &tensor_regs, + const std::shared_ptr<exec::ExecutorMap> &executor_map, + const backend::BackendContexts &backend_contexts); + static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> + orderBackendContext(const backend::BackendContexts &backend_contexts); + static exec::IExecutor * createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options, diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc index 3b84d02de..999bffa7c 100644 --- a/runtime/onert/core/src/compiler/LoweredGraph.cc +++ b/runtime/onert/core/src/compiler/LoweredGraph.cc @@ -42,85 +42,19 @@ namespace compiler LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph} { - // 
set tracing_ctx for copied graph - if (options.tracing_ctx) - { - auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph); - options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value()); - } - - // Build backend contexts - auto &backend_manager = BackendManager::get(); - // Create contexts for other backends - for (auto backend_str : options.backend_list) - { - backend_manager.loadBackend(backend_str); - auto backend = backend_manager.get(backend_str); - - // TODO As the default value of backend list contains "cpu", "acl_cl" and "acl_neon", and some - // are not available on x64 or some other platforms. So this may be a workaround for x64 and - // we should change it back(throw if backend is not loaded) later. - if (!backend) - { - VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str << std::endl; - continue; - } - } - if (backend_manager.num_backends() == 0) - throw std::runtime_error{"No available backends loaded."}; - - // TODO Move "schedule" phase out of here - // Schedule - std::unique_ptr<BackendResolver> backend_resolver; - auto all_backends = backend_manager.getAll(); - if (options.he_scheduler) - { - auto scheduler = HEScheduler(all_backends, options); - backend_resolver = scheduler.schedule(_graph); - _indexed_ranks = scheduler.getIndexedRanks(); - } - else - { - auto scheduler = ManualScheduler(all_backends, options); - backend_resolver = scheduler.schedule(_graph); - } - - makeLowerInfo(*backend_resolver); - VERBOSE(LoweredGraph) << "dump before mandatory passes" << std::endl; - dumper::text::dumpLoweredGraph(*this); - - // Mandatory passes - kind of legalization(?) 
- pass::PassRunner{} - .append(std::make_unique<pass::ConstantInsertionPass>(*this)) - .append(std::make_unique<pass::ConstantLoweringPass>(*this)) - .append(std::make_unique<pass::PermutationOperationPass>(*this)) - .append(std::make_unique<pass::PermutationInsertionPass>(*this)) - .run(); - - dumpLowerInfo(); - - // Optimization passes (optional) - pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run(); - - VERBOSE(LoweredGraph) << "Dump after all the passes" << std::endl; - for (auto operand : _graph.getInputs()) - VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl; - for (auto operand : _graph.getOutputs()) - VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl; - dumper::text::dumpLoweredGraph(*this); - - // Graph verifications - { - assert(ir::verifier::InputOutputChecker().verify(_graph)); - assert(ir::verifier::DAGChecker().verify(_graph)); - assert(ir::verifier::EdgeChecker().verify(_graph)); - } + lowerGraph(graph, options); } +// TODO Design better class and constructor to represent parent_graph LoweredGraph::LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}, _parent_graph{parent_graph} { + lowerGraph(graph, options); +} + +void LoweredGraph::lowerGraph(const ir::Graph &graph, const CompilerOptions &options) +{ // set tracing_ctx for copied graph if (options.tracing_ctx) { diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h index 8e343cffa..eb54b67ae 100644 --- a/runtime/onert/core/src/exec/IPermuteFunction.h +++ b/runtime/onert/core/src/exec/IPermuteFunction.h @@ -145,6 +145,9 @@ protected: case ir::DataType::INT64: permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets); break; + case ir::DataType::QUANT_INT16_SYMM: + permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets); + break; default: throw std::runtime_error("IPermuteFunction: 
Not supported data type"); break; @@ -338,6 +341,8 @@ protected: case ir::DataType::QUANT_INT8_ASYMM: case ir::DataType::QUANT_INT8_SYMM: return typeid(int8_t); + case ir::DataType::QUANT_INT16_SYMM: + return typeid(int16_t); default: throw std::runtime_error("IPermuteFunction: Not supported data type"); } diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc index 8e75c4f53..07670c720 100644 --- a/runtime/onert/core/src/ir/DataType.cc +++ b/runtime/onert/core/src/ir/DataType.cc @@ -50,6 +50,8 @@ size_t sizeOfDataType(DataType data_type) return sizeof(int64_t); case DataType::QUANT_INT16_ASYMM: return sizeof(int16_t); + case DataType::QUANT_INT16_SYMM: + return sizeof(int16_t); default: throw std::runtime_error{"Unsupported type size"}; } diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc index 80e2a3f7a..0b596ff13 100644 --- a/runtime/onert/core/src/ir/OperationDumper.cc +++ b/runtime/onert/core/src/ir/OperationDumper.cc @@ -29,19 +29,21 @@ using namespace operation; namespace { -void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "") + +// Dump all input and output. +// Use this function when there is no special input or(and) output. 
+void dumpOpGeneric(const Operation &node, const std::string &adding_input = "") { VERBOSE(LIR) << "* " << node.name() << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs() << ") " << adding_input << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs() << ")" << std::endl; } -void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "") +void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "") { VERBOSE(LIR) << "* " << node.name() << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(1) - << ") " << adding_input << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input + << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } @@ -53,18 +55,6 @@ void dumpConvOp(const Operation &node, const std::string &padding_type) << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl; VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } - -void dumpPackingOp(const Operation &node) -{ - VERBOSE(LIR) << "* " << node.name() << std::endl; - std::string inputs; - for (auto i : node.getInputs()) - { - inputs += std::to_string(i.value()) + ","; - } - VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} } // namespace OperationDumper::OperationDumper(const std::string &start_msg) @@ -86,7 +76,7 @@ void OperationDumper::visit(const BatchToSpaceND &node) std::string block_size = "BlockSize(" + std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")"; - dumpUnaryInputOp(node, block_size); + 
dumpOpGeneric(node, block_size); } void OperationDumper::visit(const BCQFullyConnected &node) @@ -103,13 +93,13 @@ void OperationDumper::visit(const BCQFullyConnected &node) VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const BinaryArithmetic &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const operation::BroadcastTo &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const Comparison &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); } +void OperationDumper::visit(const Concat &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Conv2D &node) { @@ -118,11 +108,11 @@ void OperationDumper::visit(const Conv2D &node) dumpConvOp(node, padding_type); } -void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const DepthToSpace &node) { dumpOpGeneric(node); } void OperationDumper::visit(const DepthwiseConv2D &node) { @@ -143,12 +133,12 @@ void OperationDumper::visit(const ElementwiseActivation &node) { params = " alpha value(" + std::to_string(node.param().alpha) + ")"; } - dumpUnaryInputOp(node, params); + dumpOpGeneric(node, params); } -void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const 
ElementwiseBinary &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const ElementwiseUnary &node) { dumpOpGeneric(node); } void OperationDumper::visit(const EmbeddingLookup &node) { @@ -208,9 +198,9 @@ void OperationDumper::visit(const InstanceNorm &node) dumpUnaryInputOp(node, inputs); } -void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const L2Normalization &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const LocalResponseNormalization &node) { dumpOpGeneric(node); } void OperationDumper::visit(const LSTM &node) { @@ -258,7 +248,7 @@ void OperationDumper::visit(const LSTM &node) << node.getOutputs().at(LSTM::Output::OUTPUT) << ")" << std::endl; } -void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); } +void OperationDumper::visit(const Pack &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Pad &node) { @@ -297,16 +287,16 @@ void OperationDumper::visit(const Pool2D &node) VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const Pow &node) { dumpOpGeneric(node); } void OperationDumper::visit(const PReLU &node) { std::string alpha = "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")"; - dumpUnaryInputOp(node, alpha); + dumpOpGeneric(node, alpha); } -void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const Rank &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); } @@ -320,37 +310,9 @@ void OperationDumper::visit(const Reshape &node) dumpUnaryInputOp(node, shape); } -void 
OperationDumper::visit(const ResizeBilinear &node) -{ - if (node.getInputs().size() == 1) - { - dumpUnaryInputOp(node); - } - else if (node.getInputs().size() == 2) - { - dumpBinaryInputOp(node); - } - else - { - VERBOSE(LIR) << "* " << node.name() << " is set wrong" << std::endl; - } -} +void OperationDumper::visit(const ResizeBilinear &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const ResizeNearestNeighbor &node) -{ - if (node.getInputs().size() == 1) - { - dumpUnaryInputOp(node); - } - else if (node.getInputs().size() == 2) - { - dumpBinaryInputOp(node); - } - else - { - VERBOSE(LIR) << "* " << node.name() << " is set wrong" << std::endl; - } -} +void OperationDumper::visit(const ResizeNearestNeighbor &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Reverse &node) { @@ -391,9 +353,9 @@ void OperationDumper::visit(const Select &node) VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const ir::operation::Shape &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const Softmax &node) { dumpOpGeneric(node); } void OperationDumper::visit(const SpaceToBatchND &node) { @@ -404,11 +366,11 @@ void OperationDumper::visit(const SpaceToBatchND &node) dumpUnaryInputOp(node, inputs); } -void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const SpaceToDepth &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const Split &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const Split &node) { dumpOpGeneric(node); } -void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const SquaredDifference &node) { dumpOpGeneric(node); } void OperationDumper::visit(const 
StatelessRandomUniform &node) { @@ -419,7 +381,7 @@ void OperationDumper::visit(const StatelessRandomUniform &node) VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); } +void OperationDumper::visit(const Squeeze &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); } @@ -454,22 +416,14 @@ void OperationDumper::visit(const TransposeConv &node) VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Transpose &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const Transpose &node) { dumpOpGeneric(node); } void OperationDumper::visit(const Unpack &node) { VERBOSE(LIR) << "* " << node.name() << std::endl; VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")" << std::endl; - std::string outputs; - const auto &output_indices = node.getOutputs(); - for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it) - { - outputs += std::to_string(it->value()); - if (std::next(it) != std::end(output_indices)) - outputs += ", "; - } - VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl; + VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl; } void OperationDumper::visit(const OneHot &node) @@ -483,51 +437,21 @@ void OperationDumper::visit(const OneHot &node) void OperationDumper::visit(const If &node) { VERBOSE(LIR) << "* " << node.name() << std::endl; - std::string inputs; - const auto &input_indices = node.getInputs(); - for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it) - { - inputs += std::to_string(it->value()); - if (std::next(it) != std::end(input_indices)) - inputs += ", "; - } VERBOSE(LIR) << " - Inputs : " << "Then subgraph (" << node.param().then_subg_index << ") Else subgraph (" - << 
node.param().else_subg_index << ") Inputs(" << inputs << ")" << std::endl; - std::string outputs; - const auto &output_indices = node.getOutputs(); - for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it) - { - outputs += std::to_string(it->value()); - if (std::next(it) != std::end(output_indices)) - outputs += ", "; - } - VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl; + << node.param().else_subg_index << ") Inputs(" << node.getInputs() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl; } void OperationDumper::visit(const While &node) { VERBOSE(LIR) << "* " << node.name() << std::endl; - std::string inputs; - const auto &input_indices = node.getInputs(); - for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it) - { - inputs += std::to_string(it->value()); - if (std::next(it) != std::end(input_indices)) - inputs += ", "; - } VERBOSE(LIR) << " - Inputs : " << "Cond subgraph (" << node.param().cond_subg_index << ") Body subgraph (" - << node.param().body_subg_index << ") Inputs(" << inputs << ")" << std::endl; - std::string outputs; - const auto &output_indices = node.getOutputs(); - for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it) - { - outputs += std::to_string(it->value()); - if (std::next(it) != std::end(output_indices)) - outputs += ", "; - } - VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl; + << node.param().body_subg_index << ") Inputs(" << node.getInputs() << ")" + << std::endl; + VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl; } } // namespace ir diff --git a/runtime/onert/core/src/ir/operation/AddN.cc b/runtime/onert/core/src/ir/operation/AddN.cc index 110aeebe7..a51e12dff 100644 --- a/runtime/onert/core/src/ir/operation/AddN.cc +++ b/runtime/onert/core/src/ir/operation/AddN.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/AddN.h" - -#include 
<cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc b/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc index e918d27ae..ccda674ad 100644 --- a/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc +++ b/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/BCQFullyConnected.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/BCQGather.cc b/runtime/onert/core/src/ir/operation/BCQGather.cc index f9dfaa3f6..1ca5b0c9f 100644 --- a/runtime/onert/core/src/ir/operation/BCQGather.cc +++ b/runtime/onert/core/src/ir/operation/BCQGather.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/BCQGather.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc index e58e0f486..3c5578ac4 100644 --- a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc +++ b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/BatchToSpaceND.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc index 2d439194f..5eb3fc3d7 100644 --- a/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc +++ b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/BinaryArithmetic.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir diff --git a/runtime/onert/core/src/ir/operation/BroadcastTo.cc b/runtime/onert/core/src/ir/operation/BroadcastTo.cc index 5da7b5abc..eab6c0611 100644 --- a/runtime/onert/core/src/ir/operation/BroadcastTo.cc +++ 
b/runtime/onert/core/src/ir/operation/BroadcastTo.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/BroadcastTo.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Bulk.cc b/runtime/onert/core/src/ir/operation/Bulk.cc new file mode 100644 index 000000000..4b96c9d94 --- /dev/null +++ b/runtime/onert/core/src/ir/operation/Bulk.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/Bulk.h" +#include "ir/OperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace operation +{ +void Bulk::accept(OperationVisitor &v) const { v.visit(*this); } + +Bulk::Bulk(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Bulk::Param &param) + : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param} +{ +} + +} // namespace operation +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Comparison.cc b/runtime/onert/core/src/ir/operation/Comparison.cc index 94c96ff69..33365657c 100644 --- a/runtime/onert/core/src/ir/operation/Comparison.cc +++ b/runtime/onert/core/src/ir/operation/Comparison.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Comparison.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Concat.cc b/runtime/onert/core/src/ir/operation/Concat.cc index 5d99debb7..3a21e36f2 100644 --- a/runtime/onert/core/src/ir/operation/Concat.cc +++ b/runtime/onert/core/src/ir/operation/Concat.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Concat.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Conv2D.cc b/runtime/onert/core/src/ir/operation/Conv2D.cc index 725f3e70b..d615ae416 100644 --- a/runtime/onert/core/src/ir/operation/Conv2D.cc +++ b/runtime/onert/core/src/ir/operation/Conv2D.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Conv2D.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc index 822eb30a9..365745ea8 100644 --- a/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc +++ b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/ConvertFp16ToFp32.h" - -#include <cassert> - #include
"ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc index 5e5b42f3b..d4fc7031c 100644 --- a/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc +++ b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/ConvertFp32ToFp16.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/DepthToSpace.cc b/runtime/onert/core/src/ir/operation/DepthToSpace.cc index 197c7ee48..e3edea777 100644 --- a/runtime/onert/core/src/ir/operation/DepthToSpace.cc +++ b/runtime/onert/core/src/ir/operation/DepthToSpace.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/DepthToSpace.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc index bef75c5cf..0e7137306 100644 --- a/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc +++ b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/DepthwiseConv2D.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc index f3e942f7d..e83c26e28 100644 --- a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc +++ b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/ElementwiseActivation.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir diff --git a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc index 155b660dc..b22bed7bc 100644 --- 
a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc +++ b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/ElementwiseBinary.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc index c21c51c05..fd463e0fe 100644 --- a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc +++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/ElementwiseUnary.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir diff --git a/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc index e23674706..66b80b2c5 100644 --- a/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc +++ b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/EmbeddingLookup.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/ExpandDims.cc b/runtime/onert/core/src/ir/operation/ExpandDims.cc index 50e3636f3..e421bc383 100644 --- a/runtime/onert/core/src/ir/operation/ExpandDims.cc +++ b/runtime/onert/core/src/ir/operation/ExpandDims.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/ExpandDims.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Fill.cc b/runtime/onert/core/src/ir/operation/Fill.cc index 4a13737d4..60355c609 100644 --- a/runtime/onert/core/src/ir/operation/Fill.cc +++ b/runtime/onert/core/src/ir/operation/Fill.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Fill.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace 
onert diff --git a/runtime/onert/core/src/ir/operation/FullyConnected.cc b/runtime/onert/core/src/ir/operation/FullyConnected.cc index 335b7b209..3533df097 100644 --- a/runtime/onert/core/src/ir/operation/FullyConnected.cc +++ b/runtime/onert/core/src/ir/operation/FullyConnected.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/FullyConnected.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Gather.cc b/runtime/onert/core/src/ir/operation/Gather.cc index 96a39b3f2..e0c4630a0 100644 --- a/runtime/onert/core/src/ir/operation/Gather.cc +++ b/runtime/onert/core/src/ir/operation/Gather.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Gather.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/HashtableLookup.cc b/runtime/onert/core/src/ir/operation/HashtableLookup.cc index 2974679d4..5d1589cd1 100644 --- a/runtime/onert/core/src/ir/operation/HashtableLookup.cc +++ b/runtime/onert/core/src/ir/operation/HashtableLookup.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/HashtableLookup.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/InstanceNorm.cc b/runtime/onert/core/src/ir/operation/InstanceNorm.cc index d9af9d0b7..9fb55383e 100644 --- a/runtime/onert/core/src/ir/operation/InstanceNorm.cc +++ b/runtime/onert/core/src/ir/operation/InstanceNorm.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/InstanceNorm.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/L2Normalization.cc b/runtime/onert/core/src/ir/operation/L2Normalization.cc index 0184ef628..6725df596 100644 --- a/runtime/onert/core/src/ir/operation/L2Normalization.cc +++ b/runtime/onert/core/src/ir/operation/L2Normalization.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/L2Normalization.h" - -#include <cassert> - #include 
"ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/LSTM.cc b/runtime/onert/core/src/ir/operation/LSTM.cc index 45a1fd120..06e66158b 100644 --- a/runtime/onert/core/src/ir/operation/LSTM.cc +++ b/runtime/onert/core/src/ir/operation/LSTM.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/LSTM.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc index 52037cc72..73fca9938 100644 --- a/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc +++ b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/LocalResponseNormalization.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/LogSoftmax.cc b/runtime/onert/core/src/ir/operation/LogSoftmax.cc index 51f6a6c5d..d580e63e1 100644 --- a/runtime/onert/core/src/ir/operation/LogSoftmax.cc +++ b/runtime/onert/core/src/ir/operation/LogSoftmax.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/LogSoftmax.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/MatrixBandPart.cc b/runtime/onert/core/src/ir/operation/MatrixBandPart.cc index 6046e36fe..e52bddc1f 100644 --- a/runtime/onert/core/src/ir/operation/MatrixBandPart.cc +++ b/runtime/onert/core/src/ir/operation/MatrixBandPart.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/MatrixBandPart.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/PReLU.cc b/runtime/onert/core/src/ir/operation/PReLU.cc index 5ed31c2b9..87bd12e60 100644 --- a/runtime/onert/core/src/ir/operation/PReLU.cc +++ b/runtime/onert/core/src/ir/operation/PReLU.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/PReLU.h" - -#include <cassert> - 
#include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Permute.cc b/runtime/onert/core/src/ir/operation/Permute.cc index 571965de8..813fbaf30 100644 --- a/runtime/onert/core/src/ir/operation/Permute.cc +++ b/runtime/onert/core/src/ir/operation/Permute.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Permute.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Pool2D.cc b/runtime/onert/core/src/ir/operation/Pool2D.cc index cbb42a80a..e32b876e6 100644 --- a/runtime/onert/core/src/ir/operation/Pool2D.cc +++ b/runtime/onert/core/src/ir/operation/Pool2D.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/Pool2D.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir diff --git a/runtime/onert/core/src/ir/operation/Pow.cc b/runtime/onert/core/src/ir/operation/Pow.cc index f1df54c60..f7c159a12 100644 --- a/runtime/onert/core/src/ir/operation/Pow.cc +++ b/runtime/onert/core/src/ir/operation/Pow.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Pow.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/RNN.cc b/runtime/onert/core/src/ir/operation/RNN.cc index a40e5bdc9..988a50669 100644 --- a/runtime/onert/core/src/ir/operation/RNN.cc +++ b/runtime/onert/core/src/ir/operation/RNN.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/RNN.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Range.cc b/runtime/onert/core/src/ir/operation/Range.cc index f85d52cb0..8ced92a0b 100644 --- a/runtime/onert/core/src/ir/operation/Range.cc +++ b/runtime/onert/core/src/ir/operation/Range.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Range.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git 
a/runtime/onert/core/src/ir/operation/Rank.cc b/runtime/onert/core/src/ir/operation/Rank.cc index c33ed0a80..40797bf29 100644 --- a/runtime/onert/core/src/ir/operation/Rank.cc +++ b/runtime/onert/core/src/ir/operation/Rank.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Rank.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Reduce.cc b/runtime/onert/core/src/ir/operation/Reduce.cc index 0811f1c37..8da1940fa 100644 --- a/runtime/onert/core/src/ir/operation/Reduce.cc +++ b/runtime/onert/core/src/ir/operation/Reduce.cc @@ -15,12 +15,10 @@ */ #include "ir/operation/Reduce.h" +#include "ir/OperationVisitor.h" -#include <cassert> #include <unordered_map> -#include "ir/OperationVisitor.h" - namespace onert { namespace ir diff --git a/runtime/onert/core/src/ir/operation/Reshape.cc b/runtime/onert/core/src/ir/operation/Reshape.cc index 54c12574a..0ed4affa1 100644 --- a/runtime/onert/core/src/ir/operation/Reshape.cc +++ b/runtime/onert/core/src/ir/operation/Reshape.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Reshape.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc index 7c9f5e104..7d256f447 100644 --- a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc +++ b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/ResizeBilinear.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc index 9792b292d..58be87b95 100644 --- a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc +++ b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/ResizeNearestNeighbor.h" - -#include <cassert> - #include 
"ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Reverse.cc b/runtime/onert/core/src/ir/operation/Reverse.cc index 471457739..6c3746426 100644 --- a/runtime/onert/core/src/ir/operation/Reverse.cc +++ b/runtime/onert/core/src/ir/operation/Reverse.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Reverse.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Shape.cc b/runtime/onert/core/src/ir/operation/Shape.cc index 1b2cd6241..f90924488 100644 --- a/runtime/onert/core/src/ir/operation/Shape.cc +++ b/runtime/onert/core/src/ir/operation/Shape.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Shape.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Softmax.cc b/runtime/onert/core/src/ir/operation/Softmax.cc index 91850fa33..c06c85309 100644 --- a/runtime/onert/core/src/ir/operation/Softmax.cc +++ b/runtime/onert/core/src/ir/operation/Softmax.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Softmax.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc index 97c630888..94acccb0c 100644 --- a/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc +++ b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/SpaceToBatchND.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/SpaceToDepth.cc b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc index e1fd27a55..08e7e5190 100644 --- a/runtime/onert/core/src/ir/operation/SpaceToDepth.cc +++ b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/SpaceToDepth.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git 
a/runtime/onert/core/src/ir/operation/Split.cc b/runtime/onert/core/src/ir/operation/Split.cc index 96822822b..3e371188d 100644 --- a/runtime/onert/core/src/ir/operation/Split.cc +++ b/runtime/onert/core/src/ir/operation/Split.cc @@ -13,9 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "ir/operation/Split.h" -#include <cassert> #include "ir/OperationVisitor.h" + namespace onert { namespace ir diff --git a/runtime/onert/core/src/ir/operation/SplitV.cc b/runtime/onert/core/src/ir/operation/SplitV.cc index 38918cd81..be13f167e 100644 --- a/runtime/onert/core/src/ir/operation/SplitV.cc +++ b/runtime/onert/core/src/ir/operation/SplitV.cc @@ -13,9 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "ir/operation/SplitV.h" -#include <cassert> #include "ir/OperationVisitor.h" + namespace onert { namespace ir diff --git a/runtime/onert/core/src/ir/operation/SquaredDifference.cc b/runtime/onert/core/src/ir/operation/SquaredDifference.cc index 705b60abc..db93903c7 100644 --- a/runtime/onert/core/src/ir/operation/SquaredDifference.cc +++ b/runtime/onert/core/src/ir/operation/SquaredDifference.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/SquaredDifference.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc b/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc index 18f1cf5a6..94be0be86 100644 --- a/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc +++ b/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/StatelessRandomUniform.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/StridedSlice.cc b/runtime/onert/core/src/ir/operation/StridedSlice.cc index e8278b456..a38282c93 100644 
--- a/runtime/onert/core/src/ir/operation/StridedSlice.cc +++ b/runtime/onert/core/src/ir/operation/StridedSlice.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/StridedSlice.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Tile.cc b/runtime/onert/core/src/ir/operation/Tile.cc index 0ec785579..51c1ff1dc 100644 --- a/runtime/onert/core/src/ir/operation/Tile.cc +++ b/runtime/onert/core/src/ir/operation/Tile.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Tile.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/TopKV2.cc b/runtime/onert/core/src/ir/operation/TopKV2.cc index a1f39202d..e1723d180 100644 --- a/runtime/onert/core/src/ir/operation/TopKV2.cc +++ b/runtime/onert/core/src/ir/operation/TopKV2.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/TopKV2.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Transpose.cc b/runtime/onert/core/src/ir/operation/Transpose.cc index f2ee52f0e..dbc5ef2aa 100644 --- a/runtime/onert/core/src/ir/operation/Transpose.cc +++ b/runtime/onert/core/src/ir/operation/Transpose.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/Transpose.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/TransposeConv.cc b/runtime/onert/core/src/ir/operation/TransposeConv.cc index 1f405dc6b..944cc365d 100644 --- a/runtime/onert/core/src/ir/operation/TransposeConv.cc +++ b/runtime/onert/core/src/ir/operation/TransposeConv.cc @@ -15,9 +15,6 @@ */ #include "ir/operation/TransposeConv.h" - -#include <cassert> - #include "ir/OperationVisitor.h" namespace onert diff --git a/runtime/onert/core/src/ir/operation/Unpack.cc b/runtime/onert/core/src/ir/operation/Unpack.cc index 90d3c0c07..185eddce3 100644 --- a/runtime/onert/core/src/ir/operation/Unpack.cc +++ 
b/runtime/onert/core/src/ir/operation/Unpack.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "ir/operation/Unpack.h" #include "ir/OperationVisitor.h" diff --git a/runtime/onert/core/src/ir/operation/While.cc b/runtime/onert/core/src/ir/operation/While.cc index 8a6f5c01e..f35996b07 100644 --- a/runtime/onert/core/src/ir/operation/While.cc +++ b/runtime/onert/core/src/ir/operation/While.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "ir/operation/While.h" #include "ir/OperationVisitor.h" diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h index 6ba7ee922..5649f286d 100644 --- a/runtime/onert/frontend/base_loader/include/base_loader.h +++ b/runtime/onert/frontend/base_loader/include/base_loader.h @@ -96,6 +96,7 @@ protected: ir::Activation convertActivation(ActivationFunctionType type); ir::DataType tensorTypeToDataType(TensorType type); ir::OperandIndex tensorIdxToOperandIdx(int32_t tensorIdx); + flexbuffers::Map getCustomOpAttrMap(const Operator *op); // Create operands form tflite::Tensor ir::OperandIndex loadOperand(const Tensor *tensor, ir::Graph &subg); @@ -110,6 +111,16 @@ protected: void loadStridesAndPaddings(Param &param, const OptionsType *options); // Load Pool2D param template <typename Param> void loadPool2DOptions(Param &param, const Pool2DOptions *options); + // Get BuiltinOperator + BuiltinOperator getBuiltinOperator(const Operator *op) + { + auto const builtin_opcode = _model->operator_codes()->Get(op->opcode_index()); + auto builtin_op = builtin_opcode->builtin_code(); + if (builtin_op < BuiltinOperator::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES) + builtin_op = static_cast<BuiltinOperator>(builtin_opcode->deprecated_builtin_code()); + + return builtin_op; + } private: virtual
std::unique_ptr<ir::Graph> loadSubgraph(const SubGraph *subg) = 0; @@ -291,6 +302,15 @@ ir::OperandIndex BaseLoader<LoaderDomain>::BaseLoader::tensorIdxToOperandIdx(int return isOptionalInputTensor(tensorIdx) ? ir::OperandIndex() : _tensor_to_operand[tensorIdx]; } +template <typename LoaderDomain> +flexbuffers::Map BaseLoader<LoaderDomain>::BaseLoader::getCustomOpAttrMap(const Operator *op) +{ + size_t custom_op_data_size = op->custom_options()->size(); + auto custom_op_data = op->custom_options()->Data(); + auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size); + return data_root.AsMap(); +} + /* Copy is copied from tensorflow lite */ template <typename T> bool Copy(const T *data_ptr, std::vector<uint16_t> &arr) { @@ -545,7 +565,7 @@ void BaseLoader<LoaderDomain>::loadOperationIO(const Operator *op, ir::OperandIn { // Optional tensors are not supported yet except for FULLY_CONNECTED and BCQ_FULLY_CONNECTED auto check_optional_input = [&]() { - auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); + auto builtin_code = getBuiltinOperator(op); if (isOptionalInputTensor(idx) && !allowOptionalInputTensor(builtin_code)) throw std::runtime_error( std::string("loader doesn't support optional input tensor yet for ") @@ -748,10 +768,7 @@ void BaseLoader<LoaderDomain>::loadAddV2(const Operator *op, ir::Graph &subg) } else { - size_t custom_op_data_size = op->custom_options()->size(); - auto custom_op_data = op->custom_options()->Data(); - auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size); - auto attr_map = data_root.AsMap(); + const auto attr_map = getCustomOpAttrMap(op); const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>( attr_map["fused_activation_function"].AsInt8()); param.activation = convertActivation(fused_activation_func); @@ -876,10 +893,7 @@ void BaseLoader<LoaderDomain>::loadReduceAll(const Operator *op, ir::Graph &subg } else { - size_t 
custom_op_data_size = op->custom_options()->size(); - auto custom_op_data = op->custom_options()->Data(); - auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size); - auto attr_map = data_root.AsMap(); + const auto attr_map = getCustomOpAttrMap(op); param.keep_dims = attr_map["keep_dims"].AsBool(); } @@ -931,8 +945,7 @@ void BaseLoader<LoaderDomain>::loadGather(const Operator *op, ir::Graph &subg) template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadDetectionPostProcess(const Operator *op, ir::Graph &subg) { - const flexbuffers::Map &m = - flexbuffers::GetRoot(op->custom_options()->data(), op->custom_options()->size()).AsMap(); + const auto &m = getCustomOpAttrMap(op); ir::operation::DetectionPostProcess::Param param; @@ -972,14 +985,17 @@ void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &su { ir::operation::BatchMatMul::Param param; - const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); + const auto builtin_op = getBuiltinOperator(op); switch (builtin_op) { case BuiltinOperator::BuiltinOperator_BATCH_MATMUL: - param.adj_x = op->builtin_options_as_BatchMatMulOptions()->adjoint_lhs(); - param.adj_y = op->builtin_options_as_BatchMatMulOptions()->adjoint_rhs(); - break; + // Handled on each loader: different option name + // Circle: adjoint_lhs, adjoint_rhs + // TFLite: adj_x, adj_y + throw std::runtime_error( + std::string("Cannot handle here: ").append(EnumNameBuiltinOperator(builtin_op)) + " as " + + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL)); case BuiltinOperator::BuiltinOperator_CUSTOM: if (op->custom_options() == nullptr) { @@ -988,10 +1004,7 @@ void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &su } else { - size_t custom_op_data_size = op->custom_options()->size(); - auto custom_op_data = op->custom_options()->Data(); - auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size); - auto 
attr_map = data_root.AsMap(); + const auto attr_map = getCustomOpAttrMap(op); param.adj_x = attr_map["adj_x"].AsBool(); param.adj_y = attr_map["adj_y"].AsBool(); } @@ -1184,7 +1197,7 @@ template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadComparison(const Operator *op, ir::Graph &subg) { ir::operation::Comparison::Param param; - const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); + const auto builtin_op = getBuiltinOperator(op); switch (builtin_op) { @@ -1224,10 +1237,7 @@ void BaseLoader<LoaderDomain>::loadEinsum(const Operator *op, ir::Graph &subg) } else { - size_t custom_op_data_size = op->custom_options()->size(); - auto custom_op_data = op->custom_options()->Data(); - auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size); - auto attr_map = data_root.AsMap(); + const auto attr_map = getCustomOpAttrMap(op); param.equation = attr_map["equation"].ToString(); } @@ -1247,10 +1257,7 @@ void BaseLoader<LoaderDomain>::loadFusedBatchNorm(const Operator *op, ir::Graph } else { - size_t custom_op_data_size = op->custom_options()->size(); - auto custom_op_data = op->custom_options()->Data(); - auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size); - auto attr_map = data_root.AsMap(); + const auto attr_map = getCustomOpAttrMap(op); param.is_training = attr_map["is_training"].AsBool(); param.epsilon = attr_map["epsilon"].AsFloat(); param.data_format = attr_map["data_format"].ToString(); @@ -1363,7 +1370,7 @@ void BaseLoader<LoaderDomain>::loadUnidirectionalSequenceLSTM(const Operator *op // loader doesn't support optional output tensor yet if (op->outputs()->size() != 1) { - auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); + auto builtin_code = getBuiltinOperator(op); throw std::runtime_error(std::string("loader doesn't support optional output tensor yet for ") .append(EnumNameBuiltinOperator(builtin_code))); } @@ -1381,7 +1388,7 @@ void 
BaseLoader<LoaderDomain>::loadUnidirectionalSequenceLSTM(const Operator *op template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg) { - const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); + auto const builtin_op = getBuiltinOperator(op); switch (builtin_op) { diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc index 4fb0e71d6..aae831d61 100644 --- a/runtime/onert/frontend/circle/src/circle_loader.cc +++ b/runtime/onert/frontend/circle/src/circle_loader.cc @@ -72,6 +72,12 @@ struct LoaderDomain class CircleLoader final : public base_loader::BaseLoader<LoaderDomain> { protected: + // Different option name + // Circle: adjoint_lhs, adjoint_rhs + // TFLite: adj_x, adj_y + void loadBatchMatMul(const Operator *op, ir::Graph &subg); + + // Only circle operations void loadInstanceNorm(const Operator *op, ir::Graph &subg); void loadBCQFullyConnected(const Operator *op, ir::Graph &subg); void loadBCQGather(const Operator *op, ir::Graph &subg); @@ -129,10 +135,13 @@ private: void loadOperation(const circle::Operator *op, ir::Graph &subg) { - const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); + auto const builtin_op = getBuiltinOperator(op); switch (builtin_op) { + case circle::BuiltinOperator::BuiltinOperator_BATCH_MATMUL: + loadBatchMatMul(op, subg); + return; case circle::BuiltinOperator::BuiltinOperator_INSTANCE_NORM: loadInstanceNorm(op, subg); return; @@ -149,6 +158,23 @@ private: } }; +void CircleLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::BatchMatMul::Param param; + const auto *options = op->builtin_options_as_BatchMatMulOptions(); + + param.adj_x = options->adjoint_lhs(); + param.adj_y = options->adjoint_rhs(); + + 
std::unique_ptr<ir::Operation> new_op(new ir::operation::BatchMatMul(inputs, outputs, param)); + subg.addOperation(std::move(new_op)); +} + void CircleLoader::loadInstanceNorm(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; diff --git a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h index eb1775297..e3c92eae0 100644 --- a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h +++ b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h @@ -51,6 +51,9 @@ struct TensorBuilder; struct Conv2DOptions; struct Conv2DOptionsBuilder; +struct Conv3DOptions; +struct Conv3DOptionsBuilder; + struct Pool2DOptions; struct Pool2DOptionsBuilder; @@ -327,6 +330,9 @@ struct MatrixSetDiagOptionsBuilder; struct IfOptions; struct IfOptionsBuilder; +struct CallOnceOptions; +struct CallOnceOptionsBuilder; + struct WhileOptions; struct WhileOptionsBuilder; @@ -351,6 +357,39 @@ struct SegmentSumOptionsBuilder; struct BatchMatMulOptions; struct BatchMatMulOptionsBuilder; +struct CumsumOptions; +struct CumsumOptionsBuilder; + +struct BroadcastToOptions; +struct BroadcastToOptionsBuilder; + +struct Rfft2dOptions; +struct Rfft2dOptionsBuilder; + +struct HashtableOptions; +struct HashtableOptionsBuilder; + +struct HashtableFindOptions; +struct HashtableFindOptionsBuilder; + +struct HashtableImportOptions; +struct HashtableImportOptionsBuilder; + +struct HashtableSizeOptions; +struct HashtableSizeOptionsBuilder; + +struct VarHandleOptions; +struct VarHandleOptionsBuilder; + +struct ReadVariableOptions; +struct ReadVariableOptionsBuilder; + +struct AssignVariableOptions; +struct AssignVariableOptionsBuilder; + +struct RandomOptions; +struct RandomOptionsBuilder; + struct BCQGatherOptions; struct BCQGatherOptionsBuilder; @@ -375,10 +414,16 @@ struct BufferBuilder; struct Metadata; struct MetadataBuilder; +struct TensorMap; +struct TensorMapBuilder; 
+ +struct SignatureDef; +struct SignatureDefBuilder; + struct Model; struct ModelBuilder; -enum TensorType +enum TensorType : int8_t { TensorType_FLOAT32 = 0, TensorType_FLOAT16 = 1, @@ -391,36 +436,43 @@ enum TensorType TensorType_COMPLEX64 = 8, TensorType_INT8 = 9, TensorType_FLOAT64 = 10, + TensorType_COMPLEX128 = 11, + TensorType_UINT64 = 12, + TensorType_RESOURCE = 13, + TensorType_VARIANT = 14, + TensorType_UINT32 = 15, TensorType_MIN = TensorType_FLOAT32, - TensorType_MAX = TensorType_FLOAT64 + TensorType_MAX = TensorType_UINT32 }; -inline const TensorType (&EnumValuesTensorType())[11] +inline const TensorType (&EnumValuesTensorType())[16] { - static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, - TensorType_UINT8, TensorType_INT64, TensorType_STRING, - TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64, - TensorType_INT8, TensorType_FLOAT64}; + static const TensorType values[] = { + TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, TensorType_UINT8, + TensorType_INT64, TensorType_STRING, TensorType_BOOL, TensorType_INT16, + TensorType_COMPLEX64, TensorType_INT8, TensorType_FLOAT64, TensorType_COMPLEX128, + TensorType_UINT64, TensorType_RESOURCE, TensorType_VARIANT, TensorType_UINT32}; return values; } inline const char *const *EnumNamesTensorType() { - static const char *const names[12] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", - "INT64", "STRING", "BOOL", "INT16", - "COMPLEX64", "INT8", "FLOAT64", nullptr}; + static const char *const names[17] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64", + "STRING", "BOOL", "INT16", "COMPLEX64", "INT8", + "FLOAT64", "COMPLEX128", "UINT64", "RESOURCE", "VARIANT", + "UINT32", nullptr}; return names; } inline const char *EnumNameTensorType(TensorType e) { - if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_FLOAT64)) + if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_UINT32)) return ""; const size_t index = static_cast<size_t>(e); return 
EnumNamesTensorType()[index]; } -enum QuantizationDetails +enum QuantizationDetails : uint8_t { QuantizationDetails_NONE = 0, QuantizationDetails_CustomQuantization = 1, @@ -465,7 +517,7 @@ bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types); -enum DimensionType +enum DimensionType : int8_t { DimensionType_DENSE = 0, DimensionType_SPARSE_CSR = 1, @@ -493,7 +545,7 @@ inline const char *EnumNameDimensionType(DimensionType e) return EnumNamesDimensionType()[index]; } -enum SparseIndexVector +enum SparseIndexVector : uint8_t { SparseIndexVector_NONE = 0, SparseIndexVector_Int32Vector = 1, @@ -552,8 +604,11 @@ bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types); -enum BuiltinOperator +enum BuiltinOperator : int32_t { + BuiltinOperator_BCQ_GATHER = -4, + BuiltinOperator_BCQ_FULLY_CONNECTED = -3, + BuiltinOperator_INSTANCE_NORM = -2, BuiltinOperator_ADD = 0, BuiltinOperator_AVERAGE_POOL_2D = 1, BuiltinOperator_CONCATENATION = 2, @@ -681,16 +736,36 @@ enum BuiltinOperator BuiltinOperator_DENSIFY = 124, BuiltinOperator_SEGMENT_SUM = 125, BuiltinOperator_BATCH_MATMUL = 126, - BuiltinOperator_BCQ_GATHER = 252, - BuiltinOperator_BCQ_FULLY_CONNECTED = 253, - BuiltinOperator_INSTANCE_NORM = 254, - BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_INSTANCE_NORM -}; - -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[130] -{ - static const BuiltinOperator values[] = {BuiltinOperator_ADD, + BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127, + BuiltinOperator_CUMSUM = 128, + BuiltinOperator_CALL_ONCE = 129, + BuiltinOperator_BROADCAST_TO = 130, + BuiltinOperator_RFFT2D = 131, + BuiltinOperator_CONV_3D = 132, + BuiltinOperator_IMAG = 133, + BuiltinOperator_REAL = 134, + 
BuiltinOperator_COMPLEX_ABS = 135, + BuiltinOperator_HASHTABLE = 136, + BuiltinOperator_HASHTABLE_FIND = 137, + BuiltinOperator_HASHTABLE_IMPORT = 138, + BuiltinOperator_HASHTABLE_SIZE = 139, + BuiltinOperator_REDUCE_ALL = 140, + BuiltinOperator_CONV_3D_TRANSPOSE = 141, + BuiltinOperator_VAR_HANDLE = 142, + BuiltinOperator_READ_VARIABLE = 143, + BuiltinOperator_ASSIGN_VARIABLE = 144, + BuiltinOperator_BROADCAST_ARGS = 145, + BuiltinOperator_RANDOM_STANDARD_NORMAL = 146, + BuiltinOperator_MIN = BuiltinOperator_BCQ_GATHER, + BuiltinOperator_MAX = BuiltinOperator_RANDOM_STANDARD_NORMAL +}; + +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[150] +{ + static const BuiltinOperator values[] = {BuiltinOperator_BCQ_GATHER, + BuiltinOperator_BCQ_FULLY_CONNECTED, + BuiltinOperator_INSTANCE_NORM, + BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, BuiltinOperator_CONCATENATION, BuiltinOperator_CONV_2D, @@ -817,15 +892,36 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[130] BuiltinOperator_DENSIFY, BuiltinOperator_SEGMENT_SUM, BuiltinOperator_BATCH_MATMUL, - BuiltinOperator_BCQ_GATHER, - BuiltinOperator_BCQ_FULLY_CONNECTED, - BuiltinOperator_INSTANCE_NORM}; + BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES, + BuiltinOperator_CUMSUM, + BuiltinOperator_CALL_ONCE, + BuiltinOperator_BROADCAST_TO, + BuiltinOperator_RFFT2D, + BuiltinOperator_CONV_3D, + BuiltinOperator_IMAG, + BuiltinOperator_REAL, + BuiltinOperator_COMPLEX_ABS, + BuiltinOperator_HASHTABLE, + BuiltinOperator_HASHTABLE_FIND, + BuiltinOperator_HASHTABLE_IMPORT, + BuiltinOperator_HASHTABLE_SIZE, + BuiltinOperator_REDUCE_ALL, + BuiltinOperator_CONV_3D_TRANSPOSE, + BuiltinOperator_VAR_HANDLE, + BuiltinOperator_READ_VARIABLE, + BuiltinOperator_ASSIGN_VARIABLE, + BuiltinOperator_BROADCAST_ARGS, + BuiltinOperator_RANDOM_STANDARD_NORMAL}; return values; } inline const char *const *EnumNamesBuiltinOperator() { - static const char *const names[256] = {"ADD", + static const char *const names[152] = 
{"BCQ_GATHER", + "BCQ_FULLY_CONNECTED", + "INSTANCE_NORM", + "", + "ADD", "AVERAGE_POOL_2D", "CONCATENATION", "CONV_2D", @@ -952,147 +1048,40 @@ inline const char *const *EnumNamesBuiltinOperator() "DENSIFY", "SEGMENT_SUM", "BATCH_MATMUL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "BCQ_GATHER", - "BCQ_FULLY_CONNECTED", - "INSTANCE_NORM", + "PLACEHOLDER_FOR_GREATER_OP_CODES", + "CUMSUM", + "CALL_ONCE", + "BROADCAST_TO", + "RFFT2D", + "CONV_3D", + "IMAG", + "REAL", + "COMPLEX_ABS", + "HASHTABLE", + "HASHTABLE_FIND", + "HASHTABLE_IMPORT", + "HASHTABLE_SIZE", + "REDUCE_ALL", + "CONV_3D_TRANSPOSE", + "VAR_HANDLE", + "READ_VARIABLE", + "ASSIGN_VARIABLE", + "BROADCAST_ARGS", + "RANDOM_STANDARD_NORMAL", nullptr}; return names; } inline const char *EnumNameBuiltinOperator(BuiltinOperator e) { - if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_INSTANCE_NORM)) + if (flatbuffers::IsOutRange(e, BuiltinOperator_BCQ_GATHER, + BuiltinOperator_RANDOM_STANDARD_NORMAL)) return ""; - const size_t index = static_cast<size_t>(e); + const size_t index = static_cast<size_t>(e) - static_cast<size_t>(BuiltinOperator_BCQ_GATHER); return EnumNamesBuiltinOperator()[index]; } -enum BuiltinOptions +enum BuiltinOptions : uint8_t { BuiltinOptions_NONE = 0, 
BuiltinOptions_Conv2DOptions = 1, @@ -1196,6 +1185,19 @@ enum BuiltinOptions BuiltinOptions_DensifyOptions = 99, BuiltinOptions_SegmentSumOptions = 100, BuiltinOptions_BatchMatMulOptions = 101, + BuiltinOptions_CumsumOptions = 102, + BuiltinOptions_CallOnceOptions = 103, + BuiltinOptions_BroadcastToOptions = 104, + BuiltinOptions_Rfft2dOptions = 105, + BuiltinOptions_Conv3DOptions = 106, + BuiltinOptions_HashtableOptions = 107, + BuiltinOptions_HashtableFindOptions = 108, + BuiltinOptions_HashtableImportOptions = 109, + BuiltinOptions_HashtableSizeOptions = 110, + BuiltinOptions_VarHandleOptions = 111, + BuiltinOptions_ReadVariableOptions = 112, + BuiltinOptions_AssignVariableOptions = 113, + BuiltinOptions_RandomOptions = 114, BuiltinOptions_BCQGatherOptions = 252, BuiltinOptions_BCQFullyConnectedOptions = 253, BuiltinOptions_InstanceNormOptions = 254, @@ -1203,7 +1205,7 @@ enum BuiltinOptions BuiltinOptions_MAX = BuiltinOptions_InstanceNormOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[105] +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[118] { static const BuiltinOptions values[] = {BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -1307,6 +1309,19 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[105] BuiltinOptions_DensifyOptions, BuiltinOptions_SegmentSumOptions, BuiltinOptions_BatchMatMulOptions, + BuiltinOptions_CumsumOptions, + BuiltinOptions_CallOnceOptions, + BuiltinOptions_BroadcastToOptions, + BuiltinOptions_Rfft2dOptions, + BuiltinOptions_Conv3DOptions, + BuiltinOptions_HashtableOptions, + BuiltinOptions_HashtableFindOptions, + BuiltinOptions_HashtableImportOptions, + BuiltinOptions_HashtableSizeOptions, + BuiltinOptions_VarHandleOptions, + BuiltinOptions_ReadVariableOptions, + BuiltinOptions_AssignVariableOptions, + BuiltinOptions_RandomOptions, BuiltinOptions_BCQGatherOptions, BuiltinOptions_BCQFullyConnectedOptions, BuiltinOptions_InstanceNormOptions}; @@ -1417,19 +1432,19 @@ inline const char *const 
*EnumNamesBuiltinOptions() "DensifyOptions", "SegmentSumOptions", "BatchMatMulOptions", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", + "CumsumOptions", + "CallOnceOptions", + "BroadcastToOptions", + "Rfft2dOptions", + "Conv3DOptions", + "HashtableOptions", + "HashtableFindOptions", + "HashtableImportOptions", + "HashtableSizeOptions", + "VarHandleOptions", + "ReadVariableOptions", + "AssignVariableOptions", + "RandomOptions", "", "", "", @@ -2092,6 +2107,71 @@ template <> struct BuiltinOptionsTraits<circle::BatchMatMulOptions> static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions; }; +template <> struct BuiltinOptionsTraits<circle::CumsumOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::CallOnceOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::BroadcastToOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::Rfft2dOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::Conv3DOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::HashtableOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::HashtableFindOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::HashtableImportOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::HashtableSizeOptions> +{ + static const BuiltinOptions enum_value = 
BuiltinOptions_HashtableSizeOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::VarHandleOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::ReadVariableOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::AssignVariableOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions; +}; + +template <> struct BuiltinOptionsTraits<circle::RandomOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions; +}; + template <> struct BuiltinOptionsTraits<circle::BCQGatherOptions> { static const BuiltinOptions enum_value = BuiltinOptions_BCQGatherOptions; @@ -2112,7 +2192,7 @@ bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types); -enum Padding +enum Padding : int8_t { Padding_SAME = 0, Padding_VALID = 1, @@ -2140,7 +2220,7 @@ inline const char *EnumNamePadding(Padding e) return EnumNamesPadding()[index]; } -enum ActivationFunctionType +enum ActivationFunctionType : int8_t { ActivationFunctionType_NONE = 0, ActivationFunctionType_RELU = 1, @@ -2175,7 +2255,7 @@ inline const char *EnumNameActivationFunctionType(ActivationFunctionType e) return EnumNamesActivationFunctionType()[index]; } -enum LSHProjectionType +enum LSHProjectionType : int8_t { LSHProjectionType_UNKNOWN = 0, LSHProjectionType_SPARSE = 1, @@ -2205,7 +2285,7 @@ inline const char *EnumNameLSHProjectionType(LSHProjectionType e) return EnumNamesLSHProjectionType()[index]; } -enum FullyConnectedOptionsWeightsFormat +enum FullyConnectedOptionsWeightsFormat : int8_t { FullyConnectedOptionsWeightsFormat_DEFAULT = 0, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1, @@ -2237,7 +2317,7 @@ inline const char 
*EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOpti } } -enum LSTMKernelType +enum LSTMKernelType : int8_t { LSTMKernelType_FULL = 0, LSTMKernelType_BASIC = 1, @@ -2265,7 +2345,7 @@ inline const char *EnumNameLSTMKernelType(LSTMKernelType e) return EnumNamesLSTMKernelType()[index]; } -enum CombinerType +enum CombinerType : int8_t { CombinerType_SUM = 0, CombinerType_MEAN = 1, @@ -2294,7 +2374,7 @@ inline const char *EnumNameCombinerType(CombinerType e) return EnumNamesCombinerType()[index]; } -enum MirrorPadMode +enum MirrorPadMode : int8_t { MirrorPadMode_REFLECT = 0, MirrorPadMode_SYMMETRIC = 1, @@ -2322,7 +2402,7 @@ inline const char *EnumNameMirrorPadMode(MirrorPadMode e) return EnumNamesMirrorPadMode()[index]; } -enum CustomOptionsFormat +enum CustomOptionsFormat : int8_t { CustomOptionsFormat_FLEXBUFFERS = 0, CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS, @@ -2349,7 +2429,7 @@ inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e) return EnumNamesCustomOptionsFormat()[index]; } -enum DataFormat +enum DataFormat : int8_t { DataFormat_CHANNELS_LAST = 0, DataFormat_CHANNELS_FIRST = 1, @@ -2408,7 +2488,6 @@ struct CustomQuantizationBuilder { start_ = fbb_.StartTable(); } - CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &); flatbuffers::Offset<CustomQuantization> Finish() { const auto end = fbb_.EndTable(start_); @@ -2539,7 +2618,6 @@ struct QuantizationParametersBuilder { start_ = fbb_.StartTable(); } - QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &); flatbuffers::Offset<QuantizationParameters> Finish() { const auto end = fbb_.EndTable(start_); @@ -2613,7 +2691,6 @@ struct Int32VectorBuilder { start_ = fbb_.StartTable(); } - Int32VectorBuilder &operator=(const Int32VectorBuilder &); flatbuffers::Offset<Int32Vector> Finish() { const auto end = fbb_.EndTable(start_); @@ -2670,7 +2747,6 @@ struct Uint16VectorBuilder { start_ = fbb_.StartTable(); } - Uint16VectorBuilder 
&operator=(const Uint16VectorBuilder &); flatbuffers::Offset<Uint16Vector> Finish() { const auto end = fbb_.EndTable(start_); @@ -2731,7 +2807,6 @@ struct Uint8VectorBuilder { start_ = fbb_.StartTable(); } - Uint8VectorBuilder &operator=(const Uint8VectorBuilder &); flatbuffers::Offset<Uint8Vector> Finish() { const auto end = fbb_.EndTable(start_); @@ -2912,7 +2987,6 @@ struct DimensionMetadataBuilder { start_ = fbb_.StartTable(); } - DimensionMetadataBuilder &operator=(const DimensionMetadataBuilder &); flatbuffers::Offset<DimensionMetadata> Finish() { const auto end = fbb_.EndTable(start_); @@ -2994,7 +3068,6 @@ struct SparsityParametersBuilder { start_ = fbb_.StartTable(); } - SparsityParametersBuilder &operator=(const SparsityParametersBuilder &); flatbuffers::Offset<SparsityParameters> Finish() { const auto end = fbb_.EndTable(start_); @@ -3121,7 +3194,6 @@ struct TensorBuilder { start_ = fbb_.StartTable(); } - TensorBuilder &operator=(const TensorBuilder &); flatbuffers::Offset<Tensor> Finish() { const auto end = fbb_.EndTable(start_); @@ -3235,7 +3307,6 @@ struct Conv2DOptionsBuilder { start_ = fbb_.StartTable(); } - Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &); flatbuffers::Offset<Conv2DOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3260,6 +3331,116 @@ inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions( return builder_.Finish(); } +struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef Conv3DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_PADDING = 4, + VT_STRIDE_D = 6, + VT_STRIDE_W = 8, + VT_STRIDE_H = 10, + VT_FUSED_ACTIVATION_FUNCTION = 12, + VT_DILATION_D_FACTOR = 14, + VT_DILATION_W_FACTOR = 16, + VT_DILATION_H_FACTOR = 18 + }; + circle::Padding padding() const + { + return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0)); + } + int32_t stride_d() const { return GetField<int32_t>(VT_STRIDE_D, 0); } + int32_t 
stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } + int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } + circle::ActivationFunctionType fused_activation_function() const + { + return static_cast<circle::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_d_factor() const { return GetField<int32_t>(VT_DILATION_D_FACTOR, 1); } + int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } + int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) && + VerifyField<int32_t>(verifier, VT_STRIDE_D) && + VerifyField<int32_t>(verifier, VT_STRIDE_W) && + VerifyField<int32_t>(verifier, VT_STRIDE_H) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<int32_t>(verifier, VT_DILATION_D_FACTOR) && + VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) && + VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable(); + } +}; + +struct Conv3DOptionsBuilder +{ + typedef Conv3DOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(circle::Padding padding) + { + fbb_.AddElement<int8_t>(Conv3DOptions::VT_PADDING, static_cast<int8_t>(padding), 0); + } + void add_stride_d(int32_t stride_d) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_D, stride_d, 0); + } + void add_stride_w(int32_t stride_w) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + 
void add_dilation_d_factor(int32_t dilation_d_factor) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1); + } + void add_dilation_w_factor(int32_t dilation_w_factor) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<Conv3DOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Conv3DOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions( + flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, + int32_t stride_d = 0, int32_t stride_w = 0, int32_t stride_h = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + int32_t dilation_d_factor = 1, int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) +{ + Conv3DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + builder_.add_dilation_d_factor(dilation_d_factor); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_stride_d(stride_d); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef Pool2DOptionsBuilder Builder; @@ -3330,7 +3511,6 @@ struct Pool2DOptionsBuilder { start_ = fbb_.StartTable(); } - Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &); flatbuffers::Offset<Pool2DOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3431,7 +3611,6 @@ struct DepthwiseConv2DOptionsBuilder { 
start_ = fbb_.StartTable(); } - DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &); flatbuffers::Offset<DepthwiseConv2DOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3509,7 +3688,6 @@ struct ConcatEmbeddingsOptionsBuilder { start_ = fbb_.StartTable(); } - ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &); flatbuffers::Offset<ConcatEmbeddingsOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3574,7 +3752,6 @@ struct LSHProjectionOptionsBuilder { start_ = fbb_.StartTable(); } - LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &); flatbuffers::Offset<LSHProjectionOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3639,7 +3816,6 @@ struct SVDFOptionsBuilder { start_ = fbb_.StartTable(); } - SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &); flatbuffers::Offset<SVDFOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3704,7 +3880,6 @@ struct RNNOptionsBuilder { start_ = fbb_.StartTable(); } - RNNOptionsBuilder &operator=(const RNNOptionsBuilder &); flatbuffers::Offset<RNNOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3775,7 +3950,6 @@ struct SequenceRNNOptionsBuilder { start_ = fbb_.StartTable(); } - SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &); flatbuffers::Offset<SequenceRNNOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3855,7 +4029,6 @@ struct BidirectionalSequenceRNNOptionsBuilder { start_ = fbb_.StartTable(); } - BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &); flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3941,7 +4114,6 @@ struct FullyConnectedOptionsBuilder { start_ = fbb_.StartTable(); } - FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &); flatbuffers::Offset<FullyConnectedOptions> Finish() { const auto end = 
fbb_.EndTable(start_); @@ -3990,7 +4162,6 @@ struct SoftmaxOptionsBuilder { start_ = fbb_.StartTable(); } - SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &); flatbuffers::Offset<SoftmaxOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4043,7 +4214,6 @@ struct ConcatenationOptionsBuilder { start_ = fbb_.StartTable(); } - ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &); flatbuffers::Offset<ConcatenationOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4067,17 +4237,20 @@ struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table typedef AddOptionsBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FUSED_ACTIVATION_FUNCTION = 4 + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 }; circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } + bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable(); } }; @@ -4091,11 +4264,15 @@ struct AddOptionsBuilder fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); } + void add_pot_scale_int16(bool pot_scale_int16) + { + fbb_.AddElement<uint8_t>(AddOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16), + 1); + } explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - AddOptionsBuilder &operator=(const AddOptionsBuilder &); flatbuffers::Offset<AddOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4106,9 
+4283,11 @@ struct AddOptionsBuilder inline flatbuffers::Offset<AddOptions> CreateAddOptions( flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool pot_scale_int16 = true) { AddOptionsBuilder builder_(_fbb); + builder_.add_pot_scale_int16(pot_scale_int16); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } @@ -4146,7 +4325,6 @@ struct MulOptionsBuilder { start_ = fbb_.StartTable(); } - MulOptionsBuilder &operator=(const MulOptionsBuilder &); flatbuffers::Offset<MulOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4197,7 +4375,6 @@ struct L2NormOptionsBuilder { start_ = fbb_.StartTable(); } - L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &); flatbuffers::Offset<L2NormOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4263,8 +4440,6 @@ struct LocalResponseNormalizationOptionsBuilder { start_ = fbb_.StartTable(); } - LocalResponseNormalizationOptionsBuilder & - operator=(const LocalResponseNormalizationOptionsBuilder &); flatbuffers::Offset<LocalResponseNormalizationOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4353,7 +4528,6 @@ struct LSTMOptionsBuilder { start_ = fbb_.StartTable(); } - LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &); flatbuffers::Offset<LSTMOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4445,8 +4619,6 @@ struct UnidirectionalSequenceLSTMOptionsBuilder { start_ = fbb_.StartTable(); } - UnidirectionalSequenceLSTMOptionsBuilder & - operator=(const UnidirectionalSequenceLSTMOptionsBuilder &); flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4546,8 +4718,6 @@ struct BidirectionalSequenceLSTMOptionsBuilder { start_ = fbb_.StartTable(); } - BidirectionalSequenceLSTMOptionsBuilder & - 
operator=(const BidirectionalSequenceLSTMOptionsBuilder &); flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4608,7 +4778,6 @@ struct ResizeBilinearOptionsBuilder { start_ = fbb_.StartTable(); } - ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &); flatbuffers::Offset<ResizeBilinearOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4632,13 +4801,15 @@ struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffer typedef ResizeNearestNeighborOptionsBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ALIGN_CORNERS = 4 + VT_ALIGN_CORNERS = 4, + VT_HALF_PIXEL_CENTERS = 6 }; bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; } + bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) && - verifier.EndTable(); + VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable(); } }; @@ -4652,11 +4823,15 @@ struct ResizeNearestNeighborOptionsBuilder fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS, static_cast<uint8_t>(align_corners), 0); } + void add_half_pixel_centers(bool half_pixel_centers) + { + fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS, + static_cast<uint8_t>(half_pixel_centers), 0); + } explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &); flatbuffers::Offset<ResizeNearestNeighborOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4666,9 +4841,11 @@ struct ResizeNearestNeighborOptionsBuilder }; inline flatbuffers::Offset<ResizeNearestNeighborOptions> 
-CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false) +CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false, + bool half_pixel_centers = false) { ResizeNearestNeighborOptionsBuilder builder_(_fbb); + builder_.add_half_pixel_centers(half_pixel_centers); builder_.add_align_corners(align_corners); return builder_.Finish(); } @@ -4701,7 +4878,6 @@ struct CallOptionsBuilder { start_ = fbb_.StartTable(); } - CallOptionsBuilder &operator=(const CallOptionsBuilder &); flatbuffers::Offset<CallOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4736,7 +4912,6 @@ struct PadOptionsBuilder { start_ = fbb_.StartTable(); } - PadOptionsBuilder &operator=(const PadOptionsBuilder &); flatbuffers::Offset<PadOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4769,7 +4944,6 @@ struct PadV2OptionsBuilder { start_ = fbb_.StartTable(); } - PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &); flatbuffers::Offset<PadV2Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -4815,7 +4989,6 @@ struct ReshapeOptionsBuilder { start_ = fbb_.StartTable(); } - ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &); flatbuffers::Offset<ReshapeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4859,7 +5032,6 @@ struct SpaceToBatchNDOptionsBuilder { start_ = fbb_.StartTable(); } - SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &); flatbuffers::Offset<SpaceToBatchNDOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4893,7 +5065,6 @@ struct BatchToSpaceNDOptionsBuilder { start_ = fbb_.StartTable(); } - BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &); flatbuffers::Offset<BatchToSpaceNDOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4951,7 +5122,6 @@ struct SkipGramOptionsBuilder { start_ = fbb_.StartTable(); } - SkipGramOptionsBuilder &operator=(const 
SkipGramOptionsBuilder &); flatbuffers::Offset<SkipGramOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4999,7 +5169,6 @@ struct SpaceToDepthOptionsBuilder { start_ = fbb_.StartTable(); } - SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &); flatbuffers::Offset<SpaceToDepthOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5044,7 +5213,6 @@ struct DepthToSpaceOptionsBuilder { start_ = fbb_.StartTable(); } - DepthToSpaceOptionsBuilder &operator=(const DepthToSpaceOptionsBuilder &); flatbuffers::Offset<DepthToSpaceOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5066,17 +5234,20 @@ struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table typedef SubOptionsBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FUSED_ACTIVATION_FUNCTION = 4 + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 }; circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } + bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable(); } }; @@ -5090,11 +5261,15 @@ struct SubOptionsBuilder fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); } + void add_pot_scale_int16(bool pot_scale_int16) + { + fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16), + 1); + } explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - SubOptionsBuilder &operator=(const 
SubOptionsBuilder &); flatbuffers::Offset<SubOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5105,9 +5280,11 @@ struct SubOptionsBuilder inline flatbuffers::Offset<SubOptions> CreateSubOptions( flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool pot_scale_int16 = true) { SubOptionsBuilder builder_(_fbb); + builder_.add_pot_scale_int16(pot_scale_int16); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } @@ -5145,7 +5322,6 @@ struct DivOptionsBuilder { start_ = fbb_.StartTable(); } - DivOptionsBuilder &operator=(const DivOptionsBuilder &); flatbuffers::Offset<DivOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5181,7 +5357,6 @@ struct TopKV2OptionsBuilder { start_ = fbb_.StartTable(); } - TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &); flatbuffers::Offset<TopKV2Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -5228,7 +5403,6 @@ struct EmbeddingLookupSparseOptionsBuilder { start_ = fbb_.StartTable(); } - EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &); flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5251,13 +5425,15 @@ struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table typedef GatherOptionsBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_AXIS = 4 + VT_AXIS = 4, + VT_BATCH_DIMS = 6 }; int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); } + int32_t batch_dims() const { return GetField<int32_t>(VT_BATCH_DIMS, 0); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) && - verifier.EndTable(); + VerifyField<int32_t>(verifier, VT_BATCH_DIMS) 
&& verifier.EndTable(); } }; @@ -5267,11 +5443,14 @@ struct GatherOptionsBuilder flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); } + void add_batch_dims(int32_t batch_dims) + { + fbb_.AddElement<int32_t>(GatherOptions::VT_BATCH_DIMS, batch_dims, 0); + } explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - GatherOptionsBuilder &operator=(const GatherOptionsBuilder &); flatbuffers::Offset<GatherOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5280,10 +5459,11 @@ struct GatherOptionsBuilder } }; -inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, - int32_t axis = 0) +inline flatbuffers::Offset<GatherOptions> +CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, int32_t batch_dims = 0) { GatherOptionsBuilder builder_(_fbb); + builder_.add_batch_dims(batch_dims); builder_.add_axis(axis); return builder_.Finish(); } @@ -5306,7 +5486,6 @@ struct TransposeOptionsBuilder { start_ = fbb_.StartTable(); } - TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &); flatbuffers::Offset<TransposeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5340,7 +5519,6 @@ struct ExpOptionsBuilder { start_ = fbb_.StartTable(); } - ExpOptionsBuilder &operator=(const ExpOptionsBuilder &); flatbuffers::Offset<ExpOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5373,7 +5551,6 @@ struct CosOptionsBuilder { start_ = fbb_.StartTable(); } - CosOptionsBuilder &operator=(const CosOptionsBuilder &); flatbuffers::Offset<CosOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5416,7 +5593,6 @@ struct ReducerOptionsBuilder { start_ = fbb_.StartTable(); } - ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &); flatbuffers::Offset<ReducerOptions> Finish() { const auto end = 
fbb_.EndTable(start_); @@ -5464,7 +5640,6 @@ struct SqueezeOptionsBuilder { start_ = fbb_.StartTable(); } - SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &); flatbuffers::Offset<SqueezeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5518,7 +5693,6 @@ struct SplitOptionsBuilder { start_ = fbb_.StartTable(); } - SplitOptionsBuilder &operator=(const SplitOptionsBuilder &); flatbuffers::Offset<SplitOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5563,7 +5737,6 @@ struct SplitVOptionsBuilder { start_ = fbb_.StartTable(); } - SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &); flatbuffers::Offset<SplitVOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5635,7 +5808,6 @@ struct StridedSliceOptionsBuilder { start_ = fbb_.StartTable(); } - StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &); flatbuffers::Offset<StridedSliceOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5676,7 +5848,6 @@ struct LogSoftmaxOptionsBuilder { start_ = fbb_.StartTable(); } - LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &); flatbuffers::Offset<LogSoftmaxOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5732,7 +5903,6 @@ struct CastOptionsBuilder { start_ = fbb_.StartTable(); } - CastOptionsBuilder &operator=(const CastOptionsBuilder &); flatbuffers::Offset<CastOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5770,7 +5940,6 @@ struct DequantizeOptionsBuilder { start_ = fbb_.StartTable(); } - DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &); flatbuffers::Offset<DequantizeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5804,7 +5973,6 @@ struct MaximumMinimumOptionsBuilder { start_ = fbb_.StartTable(); } - MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &); flatbuffers::Offset<MaximumMinimumOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5838,7 +6006,6 @@ struct 
TileOptionsBuilder { start_ = fbb_.StartTable(); } - TileOptionsBuilder &operator=(const TileOptionsBuilder &); flatbuffers::Offset<TileOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5884,7 +6051,6 @@ struct ArgMaxOptionsBuilder { start_ = fbb_.StartTable(); } - ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &); flatbuffers::Offset<ArgMaxOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5933,7 +6099,6 @@ struct ArgMinOptionsBuilder { start_ = fbb_.StartTable(); } - ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &); flatbuffers::Offset<ArgMinOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5969,7 +6134,6 @@ struct GreaterOptionsBuilder { start_ = fbb_.StartTable(); } - GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &); flatbuffers::Offset<GreaterOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6003,7 +6167,6 @@ struct GreaterEqualOptionsBuilder { start_ = fbb_.StartTable(); } - GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &); flatbuffers::Offset<GreaterEqualOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6037,7 +6200,6 @@ struct LessOptionsBuilder { start_ = fbb_.StartTable(); } - LessOptionsBuilder &operator=(const LessOptionsBuilder &); flatbuffers::Offset<LessOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6070,7 +6232,6 @@ struct LessEqualOptionsBuilder { start_ = fbb_.StartTable(); } - LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &); flatbuffers::Offset<LessEqualOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6104,7 +6265,6 @@ struct NegOptionsBuilder { start_ = fbb_.StartTable(); } - NegOptionsBuilder &operator=(const NegOptionsBuilder &); flatbuffers::Offset<NegOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6137,7 +6297,6 @@ struct SelectOptionsBuilder { start_ = fbb_.StartTable(); } - SelectOptionsBuilder &operator=(const SelectOptionsBuilder &); 
flatbuffers::Offset<SelectOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6170,7 +6329,6 @@ struct SliceOptionsBuilder { start_ = fbb_.StartTable(); } - SliceOptionsBuilder &operator=(const SliceOptionsBuilder &); flatbuffers::Offset<SliceOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6229,7 +6387,6 @@ struct TransposeConvOptionsBuilder { start_ = fbb_.StartTable(); } - TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &); flatbuffers::Offset<TransposeConvOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6268,7 +6425,6 @@ struct ExpandDimsOptionsBuilder { start_ = fbb_.StartTable(); } - ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &); flatbuffers::Offset<ExpandDimsOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6313,7 +6469,6 @@ struct SparseToDenseOptionsBuilder { start_ = fbb_.StartTable(); } - SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &); flatbuffers::Offset<SparseToDenseOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6348,7 +6503,6 @@ struct EqualOptionsBuilder { start_ = fbb_.StartTable(); } - EqualOptionsBuilder &operator=(const EqualOptionsBuilder &); flatbuffers::Offset<EqualOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6381,7 +6535,6 @@ struct NotEqualOptionsBuilder { start_ = fbb_.StartTable(); } - NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &); flatbuffers::Offset<NotEqualOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6428,7 +6581,6 @@ struct ShapeOptionsBuilder { start_ = fbb_.StartTable(); } - ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &); flatbuffers::Offset<ShapeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6464,7 +6616,6 @@ struct RankOptionsBuilder { start_ = fbb_.StartTable(); } - RankOptionsBuilder &operator=(const RankOptionsBuilder &); flatbuffers::Offset<RankOptions> Finish() { const auto end = 
fbb_.EndTable(start_); @@ -6497,7 +6648,6 @@ struct PowOptionsBuilder { start_ = fbb_.StartTable(); } - PowOptionsBuilder &operator=(const PowOptionsBuilder &); flatbuffers::Offset<PowOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6554,7 +6704,6 @@ struct FakeQuantOptionsBuilder { start_ = fbb_.StartTable(); } - FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &); flatbuffers::Offset<FakeQuantOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6606,7 +6755,6 @@ struct PackOptionsBuilder { start_ = fbb_.StartTable(); } - PackOptionsBuilder &operator=(const PackOptionsBuilder &); flatbuffers::Offset<PackOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6642,7 +6790,6 @@ struct LogicalOrOptionsBuilder { start_ = fbb_.StartTable(); } - LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &); flatbuffers::Offset<LogicalOrOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6683,7 +6830,6 @@ struct OneHotOptionsBuilder { start_ = fbb_.StartTable(); } - OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &); flatbuffers::Offset<OneHotOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6718,7 +6864,6 @@ struct AbsOptionsBuilder { start_ = fbb_.StartTable(); } - AbsOptionsBuilder &operator=(const AbsOptionsBuilder &); flatbuffers::Offset<AbsOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6751,7 +6896,6 @@ struct HardSwishOptionsBuilder { start_ = fbb_.StartTable(); } - HardSwishOptionsBuilder &operator=(const HardSwishOptionsBuilder &); flatbuffers::Offset<HardSwishOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6785,7 +6929,6 @@ struct LogicalAndOptionsBuilder { start_ = fbb_.StartTable(); } - LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &); flatbuffers::Offset<LogicalAndOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6819,7 +6962,6 @@ struct LogicalNotOptionsBuilder { start_ = fbb_.StartTable(); } - 
LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &); flatbuffers::Offset<LogicalNotOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6863,7 +7005,6 @@ struct UnpackOptionsBuilder { start_ = fbb_.StartTable(); } - UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &); flatbuffers::Offset<UnpackOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6899,7 +7040,6 @@ struct FloorDivOptionsBuilder { start_ = fbb_.StartTable(); } - FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &); flatbuffers::Offset<FloorDivOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6933,7 +7073,6 @@ struct SquareOptionsBuilder { start_ = fbb_.StartTable(); } - SquareOptionsBuilder &operator=(const SquareOptionsBuilder &); flatbuffers::Offset<SquareOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6966,7 +7105,6 @@ struct ZerosLikeOptionsBuilder { start_ = fbb_.StartTable(); } - ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &); flatbuffers::Offset<ZerosLikeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7000,7 +7138,6 @@ struct FillOptionsBuilder { start_ = fbb_.StartTable(); } - FillOptionsBuilder &operator=(const FillOptionsBuilder &); flatbuffers::Offset<FillOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7033,7 +7170,6 @@ struct FloorModOptionsBuilder { start_ = fbb_.StartTable(); } - FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &); flatbuffers::Offset<FloorModOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7067,7 +7203,6 @@ struct RangeOptionsBuilder { start_ = fbb_.StartTable(); } - RangeOptionsBuilder &operator=(const RangeOptionsBuilder &); flatbuffers::Offset<RangeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7107,7 +7242,6 @@ struct LeakyReluOptionsBuilder { start_ = fbb_.StartTable(); } - LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &); 
flatbuffers::Offset<LeakyReluOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7142,7 +7276,6 @@ struct SquaredDifferenceOptionsBuilder { start_ = fbb_.StartTable(); } - SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &); flatbuffers::Offset<SquaredDifferenceOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7189,7 +7322,6 @@ struct MirrorPadOptionsBuilder { start_ = fbb_.StartTable(); } - MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &); flatbuffers::Offset<MirrorPadOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7238,7 +7370,6 @@ struct UniqueOptionsBuilder { start_ = fbb_.StartTable(); } - UniqueOptionsBuilder &operator=(const UniqueOptionsBuilder &); flatbuffers::Offset<UniqueOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7274,7 +7405,6 @@ struct ReverseV2OptionsBuilder { start_ = fbb_.StartTable(); } - ReverseV2OptionsBuilder &operator=(const ReverseV2OptionsBuilder &); flatbuffers::Offset<ReverseV2Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -7308,7 +7438,6 @@ struct AddNOptionsBuilder { start_ = fbb_.StartTable(); } - AddNOptionsBuilder &operator=(const AddNOptionsBuilder &); flatbuffers::Offset<AddNOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7341,7 +7470,6 @@ struct GatherNdOptionsBuilder { start_ = fbb_.StartTable(); } - GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &); flatbuffers::Offset<GatherNdOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7375,7 +7503,6 @@ struct WhereOptionsBuilder { start_ = fbb_.StartTable(); } - WhereOptionsBuilder &operator=(const WhereOptionsBuilder &); flatbuffers::Offset<WhereOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7424,7 +7551,6 @@ struct ReverseSequenceOptionsBuilder { start_ = fbb_.StartTable(); } - ReverseSequenceOptionsBuilder &operator=(const ReverseSequenceOptionsBuilder &); 
flatbuffers::Offset<ReverseSequenceOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7461,7 +7587,6 @@ struct MatrixDiagOptionsBuilder { start_ = fbb_.StartTable(); } - MatrixDiagOptionsBuilder &operator=(const MatrixDiagOptionsBuilder &); flatbuffers::Offset<MatrixDiagOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7495,7 +7620,6 @@ struct QuantizeOptionsBuilder { start_ = fbb_.StartTable(); } - QuantizeOptionsBuilder &operator=(const QuantizeOptionsBuilder &); flatbuffers::Offset<QuantizeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7529,7 +7653,6 @@ struct MatrixSetDiagOptionsBuilder { start_ = fbb_.StartTable(); } - MatrixSetDiagOptionsBuilder &operator=(const MatrixSetDiagOptionsBuilder &); flatbuffers::Offset<MatrixSetDiagOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7579,7 +7702,6 @@ struct IfOptionsBuilder { start_ = fbb_.StartTable(); } - IfOptionsBuilder &operator=(const IfOptionsBuilder &); flatbuffers::Offset<IfOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7598,6 +7720,50 @@ inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBui return builder_.Finish(); } +struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef CallOnceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_INIT_SUBGRAPH_INDEX = 4 + }; + int32_t init_subgraph_index() const { return GetField<int32_t>(VT_INIT_SUBGRAPH_INDEX, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INIT_SUBGRAPH_INDEX) && + verifier.EndTable(); + } +}; + +struct CallOnceOptionsBuilder +{ + typedef CallOnceOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_init_subgraph_index(int32_t init_subgraph_index) + { + fbb_.AddElement<int32_t>(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0); 
+ } + explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<CallOnceOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<CallOnceOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<CallOnceOptions> +CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t init_subgraph_index = 0) +{ + CallOnceOptionsBuilder builder_(_fbb); + builder_.add_init_subgraph_index(init_subgraph_index); + return builder_.Finish(); +} + struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef WhileOptionsBuilder Builder; @@ -7632,7 +7798,6 @@ struct WhileOptionsBuilder { start_ = fbb_.StartTable(); } - WhileOptionsBuilder &operator=(const WhileOptionsBuilder &); flatbuffers::Offset<WhileOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7669,7 +7834,6 @@ struct NonMaxSuppressionV4OptionsBuilder { start_ = fbb_.StartTable(); } - NonMaxSuppressionV4OptionsBuilder &operator=(const NonMaxSuppressionV4OptionsBuilder &); flatbuffers::Offset<NonMaxSuppressionV4Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -7703,7 +7867,6 @@ struct NonMaxSuppressionV5OptionsBuilder { start_ = fbb_.StartTable(); } - NonMaxSuppressionV5OptionsBuilder &operator=(const NonMaxSuppressionV5OptionsBuilder &); flatbuffers::Offset<NonMaxSuppressionV5Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -7737,7 +7900,6 @@ struct ScatterNdOptionsBuilder { start_ = fbb_.StartTable(); } - ScatterNdOptionsBuilder &operator=(const ScatterNdOptionsBuilder &); flatbuffers::Offset<ScatterNdOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7771,7 +7933,6 @@ struct SelectV2OptionsBuilder { start_ = fbb_.StartTable(); } - SelectV2OptionsBuilder &operator=(const SelectV2OptionsBuilder &); flatbuffers::Offset<SelectV2Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -7805,7 +7966,6 @@ struct 
DensifyOptionsBuilder { start_ = fbb_.StartTable(); } - DensifyOptionsBuilder &operator=(const DensifyOptionsBuilder &); flatbuffers::Offset<DensifyOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7839,7 +7999,6 @@ struct SegmentSumOptionsBuilder { start_ = fbb_.StartTable(); } - SegmentSumOptionsBuilder &operator=(const SegmentSumOptionsBuilder &); flatbuffers::Offset<SegmentSumOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7861,14 +8020,20 @@ struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_ADJOINT_LHS = 4, - VT_ADJOINT_RHS = 6 + VT_ADJOINT_RHS = 6, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 8 }; bool adjoint_lhs() const { return GetField<uint8_t>(VT_ADJOINT_LHS, 0) != 0; } bool adjoint_rhs() const { return GetField<uint8_t>(VT_ADJOINT_RHS, 0) != 0; } + bool asymmetric_quantize_inputs() const + { + return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJOINT_LHS) && - VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) && verifier.EndTable(); + VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) && + VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable(); } }; @@ -7887,11 +8052,15 @@ struct BatchMatMulOptionsBuilder fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_RHS, static_cast<uint8_t>(adjoint_rhs), 0); } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) + { + fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, + static_cast<uint8_t>(asymmetric_quantize_inputs), 0); + } explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - BatchMatMulOptionsBuilder &operator=(const BatchMatMulOptionsBuilder &); flatbuffers::Offset<BatchMatMulOptions> Finish() { const auto end = 
fbb_.EndTable(start_); @@ -7902,14 +8071,478 @@ struct BatchMatMulOptionsBuilder inline flatbuffers::Offset<BatchMatMulOptions> CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adjoint_lhs = false, - bool adjoint_rhs = false) + bool adjoint_rhs = false, bool asymmetric_quantize_inputs = false) { BatchMatMulOptionsBuilder builder_(_fbb); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); builder_.add_adjoint_rhs(adjoint_rhs); builder_.add_adjoint_lhs(adjoint_lhs); return builder_.Finish(); } +struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef CumsumOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_EXCLUSIVE = 4, + VT_REVERSE = 6 + }; + bool exclusive() const { return GetField<uint8_t>(VT_EXCLUSIVE, 0) != 0; } + bool reverse() const { return GetField<uint8_t>(VT_REVERSE, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_EXCLUSIVE) && + VerifyField<uint8_t>(verifier, VT_REVERSE) && verifier.EndTable(); + } +}; + +struct CumsumOptionsBuilder +{ + typedef CumsumOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_exclusive(bool exclusive) + { + fbb_.AddElement<uint8_t>(CumsumOptions::VT_EXCLUSIVE, static_cast<uint8_t>(exclusive), 0); + } + void add_reverse(bool reverse) + { + fbb_.AddElement<uint8_t>(CumsumOptions::VT_REVERSE, static_cast<uint8_t>(reverse), 0); + } + explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<CumsumOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<CumsumOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb, + bool exclusive = false, + bool reverse = false) +{ + 
CumsumOptionsBuilder builder_(_fbb); + builder_.add_reverse(reverse); + builder_.add_exclusive(exclusive); + return builder_.Finish(); +} + +struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef BroadcastToOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct BroadcastToOptionsBuilder +{ + typedef BroadcastToOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<BroadcastToOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<BroadcastToOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<BroadcastToOptions> +CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + BroadcastToOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef Rfft2dOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct Rfft2dOptionsBuilder +{ + typedef Rfft2dOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<Rfft2dOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Rfft2dOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + Rfft2dOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef HashtableOptionsBuilder 
Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_TABLE_ID = 4, + VT_KEY_DTYPE = 6, + VT_VALUE_DTYPE = 8 + }; + int32_t table_id() const { return GetField<int32_t>(VT_TABLE_ID, 0); } + circle::TensorType key_dtype() const + { + return static_cast<circle::TensorType>(GetField<int8_t>(VT_KEY_DTYPE, 0)); + } + circle::TensorType value_dtype() const + { + return static_cast<circle::TensorType>(GetField<int8_t>(VT_VALUE_DTYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_TABLE_ID) && + VerifyField<int8_t>(verifier, VT_KEY_DTYPE) && + VerifyField<int8_t>(verifier, VT_VALUE_DTYPE) && verifier.EndTable(); + } +}; + +struct HashtableOptionsBuilder +{ + typedef HashtableOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_table_id(int32_t table_id) + { + fbb_.AddElement<int32_t>(HashtableOptions::VT_TABLE_ID, table_id, 0); + } + void add_key_dtype(circle::TensorType key_dtype) + { + fbb_.AddElement<int8_t>(HashtableOptions::VT_KEY_DTYPE, static_cast<int8_t>(key_dtype), 0); + } + void add_value_dtype(circle::TensorType value_dtype) + { + fbb_.AddElement<int8_t>(HashtableOptions::VT_VALUE_DTYPE, static_cast<int8_t>(value_dtype), 0); + } + explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<HashtableOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<HashtableOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<HashtableOptions> +CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t table_id = 0, + circle::TensorType key_dtype = circle::TensorType_FLOAT32, + circle::TensorType value_dtype = circle::TensorType_FLOAT32) +{ + HashtableOptionsBuilder builder_(_fbb); + builder_.add_table_id(table_id); + builder_.add_value_dtype(value_dtype); + 
builder_.add_key_dtype(key_dtype); + return builder_.Finish(); +} + +struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef HashtableFindOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct HashtableFindOptionsBuilder +{ + typedef HashtableFindOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<HashtableFindOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<HashtableFindOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<HashtableFindOptions> +CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + HashtableFindOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef HashtableImportOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct HashtableImportOptionsBuilder +{ + typedef HashtableImportOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<HashtableImportOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<HashtableImportOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<HashtableImportOptions> +CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + HashtableImportOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table 
+{ + typedef HashtableSizeOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct HashtableSizeOptionsBuilder +{ + typedef HashtableSizeOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<HashtableSizeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<HashtableSizeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<HashtableSizeOptions> +CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + HashtableSizeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef VarHandleOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_CONTAINER = 4, + VT_SHARED_NAME = 6 + }; + const flatbuffers::String *container() const + { + return GetPointer<const flatbuffers::String *>(VT_CONTAINER); + } + const flatbuffers::String *shared_name() const + { + return GetPointer<const flatbuffers::String *>(VT_SHARED_NAME); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CONTAINER) && + verifier.VerifyString(container()) && VerifyOffset(verifier, VT_SHARED_NAME) && + verifier.VerifyString(shared_name()) && verifier.EndTable(); + } +}; + +struct VarHandleOptionsBuilder +{ + typedef VarHandleOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_container(flatbuffers::Offset<flatbuffers::String> container) + { + fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container); + } + void add_shared_name(flatbuffers::Offset<flatbuffers::String> shared_name) + { + 
fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name); + } + explicit VarHandleOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<VarHandleOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<VarHandleOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<VarHandleOptions> +CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::String> container = 0, + flatbuffers::Offset<flatbuffers::String> shared_name = 0) +{ + VarHandleOptionsBuilder builder_(_fbb); + builder_.add_shared_name(shared_name); + builder_.add_container(container); + return builder_.Finish(); +} + +inline flatbuffers::Offset<VarHandleOptions> +CreateVarHandleOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *container = nullptr, + const char *shared_name = nullptr) +{ + auto container__ = container ? _fbb.CreateString(container) : 0; + auto shared_name__ = shared_name ? 
_fbb.CreateString(shared_name) : 0; + return circle::CreateVarHandleOptions(_fbb, container__, shared_name__); +} + +struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef ReadVariableOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct ReadVariableOptionsBuilder +{ + typedef ReadVariableOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ReadVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<ReadVariableOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ReadVariableOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ReadVariableOptions> +CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + ReadVariableOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef AssignVariableOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct AssignVariableOptionsBuilder +{ + typedef AssignVariableOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit AssignVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<AssignVariableOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<AssignVariableOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<AssignVariableOptions> +CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + AssignVariableOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct RandomOptions FLATBUFFERS_FINAL_CLASS : 
private flatbuffers::Table +{ + typedef RandomOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_SEED = 4, + VT_SEED2 = 6 + }; + int32_t seed() const { return GetField<int32_t>(VT_SEED, 0); } + int32_t seed2() const { return GetField<int32_t>(VT_SEED2, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_SEED) && + VerifyField<int32_t>(verifier, VT_SEED2) && verifier.EndTable(); + } +}; + +struct RandomOptionsBuilder +{ + typedef RandomOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_seed(int32_t seed) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED, seed, 0); } + void add_seed2(int32_t seed2) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED2, seed2, 0); } + explicit RandomOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<RandomOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<RandomOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb, + int32_t seed = 0, int32_t seed2 = 0) +{ + RandomOptionsBuilder builder_(_fbb); + builder_.add_seed2(seed2); + builder_.add_seed(seed); + return builder_.Finish(); +} + struct BCQGatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef BCQGatherOptionsBuilder Builder; @@ -7941,7 +8574,6 @@ struct BCQGatherOptionsBuilder { start_ = fbb_.StartTable(); } - BCQGatherOptionsBuilder &operator=(const BCQGatherOptionsBuilder &); flatbuffers::Offset<BCQGatherOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -8000,7 +8632,6 @@ struct BCQFullyConnectedOptionsBuilder { start_ = fbb_.StartTable(); } - BCQFullyConnectedOptionsBuilder &operator=(const BCQFullyConnectedOptionsBuilder &); flatbuffers::Offset<BCQFullyConnectedOptions> 
Finish() { const auto end = fbb_.EndTable(start_); @@ -8058,7 +8689,6 @@ struct InstanceNormOptionsBuilder { start_ = fbb_.StartTable(); } - InstanceNormOptionsBuilder &operator=(const InstanceNormOptionsBuilder &); flatbuffers::Offset<InstanceNormOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -8082,24 +8712,28 @@ struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table typedef OperatorCodeBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_BUILTIN_CODE = 4, + VT_DEPRECATED_BUILTIN_CODE = 4, VT_CUSTOM_CODE = 6, - VT_VERSION = 8 + VT_VERSION = 8, + VT_BUILTIN_CODE = 10 }; - circle::BuiltinOperator builtin_code() const - { - return static_cast<circle::BuiltinOperator>(GetField<uint8_t>(VT_BUILTIN_CODE, 0)); - } + int8_t deprecated_builtin_code() const { return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0); } const flatbuffers::String *custom_code() const { return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE); } int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); } + circle::BuiltinOperator builtin_code() const + { + return static_cast<circle::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0)); + } bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_BUILTIN_CODE) && + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE) && VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) && - VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable(); + VerifyField<int32_t>(verifier, VT_VERSION) && + VerifyField<int32_t>(verifier, VT_BUILTIN_CODE) && verifier.EndTable(); } }; @@ -8108,9 +8742,9 @@ struct OperatorCodeBuilder typedef OperatorCode Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_builtin_code(circle::BuiltinOperator builtin_code) + void add_deprecated_builtin_code(int8_t 
deprecated_builtin_code) { - fbb_.AddElement<uint8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<uint8_t>(builtin_code), 0); + fbb_.AddElement<int8_t>(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0); } void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code) { @@ -8120,11 +8754,14 @@ struct OperatorCodeBuilder { fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1); } + void add_builtin_code(circle::BuiltinOperator builtin_code) + { + fbb_.AddElement<int32_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int32_t>(builtin_code), 0); + } explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - OperatorCodeBuilder &operator=(const OperatorCodeBuilder &); flatbuffers::Offset<OperatorCode> Finish() { const auto end = fbb_.EndTable(start_); @@ -8134,24 +8771,26 @@ struct OperatorCodeBuilder }; inline flatbuffers::Offset<OperatorCode> -CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, - circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD, - flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1) +CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0, + flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1, + circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD) { OperatorCodeBuilder builder_(_fbb); + builder_.add_builtin_code(builtin_code); builder_.add_version(version); builder_.add_custom_code(custom_code); - builder_.add_builtin_code(builtin_code); + builder_.add_deprecated_builtin_code(deprecated_builtin_code); return builder_.Finish(); } inline flatbuffers::Offset<OperatorCode> -CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb, - circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD, - const char *custom_code = nullptr, int32_t version = 1) +CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb, int8_t 
deprecated_builtin_code = 0, + const char *custom_code = nullptr, int32_t version = 1, + circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD) { auto custom_code__ = custom_code ? _fbb.CreateString(custom_code) : 0; - return circle::CreateOperatorCode(_fbb, builtin_code, custom_code__, version); + return circle::CreateOperatorCode(_fbb, deprecated_builtin_code, custom_code__, version, + builtin_code); } struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -8796,6 +9435,84 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table ? static_cast<const circle::BatchMatMulOptions *>(builtin_options()) : nullptr; } + const circle::CumsumOptions *builtin_options_as_CumsumOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_CumsumOptions + ? static_cast<const circle::CumsumOptions *>(builtin_options()) + : nullptr; + } + const circle::CallOnceOptions *builtin_options_as_CallOnceOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_CallOnceOptions + ? static_cast<const circle::CallOnceOptions *>(builtin_options()) + : nullptr; + } + const circle::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_BroadcastToOptions + ? static_cast<const circle::BroadcastToOptions *>(builtin_options()) + : nullptr; + } + const circle::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_Rfft2dOptions + ? static_cast<const circle::Rfft2dOptions *>(builtin_options()) + : nullptr; + } + const circle::Conv3DOptions *builtin_options_as_Conv3DOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_Conv3DOptions + ? 
static_cast<const circle::Conv3DOptions *>(builtin_options()) + : nullptr; + } + const circle::HashtableOptions *builtin_options_as_HashtableOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_HashtableOptions + ? static_cast<const circle::HashtableOptions *>(builtin_options()) + : nullptr; + } + const circle::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_HashtableFindOptions + ? static_cast<const circle::HashtableFindOptions *>(builtin_options()) + : nullptr; + } + const circle::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_HashtableImportOptions + ? static_cast<const circle::HashtableImportOptions *>(builtin_options()) + : nullptr; + } + const circle::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_HashtableSizeOptions + ? static_cast<const circle::HashtableSizeOptions *>(builtin_options()) + : nullptr; + } + const circle::VarHandleOptions *builtin_options_as_VarHandleOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_VarHandleOptions + ? static_cast<const circle::VarHandleOptions *>(builtin_options()) + : nullptr; + } + const circle::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_ReadVariableOptions + ? static_cast<const circle::ReadVariableOptions *>(builtin_options()) + : nullptr; + } + const circle::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_AssignVariableOptions + ? 
static_cast<const circle::AssignVariableOptions *>(builtin_options()) + : nullptr; + } + const circle::RandomOptions *builtin_options_as_RandomOptions() const + { + return builtin_options_type() == circle::BuiltinOptions_RandomOptions + ? static_cast<const circle::RandomOptions *>(builtin_options()) + : nullptr; + } const circle::BCQGatherOptions *builtin_options_as_BCQGatherOptions() const { return builtin_options_type() == circle::BuiltinOptions_BCQGatherOptions @@ -9498,6 +10215,92 @@ Operator::builtin_options_as<circle::BatchMatMulOptions>() const } template <> +inline const circle::CumsumOptions *Operator::builtin_options_as<circle::CumsumOptions>() const +{ + return builtin_options_as_CumsumOptions(); +} + +template <> +inline const circle::CallOnceOptions *Operator::builtin_options_as<circle::CallOnceOptions>() const +{ + return builtin_options_as_CallOnceOptions(); +} + +template <> +inline const circle::BroadcastToOptions * +Operator::builtin_options_as<circle::BroadcastToOptions>() const +{ + return builtin_options_as_BroadcastToOptions(); +} + +template <> +inline const circle::Rfft2dOptions *Operator::builtin_options_as<circle::Rfft2dOptions>() const +{ + return builtin_options_as_Rfft2dOptions(); +} + +template <> +inline const circle::Conv3DOptions *Operator::builtin_options_as<circle::Conv3DOptions>() const +{ + return builtin_options_as_Conv3DOptions(); +} + +template <> +inline const circle::HashtableOptions * +Operator::builtin_options_as<circle::HashtableOptions>() const +{ + return builtin_options_as_HashtableOptions(); +} + +template <> +inline const circle::HashtableFindOptions * +Operator::builtin_options_as<circle::HashtableFindOptions>() const +{ + return builtin_options_as_HashtableFindOptions(); +} + +template <> +inline const circle::HashtableImportOptions * +Operator::builtin_options_as<circle::HashtableImportOptions>() const +{ + return builtin_options_as_HashtableImportOptions(); +} + +template <> +inline const 
circle::HashtableSizeOptions * +Operator::builtin_options_as<circle::HashtableSizeOptions>() const +{ + return builtin_options_as_HashtableSizeOptions(); +} + +template <> +inline const circle::VarHandleOptions * +Operator::builtin_options_as<circle::VarHandleOptions>() const +{ + return builtin_options_as_VarHandleOptions(); +} + +template <> +inline const circle::ReadVariableOptions * +Operator::builtin_options_as<circle::ReadVariableOptions>() const +{ + return builtin_options_as_ReadVariableOptions(); +} + +template <> +inline const circle::AssignVariableOptions * +Operator::builtin_options_as<circle::AssignVariableOptions>() const +{ + return builtin_options_as_AssignVariableOptions(); +} + +template <> +inline const circle::RandomOptions *Operator::builtin_options_as<circle::RandomOptions>() const +{ + return builtin_options_as_RandomOptions(); +} + +template <> inline const circle::BCQGatherOptions * Operator::builtin_options_as<circle::BCQGatherOptions>() const { @@ -9566,7 +10369,6 @@ struct OperatorBuilder { start_ = fbb_.StartTable(); } - OperatorBuilder &operator=(const OperatorBuilder &); flatbuffers::Offset<Operator> Finish() { const auto end = fbb_.EndTable(start_); @@ -9705,7 +10507,6 @@ struct SubGraphBuilder { start_ = fbb_.StartTable(); } - SubGraphBuilder &operator=(const SubGraphBuilder &); flatbuffers::Offset<SubGraph> Finish() { const auto end = fbb_.EndTable(start_); @@ -9781,7 +10582,6 @@ struct BufferBuilder { start_ = fbb_.StartTable(); } - BufferBuilder &operator=(const BufferBuilder &); flatbuffers::Offset<Buffer> Finish() { const auto end = fbb_.EndTable(start_); @@ -9845,7 +10645,6 @@ struct MetadataBuilder { start_ = fbb_.StartTable(); } - MetadataBuilder &operator=(const MetadataBuilder &); flatbuffers::Offset<Metadata> Finish() { const auto end = fbb_.EndTable(start_); @@ -9872,6 +10671,168 @@ inline flatbuffers::Offset<Metadata> CreateMetadataDirect(flatbuffers::FlatBuffe return circle::CreateMetadata(_fbb, name__, buffer); } 
+struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef TensorMapBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_NAME = 4, + VT_TENSOR_INDEX = 6 + }; + const flatbuffers::String *name() const + { + return GetPointer<const flatbuffers::String *>(VT_NAME); + } + uint32_t tensor_index() const { return GetField<uint32_t>(VT_TENSOR_INDEX, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_TENSOR_INDEX) && + verifier.EndTable(); + } +}; + +struct TensorMapBuilder +{ + typedef TensorMap Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_name(flatbuffers::Offset<flatbuffers::String> name) + { + fbb_.AddOffset(TensorMap::VT_NAME, name); + } + void add_tensor_index(uint32_t tensor_index) + { + fbb_.AddElement<uint32_t>(TensorMap::VT_TENSOR_INDEX, tensor_index, 0); + } + explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<TensorMap> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<TensorMap>(end); + return o; + } +}; + +inline flatbuffers::Offset<TensorMap> +CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t tensor_index = 0) +{ + TensorMapBuilder builder_(_fbb); + builder_.add_tensor_index(tensor_index); + builder_.add_name(name); + return builder_.Finish(); +} + +inline flatbuffers::Offset<TensorMap> CreateTensorMapDirect(flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + uint32_t tensor_index = 0) +{ + auto name__ = name ? 
_fbb.CreateString(name) : 0; + return circle::CreateTensorMap(_fbb, name__, tensor_index); +} + +struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef SignatureDefBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_INPUTS = 4, + VT_OUTPUTS = 6, + VT_SIGNATURE_KEY = 8, + VT_SUBGRAPH_INDEX = 12 + }; + const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *inputs() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *>( + VT_INPUTS); + } + const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *outputs() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *>( + VT_OUTPUTS); + } + const flatbuffers::String *signature_key() const + { + return GetPointer<const flatbuffers::String *>(VT_SIGNATURE_KEY); + } + uint32_t subgraph_index() const { return GetField<uint32_t>(VT_SUBGRAPH_INDEX, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_INPUTS) && + verifier.VerifyVector(inputs()) && verifier.VerifyVectorOfTables(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) && + verifier.VerifyVectorOfTables(outputs()) && VerifyOffset(verifier, VT_SIGNATURE_KEY) && + verifier.VerifyString(signature_key()) && + VerifyField<uint32_t>(verifier, VT_SUBGRAPH_INDEX) && verifier.EndTable(); + } +}; + +struct SignatureDefBuilder +{ + typedef SignatureDef Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_inputs( + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> inputs) + { + fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs); + } + void add_outputs( + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> outputs) + { + fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs); + } + void 
add_signature_key(flatbuffers::Offset<flatbuffers::String> signature_key) + { + fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key); + } + void add_subgraph_index(uint32_t subgraph_index) + { + fbb_.AddElement<uint32_t>(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0); + } + explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<SignatureDef> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SignatureDef>(end); + return o; + } +}; + +inline flatbuffers::Offset<SignatureDef> CreateSignatureDef( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> outputs = 0, + flatbuffers::Offset<flatbuffers::String> signature_key = 0, uint32_t subgraph_index = 0) +{ + SignatureDefBuilder builder_(_fbb); + builder_.add_subgraph_index(subgraph_index); + builder_.add_signature_key(signature_key); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + return builder_.Finish(); +} + +inline flatbuffers::Offset<SignatureDef> CreateSignatureDefDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<flatbuffers::Offset<circle::TensorMap>> *inputs = nullptr, + const std::vector<flatbuffers::Offset<circle::TensorMap>> *outputs = nullptr, + const char *signature_key = nullptr, uint32_t subgraph_index = 0) +{ + auto inputs__ = inputs ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(*inputs) : 0; + auto outputs__ = + outputs ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(*outputs) : 0; + auto signature_key__ = signature_key ? 
_fbb.CreateString(signature_key) : 0; + return circle::CreateSignatureDef(_fbb, inputs__, outputs__, signature_key__, subgraph_index); +} + struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef ModelBuilder Builder; @@ -9883,7 +10844,8 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table VT_DESCRIPTION = 10, VT_BUFFERS = 12, VT_METADATA_BUFFER = 14, - VT_METADATA = 16 + VT_METADATA = 16, + VT_SIGNATURE_DEFS = 18 }; uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); } const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes() const @@ -9913,6 +10875,11 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *>( VT_METADATA); } + const flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>> *signature_defs() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>> *>( + VT_SIGNATURE_DEFS); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_VERSION) && @@ -9924,7 +10891,9 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) && VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) && VerifyOffset(verifier, VT_METADATA) && verifier.VerifyVector(metadata()) && - verifier.VerifyVectorOfTables(metadata()) && verifier.EndTable(); + verifier.VerifyVectorOfTables(metadata()) && VerifyOffset(verifier, VT_SIGNATURE_DEFS) && + verifier.VerifyVector(signature_defs()) && + verifier.VerifyVectorOfTables(signature_defs()) && verifier.EndTable(); } }; @@ -9963,11 +10932,16 @@ struct ModelBuilder { fbb_.AddOffset(Model::VT_METADATA, metadata); } + void add_signature_defs( + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>> + 
signature_defs) + { + fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs); + } explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - ModelBuilder &operator=(const ModelBuilder &); flatbuffers::Offset<Model> Finish() { const auto end = fbb_.EndTable(start_); @@ -9984,9 +10958,12 @@ inline flatbuffers::Offset<Model> CreateModel( flatbuffers::Offset<flatbuffers::String> description = 0, flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0, flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>> + signature_defs = 0) { ModelBuilder builder_(_fbb); + builder_.add_signature_defs(signature_defs); builder_.add_metadata(metadata); builder_.add_metadata_buffer(metadata_buffer); builder_.add_buffers(buffers); @@ -10004,7 +10981,8 @@ inline flatbuffers::Offset<Model> CreateModelDirect( const char *description = nullptr, const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr, const std::vector<int32_t> *metadata_buffer = nullptr, - const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr) + const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr, + const std::vector<flatbuffers::Offset<circle::SignatureDef>> *signature_defs = nullptr) { auto operator_codes__ = operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes) @@ -10016,8 +10994,11 @@ inline flatbuffers::Offset<Model> CreateModelDirect( auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0; auto metadata__ = metadata ? 
_fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0; + auto signature_defs__ = + signature_defs ? _fbb.CreateVector<flatbuffers::Offset<circle::SignatureDef>>(*signature_defs) + : 0; return circle::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__, buffers__, - metadata_buffer__, metadata__); + metadata_buffer__, metadata__, signature_defs__); } inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, @@ -10621,6 +11602,71 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast<const circle::BatchMatMulOptions *>(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_CumsumOptions: + { + auto ptr = reinterpret_cast<const circle::CumsumOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CallOnceOptions: + { + auto ptr = reinterpret_cast<const circle::CallOnceOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BroadcastToOptions: + { + auto ptr = reinterpret_cast<const circle::BroadcastToOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Rfft2dOptions: + { + auto ptr = reinterpret_cast<const circle::Rfft2dOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Conv3DOptions: + { + auto ptr = reinterpret_cast<const circle::Conv3DOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableOptions: + { + auto ptr = reinterpret_cast<const circle::HashtableOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableFindOptions: + { + auto ptr = reinterpret_cast<const circle::HashtableFindOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableImportOptions: + { + auto ptr = reinterpret_cast<const circle::HashtableImportOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableSizeOptions: + { + auto ptr = 
reinterpret_cast<const circle::HashtableSizeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_VarHandleOptions: + { + auto ptr = reinterpret_cast<const circle::VarHandleOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReadVariableOptions: + { + auto ptr = reinterpret_cast<const circle::ReadVariableOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AssignVariableOptions: + { + auto ptr = reinterpret_cast<const circle::AssignVariableOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RandomOptions: + { + auto ptr = reinterpret_cast<const circle::RandomOptions *>(obj); + return verifier.VerifyTable(ptr); + } case BuiltinOptions_BCQGatherOptions: { auto ptr = reinterpret_cast<const circle::BCQGatherOptions *>(obj); diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc index 9ecb7d190..ba739f618 100644 --- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc +++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc @@ -82,6 +82,27 @@ uint32_t getUint32Scalar(Operands &operands, const OperandIndex index) return static_cast<uint32_t>(int32_value); } +Activation getActivation(Operands &operands, const OperandIndex index) +{ + switch (operands.at(index).asScalar<int32_t>()) + { + case 0: + return Activation::NONE; + case 1: + return Activation::RELU; + case 2: + return Activation::RELU1; + case 3: + return Activation::RELU6; + case 4: + return Activation::TANH; + case 6: + return Activation::SIGMOID; + default: + throw std::runtime_error("Unsupported activation type"); + } +} + OperationFactory::Generator getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivation::Type op_type, float alpha = 0.f, float beta = 0.f) @@ -519,10 +540,6 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_CAST] = 
getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST); - // ANEURALNETWORKS_CAST_EX is deprecated - // TODO Remove ANEURALNETWORKS_CAST_EX - _map[ANEURALNETWORKS_CAST_EX] = _map[ANEURALNETWORKS_CAST]; - _map[ANEURALNETWORKS_CONV_2D] = [](const OperationFactory::Param &init_param, Operands &operands) { using operation::Conv2D; @@ -651,10 +668,6 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_REDUCE_SUM] = getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM); - // ANEURALNETWORKS_REDUCE_SUM_EX is deprecated - // TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX - _map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM]; - _map[ANEURALNETWORKS_SUB] = getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB); @@ -770,10 +783,6 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP); - // ANEURALNETWORKS_EXP_EX is deprecated - // TODO Remove ANEURALNETWORKS_EXP_EX - _map[ANEURALNETWORKS_EXP_EX] = _map[ANEURALNETWORKS_EXP]; - // Each input should be interpreted as follows: // 0 -> Input Tensor Index // 1 -> Axis Tensor Index @@ -791,52 +800,6 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_EQUAL] = getComparisonGenerator(operation::Comparison::ComparisonType::Equal); - // ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated - // TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX - _map[ANEURALNETWORKS_GREATER_EQUAL_EX] = [](const OperationFactory::Param &init_param, - Operands &operands) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = 
operation::Comparison::ComparisonType::GreaterEqual; - - // Output operand type must be boolean - replaceDataType(operands, outputs.at(0), DataType::BOOL8); - - return new operation::Comparison{inputs, outputs, param}; - }; - - // ANEURALNETWORKS_LESS_EX is deprecated - // TODO Remove ANEURALNETWORKS_LESS_EX - _map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param, - Operands &operands) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::Less; - - // Output operand type must be boolean - replaceDataType(operands, outputs.at(0), DataType::BOOL8); - - return new operation::Comparison{inputs, outputs, param}; - }; - _map[ANEURALNETWORKS_REDUCE_ALL] = getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL); @@ -846,61 +809,9 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_REDUCE_MAX] = getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX); - // ANEURALNETWORKS_REDUCE_MAX_EX is deprecated - // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX - _map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX]; - - // ANEURALNETWORKS_NOT_EQUAL_EX is deprecated - // TODO Remove ANEURALNETWORKS_NOT_EQUAL_EX - _map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param, - Operands &operands) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input1 Tensor Index - // 1 -> input2 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - 
operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::NotEqual; - - // Output operand type must be boolean - replaceDataType(operands, outputs.at(0), DataType::BOOL8); - - return new operation::Comparison{inputs, outputs, param}; - }; - _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); - // ANEURALNETWORKS_LOGICAL_AND_EX is deprecated - // TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX - _map[ANEURALNETWORKS_LOGICAL_AND_EX] = [](const OperationFactory::Param &init_param, - Operands &operands) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - // This operation's operands must be boolean type. - replaceDataType(operands, inputs.at(0), DataType::BOOL8); - replaceDataType(operands, inputs.at(1), DataType::BOOL8); - replaceDataType(operands, outputs.at(0), DataType::BOOL8); - - operation::ElementwiseBinary::Param param; - param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND; - - return new operation::ElementwiseBinary{inputs, outputs, param}; - }; - _map[ANEURALNETWORKS_RSQRT] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT); @@ -919,24 +830,7 @@ OperationFactory::OperationFactory() return new operation::Select{inputs, outputs}; }; - _map[ANEURALNETWORKS_SELECT_V2_EX] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 3 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> Condition Tensor Index - // 1 -> Input X(true) Tensor Index - // 2 -> Input Y(false) Tensor Index - 
OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]}; - - return new operation::Select{inputs, outputs}; - }; - - // ANEURALNETWORKS_RSQRT_EX is deprecated - // TODO Remove ANEURALNETWORKS_RSQRT_EX - _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT]; + _map[ANEURALNETWORKS_SELECT_V2_EX] = _map[ANEURALNETWORKS_SELECT]; _map[ANEURALNETWORKS_RELU] = getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU, @@ -1141,10 +1035,6 @@ OperationFactory::OperationFactory() return new operation::PReLU{inputs, outputs}; }; - // ANEURALNETWORKS_PRELU_EX is deprecated - // TODO Remove ANEURALNETWORKS_PRELU_EX - _map[ANEURALNETWORKS_PRELU_EX] = _map[ANEURALNETWORKS_PRELU]; - _map[ANEURALNETWORKS_TRANSPOSE_CONV_EX] = [](const OperationFactory::Param &init_param, Operands &operands) { assert(init_param.input_count == 6 && init_param.output_count == 1); @@ -1178,64 +1068,12 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_SQRT] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT); - // ANEURALNETWORKS_SQRT_EX is deprecated - // TODO Remove ANEURALNETWORKS_SQRT_EX - _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT]; - _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); - // ANEURALNETWORKS_LOGICAL_OR_EX is deprecated - // TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX - _map[ANEURALNETWORKS_LOGICAL_OR_EX] = [](const OperationFactory::Param &init_param, - Operands &operands) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - // This operation's operands must be boolean type. 
- replaceDataType(operands, inputs.at(0), DataType::BOOL8); - replaceDataType(operands, inputs.at(1), DataType::BOOL8); - replaceDataType(operands, outputs.at(0), DataType::BOOL8); - - operation::ElementwiseBinary::Param param; - param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR; - - return new operation::ElementwiseBinary{inputs, outputs, param}; - }; - _map[ANEURALNETWORKS_LOGICAL_NOT] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT); - // ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated - // TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX - _map[ANEURALNETWORKS_LOGICAL_NOT_EX] = [](const OperationFactory::Param &init_param, - Operands &operands) { - assert(init_param.input_count == 1 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input Tensor Index - OperandIndexSequence inputs{init_param.inputs[0]}; - - // This operation's operands must be boolean type. 
- replaceDataType(operands, inputs.at(0), DataType::BOOL8); - replaceDataType(operands, outputs.at(0), DataType::BOOL8); - - operation::ElementwiseUnary::Param param; - param.op_type = operation::ElementwiseUnary::Type::LOGICAL_NOT; - - return new operation::ElementwiseUnary{inputs, outputs, param}; - }; - _map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) { assert(init_param.input_count == 23 && init_param.output_count == 4); @@ -1280,31 +1118,7 @@ OperationFactory::OperationFactory() } operation::LSTM::Param param; - const auto activation_index = OperandIndex{init_param.inputs[20]}; - switch (operands.at(activation_index).asScalar<int32_t>()) - { - case 0: - param.activation = Activation::NONE; - break; - case 1: - param.activation = Activation::RELU; - break; - case 2: - param.activation = Activation::RELU1; - break; - case 3: - param.activation = Activation::RELU6; - break; - case 4: - param.activation = Activation::TANH; - break; - case 6: - param.activation = Activation::SIGMOID; - break; - default: - throw std::runtime_error("Unsupported activation type"); - break; - } + param.activation = getActivation(operands, OperandIndex{init_param.inputs[20]}); param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>(); param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>(); // This is initialization to prevent warning or error by static code analyzer. 
LSTM operation @@ -1378,31 +1192,7 @@ OperationFactory::OperationFactory() output_index}; operation::LSTM::Param param; - const auto activation_index = OperandIndex{init_param.inputs[20]}; - switch (operands.at(activation_index).asScalar<int32_t>()) - { - case 0: - param.activation = Activation::NONE; - break; - case 1: - param.activation = Activation::RELU; - break; - case 2: - param.activation = Activation::RELU1; - break; - case 3: - param.activation = Activation::RELU6; - break; - case 4: - param.activation = Activation::TANH; - break; - case 6: - param.activation = Activation::SIGMOID; - break; - default: - throw std::runtime_error("Unsupported activation type"); - break; - } + param.activation = getActivation(operands, OperandIndex{init_param.inputs[20]}); param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>(); param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>(); param.time_major = operands.at(OperandIndex{init_param.inputs[23]}).asScalar<bool>(); @@ -1410,29 +1200,6 @@ OperationFactory::OperationFactory() return new operation::LSTM{inputs, outputs, param}; }; - // ANEURALNETWORKS_EQUAL_EX is deprecated - // TODO Remove ANEURALNETWORKS_EQUAL_EX - _map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param, - Operands &operands) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::Equal; - - // Output operand type must be boolean - replaceDataType(operands, outputs.at(0), DataType::BOOL8); - - return new operation::Comparison{inputs, outputs, param}; - }; - 
_map[ANEURALNETWORKS_SQUARED_DIFFERENCE_EX] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 2 && init_param.output_count == 1); @@ -1470,10 +1237,6 @@ OperationFactory::OperationFactory() return new operation::TopKV2{inputs, outputs, param}; }; - // ANEURALNETWORKS_CAST_EX is deprecated - // TODO Remove ANEURALNETWORKS_CAST_EX - _map[ANEURALNETWORKS_TOPK_V2_EX] = _map[ANEURALNETWORKS_TOPK_V2]; - _map[ANEURALNETWORKS_GATHER] = [](const OperationFactory::Param &init_param, Operands &operands) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -1492,22 +1255,10 @@ OperationFactory::OperationFactory() return new operation::Gather{inputs, outputs, param}; }; - // ANEURALNETWORKS_GATHER_EX is deprecated - // TODO Remove ANEURALNETWORKS_GATHER_EX - _map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER]; - _map[ANEURALNETWORKS_NEG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::NEG); - // ANEURALNETWORKS_NEG_EX is deprecated - // TODO Remove ANEURALNETWORKS_NEG_EX - _map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG]; - _map[ANEURALNETWORKS_ABS] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ABS); - // ANEURALNETWORKS_ABS_EX is deprecated - // TODO Remove ANEURALNETWORKS_ABS_EX - _map[ANEURALNETWORKS_ABS_EX] = _map[ANEURALNETWORKS_ABS]; - _map[ANEURALNETWORKS_ARGMAX] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 2 && init_param.output_count == 1); @@ -1527,10 +1278,6 @@ OperationFactory::OperationFactory() return new operation::ArgMinMax{inputs, outputs, param}; }; - // ANEURALNETWORKS_ARGMAX_EX is deprecated - // TODO Remove ANEURALNETWORKS_ARGMAX_EX - _map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX]; - _map[ANEURALNETWORKS_ARGMIN] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 2 && init_param.output_count == 1); @@ -1630,10 +1377,6 @@ 
OperationFactory::OperationFactory() _map[ANEURALNETWORKS_REDUCE_MIN] = getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN); - // ANEURALNETWORKS_REDUCE_MIN_EX is deprecated - // TODO Remove ANEURALNETWORKS_REDUCE_MIN_EX - _map[ANEURALNETWORKS_REDUCE_MIN_EX] = _map[ANEURALNETWORKS_REDUCE_MIN]; - _map[ANEURALNETWORKS_SPLIT] = [](const OperationFactory::Param &init_param, Operands &operands) { assert(init_param.input_count == 3); assert(init_param.output_count >= 1); // At least one output tensor and axis @@ -1668,10 +1411,6 @@ OperationFactory::OperationFactory() return new operation::SplitV{inputs, outputs, param}; }; - // ANEURALNETWORKS_SPLIT_EX is deprecated - // TODO Remove ANEURALNETWORKS_SPLIT_EX - _map[ANEURALNETWORKS_SPLIT_EX] = _map[ANEURALNETWORKS_SPLIT]; - _map[ANEURALNETWORKS_UNPACK_EX] = [](const OperationFactory::Param &init_param, Operands &operands) { assert(init_param.input_count == 3 && init_param.output_count >= 1); diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc index a3038b718..3b160473d 100644 --- a/runtime/onert/frontend/tflite/src/tflite_loader.cc +++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc @@ -64,6 +64,12 @@ struct LoaderDomain class TFLiteLoader final : public base_loader::BaseLoader<LoaderDomain> { +protected: + // Different option name + // Circle: adjoint_lhs, adjoint_rhs + // TFLite: adj_x, adj_y + void loadBatchMatMul(const Operator *op, ir::Graph &subg); + public: using BaseLoader::BaseLoader; @@ -112,8 +118,40 @@ private: return subg; } + + void loadOperation(const onert_tflite::Operator *op, ir::Graph &subg) + { + auto const builtin_op = getBuiltinOperator(op); + + switch (builtin_op) + { + case onert_tflite::BuiltinOperator::BuiltinOperator_BATCH_MATMUL: + loadBatchMatMul(op, subg); + return; + default: + BaseLoader::loadOperation(op, subg); + return; + } + } }; +void TFLiteLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg) +{ 
+ ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(op, inputs, outputs); + + ir::operation::BatchMatMul::Param param; + const auto *options = op->builtin_options_as_BatchMatMulOptions(); + + param.adj_x = options->adj_x(); + param.adj_y = options->adj_y(); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::BatchMatMul(inputs, outputs, param)); + subg.addOperation(std::move(new_op)); +} + } // namespace std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename) diff --git a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h index 8e1b84e29..cec5bce74 100644 --- a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h +++ b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h @@ -26,236 +26,396 @@ namespace onert_tflite { struct CustomQuantization; +struct CustomQuantizationBuilder; struct QuantizationParameters; +struct QuantizationParametersBuilder; struct Int32Vector; +struct Int32VectorBuilder; struct Uint16Vector; +struct Uint16VectorBuilder; struct Uint8Vector; +struct Uint8VectorBuilder; struct DimensionMetadata; +struct DimensionMetadataBuilder; struct SparsityParameters; +struct SparsityParametersBuilder; struct Tensor; +struct TensorBuilder; struct Conv2DOptions; +struct Conv2DOptionsBuilder; + +struct Conv3DOptions; +struct Conv3DOptionsBuilder; struct Pool2DOptions; +struct Pool2DOptionsBuilder; struct DepthwiseConv2DOptions; +struct DepthwiseConv2DOptionsBuilder; struct ConcatEmbeddingsOptions; +struct ConcatEmbeddingsOptionsBuilder; struct LSHProjectionOptions; +struct LSHProjectionOptionsBuilder; struct SVDFOptions; +struct SVDFOptionsBuilder; struct RNNOptions; +struct RNNOptionsBuilder; struct SequenceRNNOptions; +struct SequenceRNNOptionsBuilder; struct BidirectionalSequenceRNNOptions; +struct BidirectionalSequenceRNNOptionsBuilder; struct FullyConnectedOptions; +struct FullyConnectedOptionsBuilder; struct 
SoftmaxOptions; +struct SoftmaxOptionsBuilder; struct ConcatenationOptions; +struct ConcatenationOptionsBuilder; struct AddOptions; +struct AddOptionsBuilder; struct MulOptions; +struct MulOptionsBuilder; struct L2NormOptions; +struct L2NormOptionsBuilder; struct LocalResponseNormalizationOptions; +struct LocalResponseNormalizationOptionsBuilder; struct LSTMOptions; +struct LSTMOptionsBuilder; struct UnidirectionalSequenceLSTMOptions; +struct UnidirectionalSequenceLSTMOptionsBuilder; struct BidirectionalSequenceLSTMOptions; +struct BidirectionalSequenceLSTMOptionsBuilder; struct ResizeBilinearOptions; +struct ResizeBilinearOptionsBuilder; struct ResizeNearestNeighborOptions; +struct ResizeNearestNeighborOptionsBuilder; struct CallOptions; +struct CallOptionsBuilder; struct PadOptions; +struct PadOptionsBuilder; struct PadV2Options; +struct PadV2OptionsBuilder; struct ReshapeOptions; +struct ReshapeOptionsBuilder; struct SpaceToBatchNDOptions; +struct SpaceToBatchNDOptionsBuilder; struct BatchToSpaceNDOptions; +struct BatchToSpaceNDOptionsBuilder; struct SkipGramOptions; +struct SkipGramOptionsBuilder; struct SpaceToDepthOptions; +struct SpaceToDepthOptionsBuilder; struct DepthToSpaceOptions; +struct DepthToSpaceOptionsBuilder; struct SubOptions; +struct SubOptionsBuilder; struct DivOptions; +struct DivOptionsBuilder; struct TopKV2Options; +struct TopKV2OptionsBuilder; struct EmbeddingLookupSparseOptions; +struct EmbeddingLookupSparseOptionsBuilder; struct GatherOptions; +struct GatherOptionsBuilder; struct TransposeOptions; +struct TransposeOptionsBuilder; struct ExpOptions; +struct ExpOptionsBuilder; struct CosOptions; +struct CosOptionsBuilder; struct ReducerOptions; +struct ReducerOptionsBuilder; struct SqueezeOptions; +struct SqueezeOptionsBuilder; struct SplitOptions; +struct SplitOptionsBuilder; struct SplitVOptions; +struct SplitVOptionsBuilder; struct StridedSliceOptions; +struct StridedSliceOptionsBuilder; struct LogSoftmaxOptions; +struct 
LogSoftmaxOptionsBuilder; struct CastOptions; +struct CastOptionsBuilder; struct DequantizeOptions; +struct DequantizeOptionsBuilder; struct MaximumMinimumOptions; +struct MaximumMinimumOptionsBuilder; struct TileOptions; +struct TileOptionsBuilder; struct ArgMaxOptions; +struct ArgMaxOptionsBuilder; struct ArgMinOptions; +struct ArgMinOptionsBuilder; struct GreaterOptions; +struct GreaterOptionsBuilder; struct GreaterEqualOptions; +struct GreaterEqualOptionsBuilder; struct LessOptions; +struct LessOptionsBuilder; struct LessEqualOptions; +struct LessEqualOptionsBuilder; struct NegOptions; +struct NegOptionsBuilder; struct SelectOptions; +struct SelectOptionsBuilder; struct SliceOptions; +struct SliceOptionsBuilder; struct TransposeConvOptions; +struct TransposeConvOptionsBuilder; struct ExpandDimsOptions; +struct ExpandDimsOptionsBuilder; struct SparseToDenseOptions; +struct SparseToDenseOptionsBuilder; struct EqualOptions; +struct EqualOptionsBuilder; struct NotEqualOptions; +struct NotEqualOptionsBuilder; struct ShapeOptions; +struct ShapeOptionsBuilder; struct RankOptions; +struct RankOptionsBuilder; struct PowOptions; +struct PowOptionsBuilder; struct FakeQuantOptions; +struct FakeQuantOptionsBuilder; struct PackOptions; +struct PackOptionsBuilder; struct LogicalOrOptions; +struct LogicalOrOptionsBuilder; struct OneHotOptions; +struct OneHotOptionsBuilder; struct AbsOptions; +struct AbsOptionsBuilder; struct HardSwishOptions; +struct HardSwishOptionsBuilder; struct LogicalAndOptions; +struct LogicalAndOptionsBuilder; struct LogicalNotOptions; +struct LogicalNotOptionsBuilder; struct UnpackOptions; +struct UnpackOptionsBuilder; struct FloorDivOptions; +struct FloorDivOptionsBuilder; struct SquareOptions; +struct SquareOptionsBuilder; struct ZerosLikeOptions; +struct ZerosLikeOptionsBuilder; struct FillOptions; +struct FillOptionsBuilder; struct FloorModOptions; +struct FloorModOptionsBuilder; struct RangeOptions; +struct RangeOptionsBuilder; struct 
LeakyReluOptions; +struct LeakyReluOptionsBuilder; struct SquaredDifferenceOptions; +struct SquaredDifferenceOptionsBuilder; struct MirrorPadOptions; +struct MirrorPadOptionsBuilder; struct UniqueOptions; +struct UniqueOptionsBuilder; struct ReverseV2Options; +struct ReverseV2OptionsBuilder; struct AddNOptions; +struct AddNOptionsBuilder; struct GatherNdOptions; +struct GatherNdOptionsBuilder; struct WhereOptions; +struct WhereOptionsBuilder; struct ReverseSequenceOptions; +struct ReverseSequenceOptionsBuilder; struct MatrixDiagOptions; +struct MatrixDiagOptionsBuilder; struct QuantizeOptions; +struct QuantizeOptionsBuilder; struct MatrixSetDiagOptions; +struct MatrixSetDiagOptionsBuilder; struct IfOptions; +struct IfOptionsBuilder; + +struct CallOnceOptions; +struct CallOnceOptionsBuilder; struct WhileOptions; +struct WhileOptionsBuilder; struct NonMaxSuppressionV4Options; +struct NonMaxSuppressionV4OptionsBuilder; struct NonMaxSuppressionV5Options; +struct NonMaxSuppressionV5OptionsBuilder; struct ScatterNdOptions; +struct ScatterNdOptionsBuilder; struct SelectV2Options; +struct SelectV2OptionsBuilder; struct DensifyOptions; +struct DensifyOptionsBuilder; struct SegmentSumOptions; +struct SegmentSumOptionsBuilder; struct BatchMatMulOptions; +struct BatchMatMulOptionsBuilder; + +struct CumsumOptions; +struct CumsumOptionsBuilder; + +struct BroadcastToOptions; +struct BroadcastToOptionsBuilder; + +struct Rfft2dOptions; +struct Rfft2dOptionsBuilder; + +struct HashtableOptions; +struct HashtableOptionsBuilder; + +struct HashtableFindOptions; +struct HashtableFindOptionsBuilder; + +struct HashtableImportOptions; +struct HashtableImportOptionsBuilder; + +struct HashtableSizeOptions; +struct HashtableSizeOptionsBuilder; + +struct VarHandleOptions; +struct VarHandleOptionsBuilder; + +struct ReadVariableOptions; +struct ReadVariableOptionsBuilder; + +struct AssignVariableOptions; +struct AssignVariableOptionsBuilder; + +struct RandomOptions; +struct RandomOptionsBuilder; 
struct OperatorCode; +struct OperatorCodeBuilder; struct Operator; +struct OperatorBuilder; struct SubGraph; +struct SubGraphBuilder; struct Buffer; +struct BufferBuilder; struct Metadata; +struct MetadataBuilder; + +struct TensorMap; +struct TensorMapBuilder; + +struct SignatureDef; +struct SignatureDefBuilder; struct Model; +struct ModelBuilder; -enum TensorType +enum TensorType : int8_t { TensorType_FLOAT32 = 0, TensorType_FLOAT16 = 1, @@ -268,34 +428,43 @@ enum TensorType TensorType_COMPLEX64 = 8, TensorType_INT8 = 9, TensorType_FLOAT64 = 10, + TensorType_COMPLEX128 = 11, + TensorType_UINT64 = 12, + TensorType_RESOURCE = 13, + TensorType_VARIANT = 14, + TensorType_UINT32 = 15, TensorType_MIN = TensorType_FLOAT32, - TensorType_MAX = TensorType_FLOAT64 + TensorType_MAX = TensorType_UINT32 }; -inline const TensorType (&EnumValuesTensorType())[11] +inline const TensorType (&EnumValuesTensorType())[16] { - static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, - TensorType_UINT8, TensorType_INT64, TensorType_STRING, - TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64, - TensorType_INT8, TensorType_FLOAT64}; + static const TensorType values[] = { + TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, TensorType_UINT8, + TensorType_INT64, TensorType_STRING, TensorType_BOOL, TensorType_INT16, + TensorType_COMPLEX64, TensorType_INT8, TensorType_FLOAT64, TensorType_COMPLEX128, + TensorType_UINT64, TensorType_RESOURCE, TensorType_VARIANT, TensorType_UINT32}; return values; } inline const char *const *EnumNamesTensorType() { - static const char *const names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", - "INT64", "STRING", "BOOL", "INT16", - "COMPLEX64", "INT8", "FLOAT64", nullptr}; + static const char *const names[17] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64", + "STRING", "BOOL", "INT16", "COMPLEX64", "INT8", + "FLOAT64", "COMPLEX128", "UINT64", "RESOURCE", "VARIANT", + "UINT32", nullptr}; return names; } inline 
const char *EnumNameTensorType(TensorType e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_UINT32)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesTensorType()[index]; } -enum QuantizationDetails +enum QuantizationDetails : uint8_t { QuantizationDetails_NONE = 0, QuantizationDetails_CustomQuantization = 1, @@ -312,13 +481,15 @@ inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2] inline const char *const *EnumNamesQuantizationDetails() { - static const char *const names[] = {"NONE", "CustomQuantization", nullptr}; + static const char *const names[3] = {"NONE", "CustomQuantization", nullptr}; return names; } inline const char *EnumNameQuantizationDetails(QuantizationDetails e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, QuantizationDetails_NONE, QuantizationDetails_CustomQuantization)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesQuantizationDetails()[index]; } @@ -327,7 +498,7 @@ template <typename T> struct QuantizationDetailsTraits static const QuantizationDetails enum_value = QuantizationDetails_NONE; }; -template <> struct QuantizationDetailsTraits<CustomQuantization> +template <> struct QuantizationDetailsTraits<onert_tflite::CustomQuantization> { static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization; }; @@ -338,7 +509,7 @@ bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types); -enum DimensionType +enum DimensionType : int8_t { DimensionType_DENSE = 0, DimensionType_SPARSE_CSR = 1, @@ -354,17 +525,19 @@ inline const DimensionType (&EnumValuesDimensionType())[2] inline const char *const *EnumNamesDimensionType() { - static const char *const names[] = {"DENSE", "SPARSE_CSR", nullptr}; + static const char *const names[3] = {"DENSE", 
"SPARSE_CSR", nullptr}; return names; } inline const char *EnumNameDimensionType(DimensionType e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, DimensionType_DENSE, DimensionType_SPARSE_CSR)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesDimensionType()[index]; } -enum SparseIndexVector +enum SparseIndexVector : uint8_t { SparseIndexVector_NONE = 0, SparseIndexVector_Int32Vector = 1, @@ -384,14 +557,16 @@ inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4] inline const char *const *EnumNamesSparseIndexVector() { - static const char *const names[] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector", - nullptr}; + static const char *const names[5] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector", + nullptr}; return names; } inline const char *EnumNameSparseIndexVector(SparseIndexVector e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, SparseIndexVector_NONE, SparseIndexVector_Uint8Vector)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesSparseIndexVector()[index]; } @@ -400,17 +575,17 @@ template <typename T> struct SparseIndexVectorTraits static const SparseIndexVector enum_value = SparseIndexVector_NONE; }; -template <> struct SparseIndexVectorTraits<Int32Vector> +template <> struct SparseIndexVectorTraits<onert_tflite::Int32Vector> { static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector; }; -template <> struct SparseIndexVectorTraits<Uint16Vector> +template <> struct SparseIndexVectorTraits<onert_tflite::Uint16Vector> { static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector; }; -template <> struct SparseIndexVectorTraits<Uint8Vector> +template <> struct SparseIndexVectorTraits<onert_tflite::Uint8Vector> { static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector; }; @@ -421,7 +596,7 @@ bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier, const 
flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types); -enum BuiltinOperator +enum BuiltinOperator : int32_t { BuiltinOperator_ADD = 0, BuiltinOperator_AVERAGE_POOL_2D = 1, @@ -550,11 +725,31 @@ enum BuiltinOperator BuiltinOperator_DENSIFY = 124, BuiltinOperator_SEGMENT_SUM = 125, BuiltinOperator_BATCH_MATMUL = 126, + BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127, + BuiltinOperator_CUMSUM = 128, + BuiltinOperator_CALL_ONCE = 129, + BuiltinOperator_BROADCAST_TO = 130, + BuiltinOperator_RFFT2D = 131, + BuiltinOperator_CONV_3D = 132, + BuiltinOperator_IMAG = 133, + BuiltinOperator_REAL = 134, + BuiltinOperator_COMPLEX_ABS = 135, + BuiltinOperator_HASHTABLE = 136, + BuiltinOperator_HASHTABLE_FIND = 137, + BuiltinOperator_HASHTABLE_IMPORT = 138, + BuiltinOperator_HASHTABLE_SIZE = 139, + BuiltinOperator_REDUCE_ALL = 140, + BuiltinOperator_CONV_3D_TRANSPOSE = 141, + BuiltinOperator_VAR_HANDLE = 142, + BuiltinOperator_READ_VARIABLE = 143, + BuiltinOperator_ASSIGN_VARIABLE = 144, + BuiltinOperator_BROADCAST_ARGS = 145, + BuiltinOperator_RANDOM_STANDARD_NORMAL = 146, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_BATCH_MATMUL + BuiltinOperator_MAX = BuiltinOperator_RANDOM_STANDARD_NORMAL }; -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127] +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[147] { static const BuiltinOperator values[] = {BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -682,150 +877,192 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127] BuiltinOperator_SELECT_V2, BuiltinOperator_DENSIFY, BuiltinOperator_SEGMENT_SUM, - BuiltinOperator_BATCH_MATMUL}; + BuiltinOperator_BATCH_MATMUL, + BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES, + BuiltinOperator_CUMSUM, + BuiltinOperator_CALL_ONCE, + BuiltinOperator_BROADCAST_TO, + BuiltinOperator_RFFT2D, + BuiltinOperator_CONV_3D, + BuiltinOperator_IMAG, + BuiltinOperator_REAL, + 
BuiltinOperator_COMPLEX_ABS, + BuiltinOperator_HASHTABLE, + BuiltinOperator_HASHTABLE_FIND, + BuiltinOperator_HASHTABLE_IMPORT, + BuiltinOperator_HASHTABLE_SIZE, + BuiltinOperator_REDUCE_ALL, + BuiltinOperator_CONV_3D_TRANSPOSE, + BuiltinOperator_VAR_HANDLE, + BuiltinOperator_READ_VARIABLE, + BuiltinOperator_ASSIGN_VARIABLE, + BuiltinOperator_BROADCAST_ARGS, + BuiltinOperator_RANDOM_STANDARD_NORMAL}; return values; } inline const char *const *EnumNamesBuiltinOperator() { - static const char *const names[] = {"ADD", - "AVERAGE_POOL_2D", - "CONCATENATION", - "CONV_2D", - "DEPTHWISE_CONV_2D", - "DEPTH_TO_SPACE", - "DEQUANTIZE", - "EMBEDDING_LOOKUP", - "FLOOR", - "FULLY_CONNECTED", - "HASHTABLE_LOOKUP", - "L2_NORMALIZATION", - "L2_POOL_2D", - "LOCAL_RESPONSE_NORMALIZATION", - "LOGISTIC", - "LSH_PROJECTION", - "LSTM", - "MAX_POOL_2D", - "MUL", - "RELU", - "RELU_N1_TO_1", - "RELU6", - "RESHAPE", - "RESIZE_BILINEAR", - "RNN", - "SOFTMAX", - "SPACE_TO_DEPTH", - "SVDF", - "TANH", - "CONCAT_EMBEDDINGS", - "SKIP_GRAM", - "CALL", - "CUSTOM", - "EMBEDDING_LOOKUP_SPARSE", - "PAD", - "UNIDIRECTIONAL_SEQUENCE_RNN", - "GATHER", - "BATCH_TO_SPACE_ND", - "SPACE_TO_BATCH_ND", - "TRANSPOSE", - "MEAN", - "SUB", - "DIV", - "SQUEEZE", - "UNIDIRECTIONAL_SEQUENCE_LSTM", - "STRIDED_SLICE", - "BIDIRECTIONAL_SEQUENCE_RNN", - "EXP", - "TOPK_V2", - "SPLIT", - "LOG_SOFTMAX", - "DELEGATE", - "BIDIRECTIONAL_SEQUENCE_LSTM", - "CAST", - "PRELU", - "MAXIMUM", - "ARG_MAX", - "MINIMUM", - "LESS", - "NEG", - "PADV2", - "GREATER", - "GREATER_EQUAL", - "LESS_EQUAL", - "SELECT", - "SLICE", - "SIN", - "TRANSPOSE_CONV", - "SPARSE_TO_DENSE", - "TILE", - "EXPAND_DIMS", - "EQUAL", - "NOT_EQUAL", - "LOG", - "SUM", - "SQRT", - "RSQRT", - "SHAPE", - "POW", - "ARG_MIN", - "FAKE_QUANT", - "REDUCE_PROD", - "REDUCE_MAX", - "PACK", - "LOGICAL_OR", - "ONE_HOT", - "LOGICAL_AND", - "LOGICAL_NOT", - "UNPACK", - "REDUCE_MIN", - "FLOOR_DIV", - "REDUCE_ANY", - "SQUARE", - "ZEROS_LIKE", - "FILL", - "FLOOR_MOD", - "RANGE", - 
"RESIZE_NEAREST_NEIGHBOR", - "LEAKY_RELU", - "SQUARED_DIFFERENCE", - "MIRROR_PAD", - "ABS", - "SPLIT_V", - "UNIQUE", - "CEIL", - "REVERSE_V2", - "ADD_N", - "GATHER_ND", - "COS", - "WHERE", - "RANK", - "ELU", - "REVERSE_SEQUENCE", - "MATRIX_DIAG", - "QUANTIZE", - "MATRIX_SET_DIAG", - "ROUND", - "HARD_SWISH", - "IF", - "WHILE", - "NON_MAX_SUPPRESSION_V4", - "NON_MAX_SUPPRESSION_V5", - "SCATTER_ND", - "SELECT_V2", - "DENSIFY", - "SEGMENT_SUM", - "BATCH_MATMUL", - nullptr}; + static const char *const names[148] = {"ADD", + "AVERAGE_POOL_2D", + "CONCATENATION", + "CONV_2D", + "DEPTHWISE_CONV_2D", + "DEPTH_TO_SPACE", + "DEQUANTIZE", + "EMBEDDING_LOOKUP", + "FLOOR", + "FULLY_CONNECTED", + "HASHTABLE_LOOKUP", + "L2_NORMALIZATION", + "L2_POOL_2D", + "LOCAL_RESPONSE_NORMALIZATION", + "LOGISTIC", + "LSH_PROJECTION", + "LSTM", + "MAX_POOL_2D", + "MUL", + "RELU", + "RELU_N1_TO_1", + "RELU6", + "RESHAPE", + "RESIZE_BILINEAR", + "RNN", + "SOFTMAX", + "SPACE_TO_DEPTH", + "SVDF", + "TANH", + "CONCAT_EMBEDDINGS", + "SKIP_GRAM", + "CALL", + "CUSTOM", + "EMBEDDING_LOOKUP_SPARSE", + "PAD", + "UNIDIRECTIONAL_SEQUENCE_RNN", + "GATHER", + "BATCH_TO_SPACE_ND", + "SPACE_TO_BATCH_ND", + "TRANSPOSE", + "MEAN", + "SUB", + "DIV", + "SQUEEZE", + "UNIDIRECTIONAL_SEQUENCE_LSTM", + "STRIDED_SLICE", + "BIDIRECTIONAL_SEQUENCE_RNN", + "EXP", + "TOPK_V2", + "SPLIT", + "LOG_SOFTMAX", + "DELEGATE", + "BIDIRECTIONAL_SEQUENCE_LSTM", + "CAST", + "PRELU", + "MAXIMUM", + "ARG_MAX", + "MINIMUM", + "LESS", + "NEG", + "PADV2", + "GREATER", + "GREATER_EQUAL", + "LESS_EQUAL", + "SELECT", + "SLICE", + "SIN", + "TRANSPOSE_CONV", + "SPARSE_TO_DENSE", + "TILE", + "EXPAND_DIMS", + "EQUAL", + "NOT_EQUAL", + "LOG", + "SUM", + "SQRT", + "RSQRT", + "SHAPE", + "POW", + "ARG_MIN", + "FAKE_QUANT", + "REDUCE_PROD", + "REDUCE_MAX", + "PACK", + "LOGICAL_OR", + "ONE_HOT", + "LOGICAL_AND", + "LOGICAL_NOT", + "UNPACK", + "REDUCE_MIN", + "FLOOR_DIV", + "REDUCE_ANY", + "SQUARE", + "ZEROS_LIKE", + "FILL", + "FLOOR_MOD", + "RANGE", + 
"RESIZE_NEAREST_NEIGHBOR", + "LEAKY_RELU", + "SQUARED_DIFFERENCE", + "MIRROR_PAD", + "ABS", + "SPLIT_V", + "UNIQUE", + "CEIL", + "REVERSE_V2", + "ADD_N", + "GATHER_ND", + "COS", + "WHERE", + "RANK", + "ELU", + "REVERSE_SEQUENCE", + "MATRIX_DIAG", + "QUANTIZE", + "MATRIX_SET_DIAG", + "ROUND", + "HARD_SWISH", + "IF", + "WHILE", + "NON_MAX_SUPPRESSION_V4", + "NON_MAX_SUPPRESSION_V5", + "SCATTER_ND", + "SELECT_V2", + "DENSIFY", + "SEGMENT_SUM", + "BATCH_MATMUL", + "PLACEHOLDER_FOR_GREATER_OP_CODES", + "CUMSUM", + "CALL_ONCE", + "BROADCAST_TO", + "RFFT2D", + "CONV_3D", + "IMAG", + "REAL", + "COMPLEX_ABS", + "HASHTABLE", + "HASHTABLE_FIND", + "HASHTABLE_IMPORT", + "HASHTABLE_SIZE", + "REDUCE_ALL", + "CONV_3D_TRANSPOSE", + "VAR_HANDLE", + "READ_VARIABLE", + "ASSIGN_VARIABLE", + "BROADCAST_ARGS", + "RANDOM_STANDARD_NORMAL", + nullptr}; return names; } inline const char *EnumNameBuiltinOperator(BuiltinOperator e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_RANDOM_STANDARD_NORMAL)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesBuiltinOperator()[index]; } -enum BuiltinOptions +enum BuiltinOptions : uint8_t { BuiltinOptions_NONE = 0, BuiltinOptions_Conv2DOptions = 1, @@ -929,11 +1166,24 @@ enum BuiltinOptions BuiltinOptions_DensifyOptions = 99, BuiltinOptions_SegmentSumOptions = 100, BuiltinOptions_BatchMatMulOptions = 101, + BuiltinOptions_CumsumOptions = 102, + BuiltinOptions_CallOnceOptions = 103, + BuiltinOptions_BroadcastToOptions = 104, + BuiltinOptions_Rfft2dOptions = 105, + BuiltinOptions_Conv3DOptions = 106, + BuiltinOptions_HashtableOptions = 107, + BuiltinOptions_HashtableFindOptions = 108, + BuiltinOptions_HashtableImportOptions = 109, + BuiltinOptions_HashtableSizeOptions = 110, + BuiltinOptions_VarHandleOptions = 111, + BuiltinOptions_ReadVariableOptions = 112, + BuiltinOptions_AssignVariableOptions = 113, + BuiltinOptions_RandomOptions = 114, BuiltinOptions_MIN 
= BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_BatchMatMulOptions + BuiltinOptions_MAX = BuiltinOptions_RandomOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102] +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[115] { static const BuiltinOptions values[] = {BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -1036,121 +1286,149 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102] BuiltinOptions_SelectV2Options, BuiltinOptions_DensifyOptions, BuiltinOptions_SegmentSumOptions, - BuiltinOptions_BatchMatMulOptions}; + BuiltinOptions_BatchMatMulOptions, + BuiltinOptions_CumsumOptions, + BuiltinOptions_CallOnceOptions, + BuiltinOptions_BroadcastToOptions, + BuiltinOptions_Rfft2dOptions, + BuiltinOptions_Conv3DOptions, + BuiltinOptions_HashtableOptions, + BuiltinOptions_HashtableFindOptions, + BuiltinOptions_HashtableImportOptions, + BuiltinOptions_HashtableSizeOptions, + BuiltinOptions_VarHandleOptions, + BuiltinOptions_ReadVariableOptions, + BuiltinOptions_AssignVariableOptions, + BuiltinOptions_RandomOptions}; return values; } inline const char *const *EnumNamesBuiltinOptions() { - static const char *const names[] = {"NONE", - "Conv2DOptions", - "DepthwiseConv2DOptions", - "ConcatEmbeddingsOptions", - "LSHProjectionOptions", - "Pool2DOptions", - "SVDFOptions", - "RNNOptions", - "FullyConnectedOptions", - "SoftmaxOptions", - "ConcatenationOptions", - "AddOptions", - "L2NormOptions", - "LocalResponseNormalizationOptions", - "LSTMOptions", - "ResizeBilinearOptions", - "CallOptions", - "ReshapeOptions", - "SkipGramOptions", - "SpaceToDepthOptions", - "EmbeddingLookupSparseOptions", - "MulOptions", - "PadOptions", - "GatherOptions", - "BatchToSpaceNDOptions", - "SpaceToBatchNDOptions", - "TransposeOptions", - "ReducerOptions", - "SubOptions", - "DivOptions", - "SqueezeOptions", - "SequenceRNNOptions", - "StridedSliceOptions", - "ExpOptions", - "TopKV2Options", - "SplitOptions", - "LogSoftmaxOptions", - 
"CastOptions", - "DequantizeOptions", - "MaximumMinimumOptions", - "ArgMaxOptions", - "LessOptions", - "NegOptions", - "PadV2Options", - "GreaterOptions", - "GreaterEqualOptions", - "LessEqualOptions", - "SelectOptions", - "SliceOptions", - "TransposeConvOptions", - "SparseToDenseOptions", - "TileOptions", - "ExpandDimsOptions", - "EqualOptions", - "NotEqualOptions", - "ShapeOptions", - "PowOptions", - "ArgMinOptions", - "FakeQuantOptions", - "PackOptions", - "LogicalOrOptions", - "OneHotOptions", - "LogicalAndOptions", - "LogicalNotOptions", - "UnpackOptions", - "FloorDivOptions", - "SquareOptions", - "ZerosLikeOptions", - "FillOptions", - "BidirectionalSequenceLSTMOptions", - "BidirectionalSequenceRNNOptions", - "UnidirectionalSequenceLSTMOptions", - "FloorModOptions", - "RangeOptions", - "ResizeNearestNeighborOptions", - "LeakyReluOptions", - "SquaredDifferenceOptions", - "MirrorPadOptions", - "AbsOptions", - "SplitVOptions", - "UniqueOptions", - "ReverseV2Options", - "AddNOptions", - "GatherNdOptions", - "CosOptions", - "WhereOptions", - "RankOptions", - "ReverseSequenceOptions", - "MatrixDiagOptions", - "QuantizeOptions", - "MatrixSetDiagOptions", - "HardSwishOptions", - "IfOptions", - "WhileOptions", - "DepthToSpaceOptions", - "NonMaxSuppressionV4Options", - "NonMaxSuppressionV5Options", - "ScatterNdOptions", - "SelectV2Options", - "DensifyOptions", - "SegmentSumOptions", - "BatchMatMulOptions", - nullptr}; + static const char *const names[116] = {"NONE", + "Conv2DOptions", + "DepthwiseConv2DOptions", + "ConcatEmbeddingsOptions", + "LSHProjectionOptions", + "Pool2DOptions", + "SVDFOptions", + "RNNOptions", + "FullyConnectedOptions", + "SoftmaxOptions", + "ConcatenationOptions", + "AddOptions", + "L2NormOptions", + "LocalResponseNormalizationOptions", + "LSTMOptions", + "ResizeBilinearOptions", + "CallOptions", + "ReshapeOptions", + "SkipGramOptions", + "SpaceToDepthOptions", + "EmbeddingLookupSparseOptions", + "MulOptions", + "PadOptions", + "GatherOptions", 
+ "BatchToSpaceNDOptions", + "SpaceToBatchNDOptions", + "TransposeOptions", + "ReducerOptions", + "SubOptions", + "DivOptions", + "SqueezeOptions", + "SequenceRNNOptions", + "StridedSliceOptions", + "ExpOptions", + "TopKV2Options", + "SplitOptions", + "LogSoftmaxOptions", + "CastOptions", + "DequantizeOptions", + "MaximumMinimumOptions", + "ArgMaxOptions", + "LessOptions", + "NegOptions", + "PadV2Options", + "GreaterOptions", + "GreaterEqualOptions", + "LessEqualOptions", + "SelectOptions", + "SliceOptions", + "TransposeConvOptions", + "SparseToDenseOptions", + "TileOptions", + "ExpandDimsOptions", + "EqualOptions", + "NotEqualOptions", + "ShapeOptions", + "PowOptions", + "ArgMinOptions", + "FakeQuantOptions", + "PackOptions", + "LogicalOrOptions", + "OneHotOptions", + "LogicalAndOptions", + "LogicalNotOptions", + "UnpackOptions", + "FloorDivOptions", + "SquareOptions", + "ZerosLikeOptions", + "FillOptions", + "BidirectionalSequenceLSTMOptions", + "BidirectionalSequenceRNNOptions", + "UnidirectionalSequenceLSTMOptions", + "FloorModOptions", + "RangeOptions", + "ResizeNearestNeighborOptions", + "LeakyReluOptions", + "SquaredDifferenceOptions", + "MirrorPadOptions", + "AbsOptions", + "SplitVOptions", + "UniqueOptions", + "ReverseV2Options", + "AddNOptions", + "GatherNdOptions", + "CosOptions", + "WhereOptions", + "RankOptions", + "ReverseSequenceOptions", + "MatrixDiagOptions", + "QuantizeOptions", + "MatrixSetDiagOptions", + "HardSwishOptions", + "IfOptions", + "WhileOptions", + "DepthToSpaceOptions", + "NonMaxSuppressionV4Options", + "NonMaxSuppressionV5Options", + "ScatterNdOptions", + "SelectV2Options", + "DensifyOptions", + "SegmentSumOptions", + "BatchMatMulOptions", + "CumsumOptions", + "CallOnceOptions", + "BroadcastToOptions", + "Rfft2dOptions", + "Conv3DOptions", + "HashtableOptions", + "HashtableFindOptions", + "HashtableImportOptions", + "HashtableSizeOptions", + "VarHandleOptions", + "ReadVariableOptions", + "AssignVariableOptions", + "RandomOptions", + 
nullptr}; return names; } inline const char *EnumNameBuiltinOptions(BuiltinOptions e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_RandomOptions)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesBuiltinOptions()[index]; } @@ -1159,517 +1437,582 @@ template <typename T> struct BuiltinOptionsTraits static const BuiltinOptions enum_value = BuiltinOptions_NONE; }; -template <> struct BuiltinOptionsTraits<Conv2DOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::Conv2DOptions> { static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions; }; -template <> struct BuiltinOptionsTraits<DepthwiseConv2DOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::DepthwiseConv2DOptions> { static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions; }; -template <> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ConcatEmbeddingsOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions; }; -template <> struct BuiltinOptionsTraits<LSHProjectionOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::LSHProjectionOptions> { static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions; }; -template <> struct BuiltinOptionsTraits<Pool2DOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::Pool2DOptions> { static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions; }; -template <> struct BuiltinOptionsTraits<SVDFOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SVDFOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions; }; -template <> struct BuiltinOptionsTraits<RNNOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::RNNOptions> { static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions; }; -template <> struct 
BuiltinOptionsTraits<FullyConnectedOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::FullyConnectedOptions> { static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions; }; -template <> struct BuiltinOptionsTraits<SoftmaxOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SoftmaxOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions; }; -template <> struct BuiltinOptionsTraits<ConcatenationOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ConcatenationOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions; }; -template <> struct BuiltinOptionsTraits<AddOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::AddOptions> { static const BuiltinOptions enum_value = BuiltinOptions_AddOptions; }; -template <> struct BuiltinOptionsTraits<L2NormOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::L2NormOptions> { static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions; }; -template <> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::LocalResponseNormalizationOptions> { static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions; }; -template <> struct BuiltinOptionsTraits<LSTMOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::LSTMOptions> { static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions; }; -template <> struct BuiltinOptionsTraits<ResizeBilinearOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ResizeBilinearOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions; }; -template <> struct BuiltinOptionsTraits<CallOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::CallOptions> { static const BuiltinOptions enum_value = BuiltinOptions_CallOptions; }; -template <> struct BuiltinOptionsTraits<ReshapeOptions> +template <> struct 
BuiltinOptionsTraits<onert_tflite::ReshapeOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions; }; -template <> struct BuiltinOptionsTraits<SkipGramOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SkipGramOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions; }; -template <> struct BuiltinOptionsTraits<SpaceToDepthOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SpaceToDepthOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions; }; -template <> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::EmbeddingLookupSparseOptions> { static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions; }; -template <> struct BuiltinOptionsTraits<MulOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::MulOptions> { static const BuiltinOptions enum_value = BuiltinOptions_MulOptions; }; -template <> struct BuiltinOptionsTraits<PadOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::PadOptions> { static const BuiltinOptions enum_value = BuiltinOptions_PadOptions; }; -template <> struct BuiltinOptionsTraits<GatherOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::GatherOptions> { static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions; }; -template <> struct BuiltinOptionsTraits<BatchToSpaceNDOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::BatchToSpaceNDOptions> { static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions; }; -template <> struct BuiltinOptionsTraits<SpaceToBatchNDOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SpaceToBatchNDOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions; }; -template <> struct BuiltinOptionsTraits<TransposeOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::TransposeOptions> { static const 
BuiltinOptions enum_value = BuiltinOptions_TransposeOptions; }; -template <> struct BuiltinOptionsTraits<ReducerOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ReducerOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions; }; -template <> struct BuiltinOptionsTraits<SubOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SubOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SubOptions; }; -template <> struct BuiltinOptionsTraits<DivOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::DivOptions> { static const BuiltinOptions enum_value = BuiltinOptions_DivOptions; }; -template <> struct BuiltinOptionsTraits<SqueezeOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SqueezeOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions; }; -template <> struct BuiltinOptionsTraits<SequenceRNNOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SequenceRNNOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions; }; -template <> struct BuiltinOptionsTraits<StridedSliceOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::StridedSliceOptions> { static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions; }; -template <> struct BuiltinOptionsTraits<ExpOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ExpOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions; }; -template <> struct BuiltinOptionsTraits<TopKV2Options> +template <> struct BuiltinOptionsTraits<onert_tflite::TopKV2Options> { static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options; }; -template <> struct BuiltinOptionsTraits<SplitOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SplitOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions; }; -template <> struct BuiltinOptionsTraits<LogSoftmaxOptions> +template <> struct 
BuiltinOptionsTraits<onert_tflite::LogSoftmaxOptions> { static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions; }; -template <> struct BuiltinOptionsTraits<CastOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::CastOptions> { static const BuiltinOptions enum_value = BuiltinOptions_CastOptions; }; -template <> struct BuiltinOptionsTraits<DequantizeOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::DequantizeOptions> { static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions; }; -template <> struct BuiltinOptionsTraits<MaximumMinimumOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::MaximumMinimumOptions> { static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions; }; -template <> struct BuiltinOptionsTraits<ArgMaxOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ArgMaxOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions; }; -template <> struct BuiltinOptionsTraits<LessOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::LessOptions> { static const BuiltinOptions enum_value = BuiltinOptions_LessOptions; }; -template <> struct BuiltinOptionsTraits<NegOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::NegOptions> { static const BuiltinOptions enum_value = BuiltinOptions_NegOptions; }; -template <> struct BuiltinOptionsTraits<PadV2Options> +template <> struct BuiltinOptionsTraits<onert_tflite::PadV2Options> { static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options; }; -template <> struct BuiltinOptionsTraits<GreaterOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::GreaterOptions> { static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions; }; -template <> struct BuiltinOptionsTraits<GreaterEqualOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::GreaterEqualOptions> { static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions; }; -template <> 
struct BuiltinOptionsTraits<LessEqualOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::LessEqualOptions> { static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions; }; -template <> struct BuiltinOptionsTraits<SelectOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SelectOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions; }; -template <> struct BuiltinOptionsTraits<SliceOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SliceOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions; }; -template <> struct BuiltinOptionsTraits<TransposeConvOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::TransposeConvOptions> { static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions; }; -template <> struct BuiltinOptionsTraits<SparseToDenseOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SparseToDenseOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions; }; -template <> struct BuiltinOptionsTraits<TileOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::TileOptions> { static const BuiltinOptions enum_value = BuiltinOptions_TileOptions; }; -template <> struct BuiltinOptionsTraits<ExpandDimsOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ExpandDimsOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions; }; -template <> struct BuiltinOptionsTraits<EqualOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::EqualOptions> { static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions; }; -template <> struct BuiltinOptionsTraits<NotEqualOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::NotEqualOptions> { static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions; }; -template <> struct BuiltinOptionsTraits<ShapeOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ShapeOptions> { static 
const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions; }; -template <> struct BuiltinOptionsTraits<PowOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::PowOptions> { static const BuiltinOptions enum_value = BuiltinOptions_PowOptions; }; -template <> struct BuiltinOptionsTraits<ArgMinOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ArgMinOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions; }; -template <> struct BuiltinOptionsTraits<FakeQuantOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::FakeQuantOptions> { static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions; }; -template <> struct BuiltinOptionsTraits<PackOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::PackOptions> { static const BuiltinOptions enum_value = BuiltinOptions_PackOptions; }; -template <> struct BuiltinOptionsTraits<LogicalOrOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::LogicalOrOptions> { static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions; }; -template <> struct BuiltinOptionsTraits<OneHotOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::OneHotOptions> { static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions; }; -template <> struct BuiltinOptionsTraits<LogicalAndOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::LogicalAndOptions> { static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions; }; -template <> struct BuiltinOptionsTraits<LogicalNotOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::LogicalNotOptions> { static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions; }; -template <> struct BuiltinOptionsTraits<UnpackOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::UnpackOptions> { static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions; }; -template <> struct BuiltinOptionsTraits<FloorDivOptions> +template <> struct 
BuiltinOptionsTraits<onert_tflite::FloorDivOptions> { static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions; }; -template <> struct BuiltinOptionsTraits<SquareOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SquareOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions; }; -template <> struct BuiltinOptionsTraits<ZerosLikeOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ZerosLikeOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions; }; -template <> struct BuiltinOptionsTraits<FillOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::FillOptions> { static const BuiltinOptions enum_value = BuiltinOptions_FillOptions; }; -template <> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::BidirectionalSequenceLSTMOptions> { static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions; }; -template <> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::BidirectionalSequenceRNNOptions> { static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions; }; -template <> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::UnidirectionalSequenceLSTMOptions> { static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions; }; -template <> struct BuiltinOptionsTraits<FloorModOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::FloorModOptions> { static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions; }; -template <> struct BuiltinOptionsTraits<RangeOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::RangeOptions> { static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions; }; -template <> struct BuiltinOptionsTraits<ResizeNearestNeighborOptions> 
+template <> struct BuiltinOptionsTraits<onert_tflite::ResizeNearestNeighborOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions; }; -template <> struct BuiltinOptionsTraits<LeakyReluOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::LeakyReluOptions> { static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions; }; -template <> struct BuiltinOptionsTraits<SquaredDifferenceOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SquaredDifferenceOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions; }; -template <> struct BuiltinOptionsTraits<MirrorPadOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::MirrorPadOptions> { static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions; }; -template <> struct BuiltinOptionsTraits<AbsOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::AbsOptions> { static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions; }; -template <> struct BuiltinOptionsTraits<SplitVOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SplitVOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions; }; -template <> struct BuiltinOptionsTraits<UniqueOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::UniqueOptions> { static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions; }; -template <> struct BuiltinOptionsTraits<ReverseV2Options> +template <> struct BuiltinOptionsTraits<onert_tflite::ReverseV2Options> { static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options; }; -template <> struct BuiltinOptionsTraits<AddNOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::AddNOptions> { static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions; }; -template <> struct BuiltinOptionsTraits<GatherNdOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::GatherNdOptions> { static const BuiltinOptions 
enum_value = BuiltinOptions_GatherNdOptions; }; -template <> struct BuiltinOptionsTraits<CosOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::CosOptions> { static const BuiltinOptions enum_value = BuiltinOptions_CosOptions; }; -template <> struct BuiltinOptionsTraits<WhereOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::WhereOptions> { static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions; }; -template <> struct BuiltinOptionsTraits<RankOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::RankOptions> { static const BuiltinOptions enum_value = BuiltinOptions_RankOptions; }; -template <> struct BuiltinOptionsTraits<ReverseSequenceOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ReverseSequenceOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions; }; -template <> struct BuiltinOptionsTraits<MatrixDiagOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::MatrixDiagOptions> { static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions; }; -template <> struct BuiltinOptionsTraits<QuantizeOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::QuantizeOptions> { static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions; }; -template <> struct BuiltinOptionsTraits<MatrixSetDiagOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::MatrixSetDiagOptions> { static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions; }; -template <> struct BuiltinOptionsTraits<HardSwishOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::HardSwishOptions> { static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions; }; -template <> struct BuiltinOptionsTraits<IfOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::IfOptions> { static const BuiltinOptions enum_value = BuiltinOptions_IfOptions; }; -template <> struct BuiltinOptionsTraits<WhileOptions> +template <> struct 
BuiltinOptionsTraits<onert_tflite::WhileOptions> { static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions; }; -template <> struct BuiltinOptionsTraits<DepthToSpaceOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::DepthToSpaceOptions> { static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions; }; -template <> struct BuiltinOptionsTraits<NonMaxSuppressionV4Options> +template <> struct BuiltinOptionsTraits<onert_tflite::NonMaxSuppressionV4Options> { static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options; }; -template <> struct BuiltinOptionsTraits<NonMaxSuppressionV5Options> +template <> struct BuiltinOptionsTraits<onert_tflite::NonMaxSuppressionV5Options> { static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options; }; -template <> struct BuiltinOptionsTraits<ScatterNdOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::ScatterNdOptions> { static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions; }; -template <> struct BuiltinOptionsTraits<SelectV2Options> +template <> struct BuiltinOptionsTraits<onert_tflite::SelectV2Options> { static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options; }; -template <> struct BuiltinOptionsTraits<DensifyOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::DensifyOptions> { static const BuiltinOptions enum_value = BuiltinOptions_DensifyOptions; }; -template <> struct BuiltinOptionsTraits<SegmentSumOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::SegmentSumOptions> { static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions; }; -template <> struct BuiltinOptionsTraits<BatchMatMulOptions> +template <> struct BuiltinOptionsTraits<onert_tflite::BatchMatMulOptions> { static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions; }; +template <> struct BuiltinOptionsTraits<onert_tflite::CumsumOptions> +{ + static const BuiltinOptions enum_value = 
BuiltinOptions_CumsumOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::CallOnceOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::BroadcastToOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::Rfft2dOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::Conv3DOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::HashtableOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::HashtableFindOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::HashtableImportOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::HashtableSizeOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::VarHandleOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::ReadVariableOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::AssignVariableOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions; +}; + +template <> struct BuiltinOptionsTraits<onert_tflite::RandomOptions> +{ + static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions; +}; + bool 
VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types); -enum Padding +enum Padding : int8_t { Padding_SAME = 0, Padding_VALID = 1, @@ -1685,17 +2028,19 @@ inline const Padding (&EnumValuesPadding())[2] inline const char *const *EnumNamesPadding() { - static const char *const names[] = {"SAME", "VALID", nullptr}; + static const char *const names[3] = {"SAME", "VALID", nullptr}; return names; } inline const char *EnumNamePadding(Padding e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, Padding_SAME, Padding_VALID)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesPadding()[index]; } -enum ActivationFunctionType +enum ActivationFunctionType : int8_t { ActivationFunctionType_NONE = 0, ActivationFunctionType_RELU = 1, @@ -1717,18 +2062,20 @@ inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] inline const char *const *EnumNamesActivationFunctionType() { - static const char *const names[] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6", - "TANH", "SIGN_BIT", nullptr}; + static const char *const names[7] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6", + "TANH", "SIGN_BIT", nullptr}; return names; } inline const char *EnumNameActivationFunctionType(ActivationFunctionType e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, ActivationFunctionType_NONE, ActivationFunctionType_SIGN_BIT)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesActivationFunctionType()[index]; } -enum LSHProjectionType +enum LSHProjectionType : int8_t { LSHProjectionType_UNKNOWN = 0, LSHProjectionType_SPARSE = 1, @@ -1746,17 +2093,19 @@ inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3] inline const char *const *EnumNamesLSHProjectionType() { - static 
const char *const names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr}; + static const char *const names[4] = {"UNKNOWN", "SPARSE", "DENSE", nullptr}; return names; } inline const char *EnumNameLSHProjectionType(LSHProjectionType e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, LSHProjectionType_UNKNOWN, LSHProjectionType_DENSE)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesLSHProjectionType()[index]; } -enum FullyConnectedOptionsWeightsFormat +enum FullyConnectedOptionsWeightsFormat : int8_t { FullyConnectedOptionsWeightsFormat_DEFAULT = 0, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1, @@ -1774,17 +2123,20 @@ inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOption inline const char *const *EnumNamesFullyConnectedOptionsWeightsFormat() { - static const char *const names[] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr}; + static const char *const names[3] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr}; return names; } inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesFullyConnectedOptionsWeightsFormat()[index]; } -enum LSTMKernelType +enum LSTMKernelType : int8_t { LSTMKernelType_FULL = 0, LSTMKernelType_BASIC = 1, @@ -1800,17 +2152,19 @@ inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2] inline const char *const *EnumNamesLSTMKernelType() { - static const char *const names[] = {"FULL", "BASIC", nullptr}; + static const char *const names[3] = {"FULL", "BASIC", nullptr}; return names; } inline const char *EnumNameLSTMKernelType(LSTMKernelType e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, LSTMKernelType_FULL, LSTMKernelType_BASIC)) + 
return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesLSTMKernelType()[index]; } -enum CombinerType +enum CombinerType : int8_t { CombinerType_SUM = 0, CombinerType_MEAN = 1, @@ -1827,17 +2181,19 @@ inline const CombinerType (&EnumValuesCombinerType())[3] inline const char *const *EnumNamesCombinerType() { - static const char *const names[] = {"SUM", "MEAN", "SQRTN", nullptr}; + static const char *const names[4] = {"SUM", "MEAN", "SQRTN", nullptr}; return names; } inline const char *EnumNameCombinerType(CombinerType e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, CombinerType_SUM, CombinerType_SQRTN)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesCombinerType()[index]; } -enum MirrorPadMode +enum MirrorPadMode : int8_t { MirrorPadMode_REFLECT = 0, MirrorPadMode_SYMMETRIC = 1, @@ -1853,17 +2209,19 @@ inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2] inline const char *const *EnumNamesMirrorPadMode() { - static const char *const names[] = {"REFLECT", "SYMMETRIC", nullptr}; + static const char *const names[3] = {"REFLECT", "SYMMETRIC", nullptr}; return names; } inline const char *EnumNameMirrorPadMode(MirrorPadMode e) { - const size_t index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesMirrorPadMode()[index]; } -enum CustomOptionsFormat +enum CustomOptionsFormat : int8_t { CustomOptionsFormat_FLEXBUFFERS = 0, CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS, @@ -1878,19 +2236,22 @@ inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] inline const char *const *EnumNamesCustomOptionsFormat() { - static const char *const names[] = {"FLEXBUFFERS", nullptr}; + static const char *const names[2] = {"FLEXBUFFERS", nullptr}; return names; } inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e) { - const size_t 
index = static_cast<int>(e); + if (flatbuffers::IsOutRange(e, CustomOptionsFormat_FLEXBUFFERS, CustomOptionsFormat_FLEXBUFFERS)) + return ""; + const size_t index = static_cast<size_t>(e); return EnumNamesCustomOptionsFormat()[index]; } struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef CustomQuantizationBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_CUSTOM = 4 }; @@ -1907,6 +2268,7 @@ struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct CustomQuantizationBuilder { + typedef CustomQuantization Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom) @@ -1917,7 +2279,6 @@ struct CustomQuantizationBuilder { start_ = fbb_.StartTable(); } - CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &); flatbuffers::Offset<CustomQuantization> Finish() { const auto end = fbb_.EndTable(start_); @@ -1939,13 +2300,18 @@ inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb, const std::vector<uint8_t> *custom = nullptr) { - return onert_tflite::CreateCustomQuantization(_fbb, - custom ? _fbb.CreateVector<uint8_t>(*custom) : 0); + if (custom) + { + _fbb.ForceVectorAlignment(custom->size(), sizeof(uint8_t), 16); + } + auto custom__ = custom ? 
_fbb.CreateVector<uint8_t>(*custom) : 0; + return onert_tflite::CreateCustomQuantization(_fbb, custom__); } struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef QuantizationParametersBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_MIN = 4, VT_MAX = 6, @@ -1971,16 +2337,16 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab { return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT); } - QuantizationDetails details_type() const + onert_tflite::QuantizationDetails details_type() const { - return static_cast<QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0)); + return static_cast<onert_tflite::QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0)); } const void *details() const { return GetPointer<const void *>(VT_DETAILS); } template <typename T> const T *details_as() const; - const CustomQuantization *details_as_CustomQuantization() const + const onert_tflite::CustomQuantization *details_as_CustomQuantization() const { - return details_type() == QuantizationDetails_CustomQuantization - ? static_cast<const CustomQuantization *>(details()) + return details_type() == onert_tflite::QuantizationDetails_CustomQuantization + ? 
static_cast<const onert_tflite::CustomQuantization *>(details()) : nullptr; } int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); } @@ -1998,13 +2364,15 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab }; template <> -inline const CustomQuantization *QuantizationParameters::details_as<CustomQuantization>() const +inline const onert_tflite::CustomQuantization * +QuantizationParameters::details_as<onert_tflite::CustomQuantization>() const { return details_as_CustomQuantization(); } struct QuantizationParametersBuilder { + typedef QuantizationParameters Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min) @@ -2023,7 +2391,7 @@ struct QuantizationParametersBuilder { fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point); } - void add_details_type(QuantizationDetails details_type) + void add_details_type(onert_tflite::QuantizationDetails details_type) { fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE, static_cast<uint8_t>(details_type), 0); @@ -2041,7 +2409,6 @@ struct QuantizationParametersBuilder { start_ = fbb_.StartTable(); } - QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &); flatbuffers::Offset<QuantizationParameters> Finish() { const auto end = fbb_.EndTable(start_); @@ -2050,14 +2417,13 @@ struct QuantizationParametersBuilder } }; -inline flatbuffers::Offset<QuantizationParameters> -CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset<flatbuffers::Vector<float>> min = 0, - flatbuffers::Offset<flatbuffers::Vector<float>> max = 0, - flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0, - flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0, - QuantizationDetails details_type = QuantizationDetails_NONE, - flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) +inline 
flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters( + flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0, + flatbuffers::Offset<flatbuffers::Vector<float>> max = 0, + flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0, + flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0, + onert_tflite::QuantizationDetails details_type = onert_tflite::QuantizationDetails_NONE, + flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) { QuantizationParametersBuilder builder_(_fbb); builder_.add_quantized_dimension(quantized_dimension); @@ -2074,19 +2440,21 @@ inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersD flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr, const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr, const std::vector<int64_t> *zero_point = nullptr, - QuantizationDetails details_type = QuantizationDetails_NONE, + onert_tflite::QuantizationDetails details_type = onert_tflite::QuantizationDetails_NONE, flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) { - return onert_tflite::CreateQuantizationParameters( - _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0, - scale ? _fbb.CreateVector<float>(*scale) : 0, - zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details, - quantized_dimension); + auto min__ = min ? _fbb.CreateVector<float>(*min) : 0; + auto max__ = max ? _fbb.CreateVector<float>(*max) : 0; + auto scale__ = scale ? _fbb.CreateVector<float>(*scale) : 0; + auto zero_point__ = zero_point ? 
_fbb.CreateVector<int64_t>(*zero_point) : 0; + return onert_tflite::CreateQuantizationParameters(_fbb, min__, max__, scale__, zero_point__, + details_type, details, quantized_dimension); } struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef Int32VectorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_VALUES = 4 }; @@ -2103,6 +2471,7 @@ struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct Int32VectorBuilder { + typedef Int32Vector Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_values(flatbuffers::Offset<flatbuffers::Vector<int32_t>> values) @@ -2113,7 +2482,6 @@ struct Int32VectorBuilder { start_ = fbb_.StartTable(); } - Int32VectorBuilder &operator=(const Int32VectorBuilder &); flatbuffers::Offset<Int32Vector> Finish() { const auto end = fbb_.EndTable(start_); @@ -2135,12 +2503,14 @@ inline flatbuffers::Offset<Int32Vector> CreateInt32VectorDirect(flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *values = nullptr) { - return onert_tflite::CreateInt32Vector(_fbb, values ? _fbb.CreateVector<int32_t>(*values) : 0); + auto values__ = values ? 
_fbb.CreateVector<int32_t>(*values) : 0; + return onert_tflite::CreateInt32Vector(_fbb, values__); } struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef Uint16VectorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_VALUES = 4 }; @@ -2157,6 +2527,7 @@ struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct Uint16VectorBuilder { + typedef Uint16Vector Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_values(flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values) @@ -2167,7 +2538,6 @@ struct Uint16VectorBuilder { start_ = fbb_.StartTable(); } - Uint16VectorBuilder &operator=(const Uint16VectorBuilder &); flatbuffers::Offset<Uint16Vector> Finish() { const auto end = fbb_.EndTable(start_); @@ -2189,12 +2559,18 @@ inline flatbuffers::Offset<Uint16Vector> CreateUint16VectorDirect(flatbuffers::FlatBufferBuilder &_fbb, const std::vector<uint16_t> *values = nullptr) { - return onert_tflite::CreateUint16Vector(_fbb, values ? _fbb.CreateVector<uint16_t>(*values) : 0); + if (values) + { + _fbb.ForceVectorAlignment(values->size(), sizeof(uint16_t), 4); + } + auto values__ = values ? 
_fbb.CreateVector<uint16_t>(*values) : 0; + return onert_tflite::CreateUint16Vector(_fbb, values__); } struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef Uint8VectorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_VALUES = 4 }; @@ -2211,6 +2587,7 @@ struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct Uint8VectorBuilder { + typedef Uint8Vector Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_values(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values) @@ -2221,7 +2598,6 @@ struct Uint8VectorBuilder { start_ = fbb_.StartTable(); } - Uint8VectorBuilder &operator=(const Uint8VectorBuilder &); flatbuffers::Offset<Uint8Vector> Finish() { const auto end = fbb_.EndTable(start_); @@ -2243,12 +2619,18 @@ inline flatbuffers::Offset<Uint8Vector> CreateUint8VectorDirect(flatbuffers::FlatBufferBuilder &_fbb, const std::vector<uint8_t> *values = nullptr) { - return onert_tflite::CreateUint8Vector(_fbb, values ? _fbb.CreateVector<uint8_t>(*values) : 0); + if (values) + { + _fbb.ForceVectorAlignment(values->size(), sizeof(uint8_t), 4); + } + auto values__ = values ? 
_fbb.CreateVector<uint8_t>(*values) : 0; + return onert_tflite::CreateUint8Vector(_fbb, values__); } struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef DimensionMetadataBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_FORMAT = 4, VT_DENSE_SIZE = 6, @@ -2257,57 +2639,59 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table VT_ARRAY_INDICES_TYPE = 12, VT_ARRAY_INDICES = 14 }; - DimensionType format() const + onert_tflite::DimensionType format() const { - return static_cast<DimensionType>(GetField<int8_t>(VT_FORMAT, 0)); + return static_cast<onert_tflite::DimensionType>(GetField<int8_t>(VT_FORMAT, 0)); } int32_t dense_size() const { return GetField<int32_t>(VT_DENSE_SIZE, 0); } - SparseIndexVector array_segments_type() const + onert_tflite::SparseIndexVector array_segments_type() const { - return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0)); + return static_cast<onert_tflite::SparseIndexVector>( + GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0)); } const void *array_segments() const { return GetPointer<const void *>(VT_ARRAY_SEGMENTS); } template <typename T> const T *array_segments_as() const; - const Int32Vector *array_segments_as_Int32Vector() const + const onert_tflite::Int32Vector *array_segments_as_Int32Vector() const { - return array_segments_type() == SparseIndexVector_Int32Vector - ? static_cast<const Int32Vector *>(array_segments()) + return array_segments_type() == onert_tflite::SparseIndexVector_Int32Vector + ? static_cast<const onert_tflite::Int32Vector *>(array_segments()) : nullptr; } - const Uint16Vector *array_segments_as_Uint16Vector() const + const onert_tflite::Uint16Vector *array_segments_as_Uint16Vector() const { - return array_segments_type() == SparseIndexVector_Uint16Vector - ? 
static_cast<const Uint16Vector *>(array_segments()) + return array_segments_type() == onert_tflite::SparseIndexVector_Uint16Vector + ? static_cast<const onert_tflite::Uint16Vector *>(array_segments()) : nullptr; } - const Uint8Vector *array_segments_as_Uint8Vector() const + const onert_tflite::Uint8Vector *array_segments_as_Uint8Vector() const { - return array_segments_type() == SparseIndexVector_Uint8Vector - ? static_cast<const Uint8Vector *>(array_segments()) + return array_segments_type() == onert_tflite::SparseIndexVector_Uint8Vector + ? static_cast<const onert_tflite::Uint8Vector *>(array_segments()) : nullptr; } - SparseIndexVector array_indices_type() const + onert_tflite::SparseIndexVector array_indices_type() const { - return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0)); + return static_cast<onert_tflite::SparseIndexVector>( + GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0)); } const void *array_indices() const { return GetPointer<const void *>(VT_ARRAY_INDICES); } template <typename T> const T *array_indices_as() const; - const Int32Vector *array_indices_as_Int32Vector() const + const onert_tflite::Int32Vector *array_indices_as_Int32Vector() const { - return array_indices_type() == SparseIndexVector_Int32Vector - ? static_cast<const Int32Vector *>(array_indices()) + return array_indices_type() == onert_tflite::SparseIndexVector_Int32Vector + ? static_cast<const onert_tflite::Int32Vector *>(array_indices()) : nullptr; } - const Uint16Vector *array_indices_as_Uint16Vector() const + const onert_tflite::Uint16Vector *array_indices_as_Uint16Vector() const { - return array_indices_type() == SparseIndexVector_Uint16Vector - ? static_cast<const Uint16Vector *>(array_indices()) + return array_indices_type() == onert_tflite::SparseIndexVector_Uint16Vector + ? 
static_cast<const onert_tflite::Uint16Vector *>(array_indices()) : nullptr; } - const Uint8Vector *array_indices_as_Uint8Vector() const + const onert_tflite::Uint8Vector *array_indices_as_Uint8Vector() const { - return array_indices_type() == SparseIndexVector_Uint8Vector - ? static_cast<const Uint8Vector *>(array_indices()) + return array_indices_type() == onert_tflite::SparseIndexVector_Uint8Vector + ? static_cast<const onert_tflite::Uint8Vector *>(array_indices()) : nullptr; } bool Verify(flatbuffers::Verifier &verifier) const @@ -2324,41 +2708,54 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table } }; -template <> inline const Int32Vector *DimensionMetadata::array_segments_as<Int32Vector>() const +template <> +inline const onert_tflite::Int32Vector * +DimensionMetadata::array_segments_as<onert_tflite::Int32Vector>() const { return array_segments_as_Int32Vector(); } -template <> inline const Uint16Vector *DimensionMetadata::array_segments_as<Uint16Vector>() const +template <> +inline const onert_tflite::Uint16Vector * +DimensionMetadata::array_segments_as<onert_tflite::Uint16Vector>() const { return array_segments_as_Uint16Vector(); } -template <> inline const Uint8Vector *DimensionMetadata::array_segments_as<Uint8Vector>() const +template <> +inline const onert_tflite::Uint8Vector * +DimensionMetadata::array_segments_as<onert_tflite::Uint8Vector>() const { return array_segments_as_Uint8Vector(); } -template <> inline const Int32Vector *DimensionMetadata::array_indices_as<Int32Vector>() const +template <> +inline const onert_tflite::Int32Vector * +DimensionMetadata::array_indices_as<onert_tflite::Int32Vector>() const { return array_indices_as_Int32Vector(); } -template <> inline const Uint16Vector *DimensionMetadata::array_indices_as<Uint16Vector>() const +template <> +inline const onert_tflite::Uint16Vector * +DimensionMetadata::array_indices_as<onert_tflite::Uint16Vector>() const { return array_indices_as_Uint16Vector(); } 
-template <> inline const Uint8Vector *DimensionMetadata::array_indices_as<Uint8Vector>() const +template <> +inline const onert_tflite::Uint8Vector * +DimensionMetadata::array_indices_as<onert_tflite::Uint8Vector>() const { return array_indices_as_Uint8Vector(); } struct DimensionMetadataBuilder { + typedef DimensionMetadata Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_format(DimensionType format) + void add_format(onert_tflite::DimensionType format) { fbb_.AddElement<int8_t>(DimensionMetadata::VT_FORMAT, static_cast<int8_t>(format), 0); } @@ -2366,7 +2763,7 @@ struct DimensionMetadataBuilder { fbb_.AddElement<int32_t>(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0); } - void add_array_segments_type(SparseIndexVector array_segments_type) + void add_array_segments_type(onert_tflite::SparseIndexVector array_segments_type) { fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE, static_cast<uint8_t>(array_segments_type), 0); @@ -2375,7 +2772,7 @@ struct DimensionMetadataBuilder { fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments); } - void add_array_indices_type(SparseIndexVector array_indices_type) + void add_array_indices_type(onert_tflite::SparseIndexVector array_indices_type) { fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_INDICES_TYPE, static_cast<uint8_t>(array_indices_type), 0); @@ -2388,7 +2785,6 @@ struct DimensionMetadataBuilder { start_ = fbb_.StartTable(); } - DimensionMetadataBuilder &operator=(const DimensionMetadataBuilder &); flatbuffers::Offset<DimensionMetadata> Finish() { const auto end = fbb_.EndTable(start_); @@ -2397,13 +2793,13 @@ struct DimensionMetadataBuilder } }; -inline flatbuffers::Offset<DimensionMetadata> -CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb, - DimensionType format = DimensionType_DENSE, int32_t dense_size = 0, - SparseIndexVector array_segments_type = SparseIndexVector_NONE, - flatbuffers::Offset<void> array_segments = 0, - 
SparseIndexVector array_indices_type = SparseIndexVector_NONE, - flatbuffers::Offset<void> array_indices = 0) +inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata( + flatbuffers::FlatBufferBuilder &_fbb, + onert_tflite::DimensionType format = onert_tflite::DimensionType_DENSE, int32_t dense_size = 0, + onert_tflite::SparseIndexVector array_segments_type = onert_tflite::SparseIndexVector_NONE, + flatbuffers::Offset<void> array_segments = 0, + onert_tflite::SparseIndexVector array_indices_type = onert_tflite::SparseIndexVector_NONE, + flatbuffers::Offset<void> array_indices = 0) { DimensionMetadataBuilder builder_(_fbb); builder_.add_array_indices(array_indices); @@ -2417,7 +2813,8 @@ CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb, struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef SparsityParametersBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_TRAVERSAL_ORDER = 4, VT_BLOCK_MAP = 6, @@ -2431,9 +2828,11 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_MAP); } - const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata() const + const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> * + dim_metadata() const { - return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *>( + return GetPointer< + const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> *>( VT_DIM_METADATA); } bool Verify(flatbuffers::Verifier &verifier) const @@ -2448,6 +2847,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SparsityParametersBuilder { + typedef SparsityParameters Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_traversal_order(flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order) @@ 
-2459,7 +2859,8 @@ struct SparsityParametersBuilder fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map); } void add_dim_metadata( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>> + dim_metadata) { fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata); } @@ -2467,7 +2868,6 @@ struct SparsityParametersBuilder { start_ = fbb_.StartTable(); } - SparsityParametersBuilder &operator=(const SparsityParametersBuilder &); flatbuffers::Offset<SparsityParameters> Finish() { const auto end = fbb_.EndTable(start_); @@ -2480,7 +2880,8 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters( flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0, flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata = 0) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>> + dim_metadata = 0) { SparsityParametersBuilder builder_(_fbb); builder_.add_dim_metadata(dim_metadata); @@ -2492,17 +2893,22 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters( inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect( flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr, const std::vector<int32_t> *block_map = nullptr, - const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr) + const std::vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> *dim_metadata = nullptr) { - return onert_tflite::CreateSparsityParameters( - _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0, - block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0, - dim_metadata ? 
_fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0); + auto traversal_order__ = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0; + auto block_map__ = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0; + auto dim_metadata__ = + dim_metadata + ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>(*dim_metadata) + : 0; + return onert_tflite::CreateSparsityParameters(_fbb, traversal_order__, block_map__, + dim_metadata__); } struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef TensorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_SHAPE = 4, VT_TYPE = 6, @@ -2517,20 +2923,23 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE); } - TensorType type() const { return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0)); } + onert_tflite::TensorType type() const + { + return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_TYPE, 0)); + } uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); } const flatbuffers::String *name() const { return GetPointer<const flatbuffers::String *>(VT_NAME); } - const QuantizationParameters *quantization() const + const onert_tflite::QuantizationParameters *quantization() const { - return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION); + return GetPointer<const onert_tflite::QuantizationParameters *>(VT_QUANTIZATION); } bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; } - const SparsityParameters *sparsity() const + const onert_tflite::SparsityParameters *sparsity() const { - return GetPointer<const SparsityParameters *>(VT_SPARSITY); + return GetPointer<const onert_tflite::SparsityParameters *>(VT_SPARSITY); } const flatbuffers::Vector<int32_t> *shape_signature() const { @@ -2551,13 +2960,14 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private 
flatbuffers::Table struct TensorBuilder { + typedef Tensor Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape) { fbb_.AddOffset(Tensor::VT_SHAPE, shape); } - void add_type(TensorType type) + void add_type(onert_tflite::TensorType type) { fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0); } @@ -2566,7 +2976,7 @@ struct TensorBuilder { fbb_.AddOffset(Tensor::VT_NAME, name); } - void add_quantization(flatbuffers::Offset<QuantizationParameters> quantization) + void add_quantization(flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization) { fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization); } @@ -2574,7 +2984,7 @@ struct TensorBuilder { fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0); } - void add_sparsity(flatbuffers::Offset<SparsityParameters> sparsity) + void add_sparsity(flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity) { fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity); } @@ -2586,7 +2996,6 @@ struct TensorBuilder { start_ = fbb_.StartTable(); } - TensorBuilder &operator=(const TensorBuilder &); flatbuffers::Offset<Tensor> Finish() { const auto end = fbb_.EndTable(start_); @@ -2595,14 +3004,13 @@ struct TensorBuilder } }; -inline flatbuffers::Offset<Tensor> -CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0, - TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, - flatbuffers::Offset<flatbuffers::String> name = 0, - flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false, - flatbuffers::Offset<SparsityParameters> sparsity = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0) +inline flatbuffers::Offset<Tensor> CreateTensor( + flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0, + onert_tflite::TensorType type = 
onert_tflite::TensorType_FLOAT32, uint32_t buffer = 0, + flatbuffers::Offset<flatbuffers::String> name = 0, + flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization = 0, + bool is_variable = false, flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0) { TensorBuilder builder_(_fbb); builder_.add_shape_signature(shape_signature); @@ -2618,20 +3026,23 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, inline flatbuffers::Offset<Tensor> CreateTensorDirect( flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr, - TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr, - flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false, - flatbuffers::Offset<SparsityParameters> sparsity = 0, + onert_tflite::TensorType type = onert_tflite::TensorType_FLOAT32, uint32_t buffer = 0, + const char *name = nullptr, + flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization = 0, + bool is_variable = false, flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity = 0, const std::vector<int32_t> *shape_signature = nullptr) { - return onert_tflite::CreateTensor( - _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer, - name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity, - shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0); + auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0; + auto name__ = name ? _fbb.CreateString(name) : 0; + auto shape_signature__ = shape_signature ? 
_fbb.CreateVector<int32_t>(*shape_signature) : 0; + return onert_tflite::CreateTensor(_fbb, shape__, type, buffer, name__, quantization, is_variable, + sparsity, shape_signature__); } struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef Conv2DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_PADDING = 4, VT_STRIDE_W = 6, @@ -2640,12 +3051,16 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table VT_DILATION_W_FACTOR = 12, VT_DILATION_H_FACTOR = 14 }; - Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + onert_tflite::Padding padding() const + { + return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0)); + } int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } @@ -2662,9 +3077,10 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct Conv2DOptionsBuilder { + typedef Conv2DOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_padding(Padding padding) + void add_padding(onert_tflite::Padding padding) { fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0); } @@ -2676,7 +3092,7 @@ struct Conv2DOptionsBuilder { fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0); } - void 
add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -2693,7 +3109,6 @@ struct Conv2DOptionsBuilder { start_ = fbb_.StartTable(); } - Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &); flatbuffers::Offset<Conv2DOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -2703,9 +3118,11 @@ struct Conv2DOptionsBuilder }; inline flatbuffers::Offset<Conv2DOptions> -CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, +CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, + onert_tflite::Padding padding = onert_tflite::Padding_SAME, int32_t stride_w = 0, int32_t stride_h = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) { Conv2DOptionsBuilder builder_(_fbb); @@ -2718,9 +3135,121 @@ CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padd return builder_.Finish(); } +struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef Conv3DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_PADDING = 4, + VT_STRIDE_D = 6, + VT_STRIDE_W = 8, + VT_STRIDE_H = 10, + VT_FUSED_ACTIVATION_FUNCTION = 12, + VT_DILATION_D_FACTOR = 14, + VT_DILATION_W_FACTOR = 16, + VT_DILATION_H_FACTOR = 18 + }; + onert_tflite::Padding padding() const + { + return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0)); + } + int32_t stride_d() const { return GetField<int32_t>(VT_STRIDE_D, 0); } + int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } + int32_t 
stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } + onert_tflite::ActivationFunctionType fused_activation_function() const + { + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_d_factor() const { return GetField<int32_t>(VT_DILATION_D_FACTOR, 1); } + int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } + int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) && + VerifyField<int32_t>(verifier, VT_STRIDE_D) && + VerifyField<int32_t>(verifier, VT_STRIDE_W) && + VerifyField<int32_t>(verifier, VT_STRIDE_H) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<int32_t>(verifier, VT_DILATION_D_FACTOR) && + VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) && + VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable(); + } +}; + +struct Conv3DOptionsBuilder +{ + typedef Conv3DOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(onert_tflite::Padding padding) + { + fbb_.AddElement<int8_t>(Conv3DOptions::VT_PADDING, static_cast<int8_t>(padding), 0); + } + void add_stride_d(int32_t stride_d) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_D, stride_d, 0); + } + void add_stride_w(int32_t stride_w) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) + { + fbb_.AddElement<int8_t>(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION, + static_cast<int8_t>(fused_activation_function), 0); + } + void add_dilation_d_factor(int32_t 
dilation_d_factor) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1); + } + void add_dilation_w_factor(int32_t dilation_w_factor) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) + { + fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<Conv3DOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Conv3DOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions( + flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME, + int32_t stride_d = 0, int32_t stride_w = 0, int32_t stride_h = 0, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, + int32_t dilation_d_factor = 1, int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) +{ + Conv3DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + builder_.add_dilation_d_factor(dilation_d_factor); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_stride_d(stride_d); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef Pool2DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_PADDING = 4, VT_STRIDE_W = 6, @@ -2729,14 +3258,18 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table VT_FILTER_HEIGHT = 12, VT_FUSED_ACTIVATION_FUNCTION = 14 }; - Padding padding() const { return 
static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + onert_tflite::Padding padding() const + { + return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0)); + } int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); } int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); } - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2751,9 +3284,10 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct Pool2DOptionsBuilder { + typedef Pool2DOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_padding(Padding padding) + void add_padding(onert_tflite::Padding padding) { fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0); } @@ -2773,7 +3307,7 @@ struct Pool2DOptionsBuilder { fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0); } - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -2782,7 +3316,6 @@ struct Pool2DOptionsBuilder { start_ = fbb_.StartTable(); } - Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &); flatbuffers::Offset<Pool2DOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -2791,11 +3324,11 @@ struct 
Pool2DOptionsBuilder } }; -inline flatbuffers::Offset<Pool2DOptions> -CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, - int32_t filter_height = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions( + flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE) { Pool2DOptionsBuilder builder_(_fbb); builder_.add_filter_height(filter_height); @@ -2809,7 +3342,8 @@ CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padd struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef DepthwiseConv2DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_PADDING = 4, VT_STRIDE_W = 6, @@ -2819,13 +3353,17 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab VT_DILATION_W_FACTOR = 14, VT_DILATION_H_FACTOR = 16 }; - Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + onert_tflite::Padding padding() const + { + return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0)); + } int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); } - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return 
static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } @@ -2843,9 +3381,10 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab struct DepthwiseConv2DOptionsBuilder { + typedef DepthwiseConv2DOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_padding(Padding padding) + void add_padding(onert_tflite::Padding padding) { fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0); } @@ -2861,7 +3400,7 @@ struct DepthwiseConv2DOptionsBuilder { fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0); } - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -2878,7 +3417,6 @@ struct DepthwiseConv2DOptionsBuilder { start_ = fbb_.StartTable(); } - DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &); flatbuffers::Offset<DepthwiseConv2DOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -2888,9 +3426,10 @@ struct DepthwiseConv2DOptionsBuilder }; inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0, - int32_t stride_h = 0, int32_t depth_multiplier = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, int32_t 
depth_multiplier = 0, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) { DepthwiseConv2DOptionsBuilder builder_(_fbb); @@ -2906,7 +3445,8 @@ inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions( struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef ConcatEmbeddingsOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_NUM_CHANNELS = 4, VT_NUM_COLUMNS_PER_CHANNEL = 6, @@ -2933,6 +3473,7 @@ struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Ta struct ConcatEmbeddingsOptionsBuilder { + typedef ConcatEmbeddingsOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_num_channels(int32_t num_channels) @@ -2954,7 +3495,6 @@ struct ConcatEmbeddingsOptionsBuilder { start_ = fbb_.StartTable(); } - ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &); flatbuffers::Offset<ConcatEmbeddingsOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -2980,21 +3520,24 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_ const std::vector<int32_t> *num_columns_per_channel = nullptr, const std::vector<int32_t> *embedding_dim_per_channel = nullptr) { - return onert_tflite::CreateConcatEmbeddingsOptions( - _fbb, num_channels, - num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0, - embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0); + auto num_columns_per_channel__ = + num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0; + auto embedding_dim_per_channel__ = + embedding_dim_per_channel ? 
_fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0; + return onert_tflite::CreateConcatEmbeddingsOptions(_fbb, num_channels, num_columns_per_channel__, + embedding_dim_per_channel__); } struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef LSHProjectionOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_TYPE = 4 }; - LSHProjectionType type() const + onert_tflite::LSHProjectionType type() const { - return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0)); + return static_cast<onert_tflite::LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -3005,9 +3548,10 @@ struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct LSHProjectionOptionsBuilder { + typedef LSHProjectionOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_type(LSHProjectionType type) + void add_type(onert_tflite::LSHProjectionType type) { fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0); } @@ -3015,7 +3559,6 @@ struct LSHProjectionOptionsBuilder { start_ = fbb_.StartTable(); } - LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &); flatbuffers::Offset<LSHProjectionOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3024,9 +3567,9 @@ struct LSHProjectionOptionsBuilder } }; -inline flatbuffers::Offset<LSHProjectionOptions> -CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, - LSHProjectionType type = LSHProjectionType_UNKNOWN) +inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions( + flatbuffers::FlatBufferBuilder &_fbb, + onert_tflite::LSHProjectionType type = onert_tflite::LSHProjectionType_UNKNOWN) { LSHProjectionOptionsBuilder builder_(_fbb); builder_.add_type(type); @@ -3035,16 +3578,18 @@ CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, struct 
SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef SVDFOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_RANK = 4, VT_FUSED_ACTIVATION_FUNCTION = 6, VT_ASYMMETRIC_QUANTIZE_INPUTS = 8 }; int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); } - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3060,10 +3605,11 @@ struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SVDFOptionsBuilder { + typedef SVDFOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); } - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -3077,7 +3623,6 @@ struct SVDFOptionsBuilder { start_ = fbb_.StartTable(); } - SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &); flatbuffers::Offset<SVDFOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3088,7 +3633,8 @@ struct SVDFOptionsBuilder inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, bool asymmetric_quantize_inputs = false) { SVDFOptionsBuilder builder_(_fbb); @@ 
-3100,14 +3646,16 @@ CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0, struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef RNNOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_FUSED_ACTIVATION_FUNCTION = 4, VT_ASYMMETRIC_QUANTIZE_INPUTS = 6 }; - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3123,9 +3671,10 @@ struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct RNNOptionsBuilder { + typedef RNNOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -3139,7 +3688,6 @@ struct RNNOptionsBuilder { start_ = fbb_.StartTable(); } - RNNOptionsBuilder &operator=(const RNNOptionsBuilder &); flatbuffers::Offset<RNNOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3150,7 +3698,8 @@ struct RNNOptionsBuilder inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, bool asymmetric_quantize_inputs = false) { RNNOptionsBuilder builder_(_fbb); @@ -3161,16 +3710,18 @@ CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, struct 
SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef SequenceRNNOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_TIME_MAJOR = 4, VT_FUSED_ACTIVATION_FUNCTION = 6, VT_ASYMMETRIC_QUANTIZE_INPUTS = 8 }; bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; } - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3186,6 +3737,7 @@ struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SequenceRNNOptionsBuilder { + typedef SequenceRNNOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_time_major(bool time_major) @@ -3193,7 +3745,7 @@ struct SequenceRNNOptionsBuilder fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 0); } - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -3207,7 +3759,6 @@ struct SequenceRNNOptionsBuilder { start_ = fbb_.StartTable(); } - SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &); flatbuffers::Offset<SequenceRNNOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3216,10 +3767,11 @@ struct SequenceRNNOptionsBuilder } }; -inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - ActivationFunctionType 
fused_activation_function = ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) +inline flatbuffers::Offset<SequenceRNNOptions> +CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { SequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3230,7 +3782,8 @@ inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions( struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef BidirectionalSequenceRNNOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_TIME_MAJOR = 4, VT_FUSED_ACTIVATION_FUNCTION = 6, @@ -3238,9 +3791,10 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf VT_ASYMMETRIC_QUANTIZE_INPUTS = 10 }; bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; } - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; } bool asymmetric_quantize_inputs() const @@ -3258,6 +3812,7 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf struct BidirectionalSequenceRNNOptionsBuilder { + typedef BidirectionalSequenceRNNOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_time_major(bool time_major) @@ -3265,7 +3820,7 @@ struct BidirectionalSequenceRNNOptionsBuilder 
fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 0); } - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -3284,7 +3839,6 @@ struct BidirectionalSequenceRNNOptionsBuilder { start_ = fbb_.StartTable(); } - BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &); flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3295,7 +3849,8 @@ struct BidirectionalSequenceRNNOptionsBuilder inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions( flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, bool merge_outputs = false, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); @@ -3308,20 +3863,23 @@ inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalS struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef FullyConnectedOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_FUSED_ACTIVATION_FUNCTION = 4, VT_WEIGHTS_FORMAT = 6, VT_KEEP_NUM_DIMS = 8, VT_ASYMMETRIC_QUANTIZE_INPUTS = 10 }; - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return 
static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } - FullyConnectedOptionsWeightsFormat weights_format() const + onert_tflite::FullyConnectedOptionsWeightsFormat weights_format() const { - return static_cast<FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0)); + return static_cast<onert_tflite::FullyConnectedOptionsWeightsFormat>( + GetField<int8_t>(VT_WEIGHTS_FORMAT, 0)); } bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; } bool asymmetric_quantize_inputs() const @@ -3340,14 +3898,15 @@ struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl struct FullyConnectedOptionsBuilder { + typedef FullyConnectedOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); } - void add_weights_format(FullyConnectedOptionsWeightsFormat weights_format) + void add_weights_format(onert_tflite::FullyConnectedOptionsWeightsFormat weights_format) { fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT, static_cast<int8_t>(weights_format), 0); @@ -3366,7 +3925,6 @@ struct FullyConnectedOptionsBuilder { start_ = fbb_.StartTable(); } - FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &); flatbuffers::Offset<FullyConnectedOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3375,11 +3933,13 @@ struct FullyConnectedOptionsBuilder } }; -inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - FullyConnectedOptionsWeightsFormat 
weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT, - bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) +inline flatbuffers::Offset<FullyConnectedOptions> +CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, + onert_tflite::FullyConnectedOptionsWeightsFormat weights_format = + onert_tflite::FullyConnectedOptionsWeightsFormat_DEFAULT, + bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) { FullyConnectedOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3391,7 +3951,8 @@ inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions( struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef SoftmaxOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_BETA = 4 }; @@ -3405,6 +3966,7 @@ struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SoftmaxOptionsBuilder { + typedef SoftmaxOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); } @@ -3412,7 +3974,6 @@ struct SoftmaxOptionsBuilder { start_ = fbb_.StartTable(); } - SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &); flatbuffers::Offset<SoftmaxOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3431,15 +3992,17 @@ CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f) struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef ConcatenationOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_AXIS = 4, VT_FUSED_ACTIVATION_FUNCTION = 6 }; int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); } - ActivationFunctionType 
fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -3450,10 +4013,11 @@ struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct ConcatenationOptionsBuilder { + typedef ConcatenationOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); } - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -3462,7 +4026,6 @@ struct ConcatenationOptionsBuilder { start_ = fbb_.StartTable(); } - ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &); flatbuffers::Offset<ConcatenationOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3471,9 +4034,10 @@ struct ConcatenationOptionsBuilder } }; -inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) +inline flatbuffers::Offset<ConcatenationOptions> +CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE) { ConcatenationOptionsBuilder builder_(_fbb); builder_.add_axis(axis); @@ -3483,35 +4047,45 @@ inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions( 
struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef AddOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FUSED_ACTIVATION_FUNCTION = 4 + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 }; - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } + bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable(); } }; struct AddOptionsBuilder { + typedef AddOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); } + void add_pot_scale_int16(bool pot_scale_int16) + { + fbb_.AddElement<uint8_t>(AddOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16), + 1); + } explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - AddOptionsBuilder &operator=(const AddOptionsBuilder &); flatbuffers::Offset<AddOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3522,22 +4096,27 @@ struct AddOptionsBuilder inline flatbuffers::Offset<AddOptions> 
CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, + bool pot_scale_int16 = true) { AddOptionsBuilder builder_(_fbb); + builder_.add_pot_scale_int16(pot_scale_int16); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef MulOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_FUSED_ACTIVATION_FUNCTION = 4 }; - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -3548,9 +4127,10 @@ struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct MulOptionsBuilder { + typedef MulOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -3559,7 +4139,6 @@ struct MulOptionsBuilder { start_ = fbb_.StartTable(); } - MulOptionsBuilder &operator=(const MulOptionsBuilder &); flatbuffers::Offset<MulOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3570,7 +4149,8 @@ struct MulOptionsBuilder inline flatbuffers::Offset<MulOptions> CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType 
fused_activation_function = ActivationFunctionType_NONE) + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE) { MulOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -3579,13 +4159,15 @@ CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef L2NormOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_FUSED_ACTIVATION_FUNCTION = 4 }; - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -3596,9 +4178,10 @@ struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct L2NormOptionsBuilder { + typedef L2NormOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -3607,7 +4190,6 @@ struct L2NormOptionsBuilder { start_ = fbb_.StartTable(); } - L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &); flatbuffers::Offset<L2NormOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3618,7 +4200,8 @@ struct L2NormOptionsBuilder inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) + 
onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE) { L2NormOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -3627,7 +4210,8 @@ CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef LocalResponseNormalizationOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_RADIUS = 4, VT_BIAS = 6, @@ -3648,6 +4232,7 @@ struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatb struct LocalResponseNormalizationOptionsBuilder { + typedef LocalResponseNormalizationOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_radius(int32_t radius) @@ -3671,8 +4256,6 @@ struct LocalResponseNormalizationOptionsBuilder { start_ = fbb_.StartTable(); } - LocalResponseNormalizationOptionsBuilder & - operator=(const LocalResponseNormalizationOptionsBuilder &); flatbuffers::Offset<LocalResponseNormalizationOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3695,7 +4278,8 @@ CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, in struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef LSTMOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_FUSED_ACTIVATION_FUNCTION = 4, VT_CELL_CLIP = 6, @@ -3703,15 +4287,16 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table VT_KERNEL_TYPE = 10, VT_ASYMMETRIC_QUANTIZE_INPUTS = 12 }; - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + 
GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } - LSTMKernelType kernel_type() const + onert_tflite::LSTMKernelType kernel_type() const { - return static_cast<LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0)); + return static_cast<onert_tflite::LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0)); } bool asymmetric_quantize_inputs() const { @@ -3730,9 +4315,10 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct LSTMOptionsBuilder { + typedef LSTMOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -3745,7 +4331,7 @@ struct LSTMOptionsBuilder { fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); } - void add_kernel_type(LSTMKernelType kernel_type) + void add_kernel_type(onert_tflite::LSTMKernelType kernel_type) { fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0); } @@ -3758,7 +4344,6 @@ struct LSTMOptionsBuilder { start_ = fbb_.StartTable(); } - LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &); flatbuffers::Offset<LSTMOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3769,9 +4354,10 @@ struct LSTMOptionsBuilder inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, float cell_clip = 0.0f, float proj_clip = 0.0f, - LSTMKernelType kernel_type = 
LSTMKernelType_FULL, + onert_tflite::LSTMKernelType kernel_type = onert_tflite::LSTMKernelType_FULL, bool asymmetric_quantize_inputs = false) { LSTMOptionsBuilder builder_(_fbb); @@ -3785,7 +4371,8 @@ CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef UnidirectionalSequenceLSTMOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_FUSED_ACTIVATION_FUNCTION = 4, VT_CELL_CLIP = 6, @@ -3793,9 +4380,10 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb VT_TIME_MAJOR = 10, VT_ASYMMETRIC_QUANTIZE_INPUTS = 12 }; - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } @@ -3817,9 +4405,10 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb struct UnidirectionalSequenceLSTMOptionsBuilder { + typedef UnidirectionalSequenceLSTMOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -3847,8 +4436,6 @@ struct UnidirectionalSequenceLSTMOptionsBuilder { start_ = fbb_.StartTable(); } - UnidirectionalSequenceLSTMOptionsBuilder & - operator=(const 
UnidirectionalSequenceLSTMOptionsBuilder &); flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3860,7 +4447,8 @@ struct UnidirectionalSequenceLSTMOptionsBuilder inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions( flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, bool asymmetric_quantize_inputs = false) { @@ -3875,7 +4463,8 @@ CreateUnidirectionalSequenceLSTMOptions( struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef BidirectionalSequenceLSTMOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_FUSED_ACTIVATION_FUNCTION = 4, VT_CELL_CLIP = 6, @@ -3884,9 +4473,10 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu VT_TIME_MAJOR = 12, VT_ASYMMETRIC_QUANTIZE_INPUTS = 14 }; - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } @@ -3910,9 +4500,10 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu struct BidirectionalSequenceLSTMOptionsBuilder { + typedef BidirectionalSequenceLSTMOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void 
add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -3945,8 +4536,6 @@ struct BidirectionalSequenceLSTMOptionsBuilder { start_ = fbb_.StartTable(); } - BidirectionalSequenceLSTMOptionsBuilder & - operator=(const BidirectionalSequenceLSTMOptionsBuilder &); flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -3957,7 +4546,8 @@ struct BidirectionalSequenceLSTMOptionsBuilder inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions( flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, bool time_major = true, bool asymmetric_quantize_inputs = false) { @@ -3973,7 +4563,8 @@ inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectional struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef ResizeBilinearOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_ALIGN_CORNERS = 8, VT_HALF_PIXEL_CENTERS = 10 @@ -3989,6 +4580,7 @@ struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl struct ResizeBilinearOptionsBuilder { + typedef ResizeBilinearOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_align_corners(bool align_corners) @@ -4005,7 +4597,6 @@ struct ResizeBilinearOptionsBuilder { start_ = fbb_.StartTable(); } - ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder 
&); flatbuffers::Offset<ResizeBilinearOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4026,20 +4617,24 @@ CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_cor struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef ResizeNearestNeighborOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ALIGN_CORNERS = 4 + VT_ALIGN_CORNERS = 4, + VT_HALF_PIXEL_CENTERS = 6 }; bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; } + bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) && - verifier.EndTable(); + VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable(); } }; struct ResizeNearestNeighborOptionsBuilder { + typedef ResizeNearestNeighborOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_align_corners(bool align_corners) @@ -4047,11 +4642,15 @@ struct ResizeNearestNeighborOptionsBuilder fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS, static_cast<uint8_t>(align_corners), 0); } + void add_half_pixel_centers(bool half_pixel_centers) + { + fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS, + static_cast<uint8_t>(half_pixel_centers), 0); + } explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &); flatbuffers::Offset<ResizeNearestNeighborOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4061,16 +4660,19 @@ struct ResizeNearestNeighborOptionsBuilder }; inline flatbuffers::Offset<ResizeNearestNeighborOptions> 
-CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false) +CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false, + bool half_pixel_centers = false) { ResizeNearestNeighborOptionsBuilder builder_(_fbb); + builder_.add_half_pixel_centers(half_pixel_centers); builder_.add_align_corners(align_corners); return builder_.Finish(); } struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef CallOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_SUBGRAPH = 4 }; @@ -4084,6 +4686,7 @@ struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct CallOptionsBuilder { + typedef CallOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_subgraph(uint32_t subgraph) @@ -4094,7 +4697,6 @@ struct CallOptionsBuilder { start_ = fbb_.StartTable(); } - CallOptionsBuilder &operator=(const CallOptionsBuilder &); flatbuffers::Offset<CallOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4113,6 +4715,7 @@ inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBuffe struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef PadOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -4121,13 +4724,13 @@ struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct PadOptionsBuilder { + typedef PadOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - PadOptionsBuilder &operator=(const PadOptionsBuilder &); flatbuffers::Offset<PadOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4144,6 +4747,7 @@ inline flatbuffers::Offset<PadOptions> 
CreatePadOptions(flatbuffers::FlatBufferB struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef PadV2OptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -4152,13 +4756,13 @@ struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct PadV2OptionsBuilder { + typedef PadV2Options Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &); flatbuffers::Offset<PadV2Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -4175,7 +4779,8 @@ inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBuf struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef ReshapeOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_NEW_SHAPE = 4 }; @@ -4192,6 +4797,7 @@ struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct ReshapeOptionsBuilder { + typedef ReshapeOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape) @@ -4202,7 +4808,6 @@ struct ReshapeOptionsBuilder { start_ = fbb_.StartTable(); } - ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &); flatbuffers::Offset<ReshapeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4224,12 +4829,13 @@ inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *new_shape = nullptr) { - return onert_tflite::CreateReshapeOptions(_fbb, - new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0); + auto new_shape__ = new_shape ? 
_fbb.CreateVector<int32_t>(*new_shape) : 0; + return onert_tflite::CreateReshapeOptions(_fbb, new_shape__); } struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SpaceToBatchNDOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -4238,13 +4844,13 @@ struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl struct SpaceToBatchNDOptionsBuilder { + typedef SpaceToBatchNDOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &); flatbuffers::Offset<SpaceToBatchNDOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4262,6 +4868,7 @@ CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb) struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BatchToSpaceNDOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -4270,13 +4877,13 @@ struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl struct BatchToSpaceNDOptionsBuilder { + typedef BatchToSpaceNDOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &); flatbuffers::Offset<BatchToSpaceNDOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4294,7 +4901,8 @@ CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb) struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef SkipGramOptionsBuilder Builder; + enum 
FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_NGRAM_SIZE = 4, VT_MAX_SKIP_SIZE = 6, @@ -4313,6 +4921,7 @@ struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SkipGramOptionsBuilder { + typedef SkipGramOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_ngram_size(int32_t ngram_size) @@ -4332,7 +4941,6 @@ struct SkipGramOptionsBuilder { start_ = fbb_.StartTable(); } - SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &); flatbuffers::Offset<SkipGramOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4354,7 +4962,8 @@ CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size = struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef SpaceToDepthOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_BLOCK_SIZE = 4 }; @@ -4368,6 +4977,7 @@ struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SpaceToDepthOptionsBuilder { + typedef SpaceToDepthOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_block_size(int32_t block_size) @@ -4378,7 +4988,6 @@ struct SpaceToDepthOptionsBuilder { start_ = fbb_.StartTable(); } - SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &); flatbuffers::Offset<SpaceToDepthOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4397,7 +5006,8 @@ CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_si struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef DepthToSpaceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_BLOCK_SIZE = 4 }; @@ -4411,6 +5021,7 @@ struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct DepthToSpaceOptionsBuilder { + typedef DepthToSpaceOptions Table; 
flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_block_size(int32_t block_size) @@ -4421,7 +5032,6 @@ struct DepthToSpaceOptionsBuilder { start_ = fbb_.StartTable(); } - DepthToSpaceOptionsBuilder &operator=(const DepthToSpaceOptionsBuilder &); flatbuffers::Offset<DepthToSpaceOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4440,35 +5050,45 @@ CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_si struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef SubOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FUSED_ACTIVATION_FUNCTION = 4 + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 }; - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } + bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable(); } }; struct SubOptionsBuilder { + typedef SubOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); } + void add_pot_scale_int16(bool pot_scale_int16) + 
{ + fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16), + 1); + } explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - SubOptionsBuilder &operator=(const SubOptionsBuilder &); flatbuffers::Offset<SubOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4479,22 +5099,27 @@ struct SubOptionsBuilder inline flatbuffers::Offset<SubOptions> CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE, + bool pot_scale_int16 = true) { SubOptionsBuilder builder_(_fbb); + builder_.add_pot_scale_int16(pot_scale_int16); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef DivOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_FUSED_ACTIVATION_FUNCTION = 4 }; - ActivationFunctionType fused_activation_function() const + onert_tflite::ActivationFunctionType fused_activation_function() const { - return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast<onert_tflite::ActivationFunctionType>( + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4505,9 +5130,10 @@ struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct DivOptionsBuilder { + typedef DivOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function(ActivationFunctionType fused_activation_function) + void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function) { 
fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); @@ -4516,7 +5142,6 @@ struct DivOptionsBuilder { start_ = fbb_.StartTable(); } - DivOptionsBuilder &operator=(const DivOptionsBuilder &); flatbuffers::Offset<DivOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4527,7 +5152,8 @@ struct DivOptionsBuilder inline flatbuffers::Offset<DivOptions> CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) + onert_tflite::ActivationFunctionType fused_activation_function = + onert_tflite::ActivationFunctionType_NONE) { DivOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -4536,6 +5162,7 @@ CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TopKV2OptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -4544,13 +5171,13 @@ struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct TopKV2OptionsBuilder { + typedef TopKV2Options Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &); flatbuffers::Offset<TopKV2Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -4567,13 +5194,14 @@ inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatB struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef EmbeddingLookupSparseOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_COMBINER = 4 }; - CombinerType combiner() const + onert_tflite::CombinerType combiner() const { - 
return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0)); + return static_cast<onert_tflite::CombinerType>(GetField<int8_t>(VT_COMBINER, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4584,9 +5212,10 @@ struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffer struct EmbeddingLookupSparseOptionsBuilder { + typedef EmbeddingLookupSparseOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_combiner(CombinerType combiner) + void add_combiner(onert_tflite::CombinerType combiner) { fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER, static_cast<int8_t>(combiner), 0); @@ -4595,7 +5224,6 @@ struct EmbeddingLookupSparseOptionsBuilder { start_ = fbb_.StartTable(); } - EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &); flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4604,9 +5232,9 @@ struct EmbeddingLookupSparseOptionsBuilder } }; -inline flatbuffers::Offset<EmbeddingLookupSparseOptions> -CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, - CombinerType combiner = CombinerType_SUM) +inline flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions( + flatbuffers::FlatBufferBuilder &_fbb, + onert_tflite::CombinerType combiner = onert_tflite::CombinerType_SUM) { EmbeddingLookupSparseOptionsBuilder builder_(_fbb); builder_.add_combiner(combiner); @@ -4615,28 +5243,35 @@ CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef GatherOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_AXIS = 4 + VT_AXIS = 4, + VT_BATCH_DIMS = 6 }; int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); } + int32_t batch_dims() const { return GetField<int32_t>(VT_BATCH_DIMS, 0); } bool 
Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) && - verifier.EndTable(); + VerifyField<int32_t>(verifier, VT_BATCH_DIMS) && verifier.EndTable(); } }; struct GatherOptionsBuilder { + typedef GatherOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); } + void add_batch_dims(int32_t batch_dims) + { + fbb_.AddElement<int32_t>(GatherOptions::VT_BATCH_DIMS, batch_dims, 0); + } explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - GatherOptionsBuilder &operator=(const GatherOptionsBuilder &); flatbuffers::Offset<GatherOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4645,16 +5280,18 @@ struct GatherOptionsBuilder } }; -inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, - int32_t axis = 0) +inline flatbuffers::Offset<GatherOptions> +CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, int32_t batch_dims = 0) { GatherOptionsBuilder builder_(_fbb); + builder_.add_batch_dims(batch_dims); builder_.add_axis(axis); return builder_.Finish(); } struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TransposeOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -4663,13 +5300,13 @@ struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct TransposeOptionsBuilder { + typedef TransposeOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &); flatbuffers::Offset<TransposeOptions> Finish() { 
const auto end = fbb_.EndTable(start_); @@ -4687,6 +5324,7 @@ CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb) struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ExpOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -4695,13 +5333,13 @@ struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct ExpOptionsBuilder { + typedef ExpOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - ExpOptionsBuilder &operator=(const ExpOptionsBuilder &); flatbuffers::Offset<ExpOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4718,6 +5356,7 @@ inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferB struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CosOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -4726,13 +5365,13 @@ struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct CosOptionsBuilder { + typedef CosOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - CosOptionsBuilder &operator=(const CosOptionsBuilder &); flatbuffers::Offset<CosOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4749,7 +5388,8 @@ inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferB struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef ReducerOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_KEEP_DIMS = 4 }; @@ -4763,6 +5403,7 @@ struct ReducerOptions FLATBUFFERS_FINAL_CLASS : 
private flatbuffers::Table struct ReducerOptionsBuilder { + typedef ReducerOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_keep_dims(bool keep_dims) @@ -4773,7 +5414,6 @@ struct ReducerOptionsBuilder { start_ = fbb_.StartTable(); } - ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &); flatbuffers::Offset<ReducerOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4792,7 +5432,8 @@ CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = fals struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef SqueezeOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_SQUEEZE_DIMS = 4 }; @@ -4809,6 +5450,7 @@ struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SqueezeOptionsBuilder { + typedef SqueezeOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims) @@ -4819,7 +5461,6 @@ struct SqueezeOptionsBuilder { start_ = fbb_.StartTable(); } - SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &); flatbuffers::Offset<SqueezeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4841,13 +5482,14 @@ inline flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *squeeze_dims = nullptr) { - return onert_tflite::CreateSqueezeOptions( - _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0); + auto squeeze_dims__ = squeeze_dims ? 
_fbb.CreateVector<int32_t>(*squeeze_dims) : 0; + return onert_tflite::CreateSqueezeOptions(_fbb, squeeze_dims__); } struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef SplitOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_NUM_SPLITS = 4 }; @@ -4861,6 +5503,7 @@ struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SplitOptionsBuilder { + typedef SplitOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_num_splits(int32_t num_splits) @@ -4871,7 +5514,6 @@ struct SplitOptionsBuilder { start_ = fbb_.StartTable(); } - SplitOptionsBuilder &operator=(const SplitOptionsBuilder &); flatbuffers::Offset<SplitOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4890,7 +5532,8 @@ inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBuf struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef SplitVOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_NUM_SPLITS = 4 }; @@ -4904,6 +5547,7 @@ struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SplitVOptionsBuilder { + typedef SplitVOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_num_splits(int32_t num_splits) @@ -4914,7 +5558,6 @@ struct SplitVOptionsBuilder { start_ = fbb_.StartTable(); } - SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &); flatbuffers::Offset<SplitVOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -4933,7 +5576,8 @@ inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatB struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef StridedSliceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_BEGIN_MASK = 4, VT_END_MASK = 6, @@ -4958,6 +5602,7 @@ 
struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct StridedSliceOptionsBuilder { + typedef StridedSliceOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_begin_mask(int32_t begin_mask) @@ -4984,7 +5629,6 @@ struct StridedSliceOptionsBuilder { start_ = fbb_.StartTable(); } - StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &); flatbuffers::Offset<StridedSliceOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5009,6 +5653,7 @@ CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_ma struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LogSoftmaxOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5017,13 +5662,13 @@ struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct LogSoftmaxOptionsBuilder { + typedef LogSoftmaxOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &); flatbuffers::Offset<LogSoftmaxOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5041,18 +5686,19 @@ CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb) struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef CastOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_IN_DATA_TYPE = 4, VT_OUT_DATA_TYPE = 6 }; - TensorType in_data_type() const + onert_tflite::TensorType in_data_type() const { - return static_cast<TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0)); + return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0)); } - TensorType out_data_type() const + 
onert_tflite::TensorType out_data_type() const { - return static_cast<TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0)); + return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -5063,13 +5709,14 @@ struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct CastOptionsBuilder { + typedef CastOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_in_data_type(TensorType in_data_type) + void add_in_data_type(onert_tflite::TensorType in_data_type) { fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0); } - void add_out_data_type(TensorType out_data_type) + void add_out_data_type(onert_tflite::TensorType out_data_type) { fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0); } @@ -5077,7 +5724,6 @@ struct CastOptionsBuilder { start_ = fbb_.StartTable(); } - CastOptionsBuilder &operator=(const CastOptionsBuilder &); flatbuffers::Offset<CastOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5088,8 +5734,8 @@ struct CastOptionsBuilder inline flatbuffers::Offset<CastOptions> CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, - TensorType in_data_type = TensorType_FLOAT32, - TensorType out_data_type = TensorType_FLOAT32) + onert_tflite::TensorType in_data_type = onert_tflite::TensorType_FLOAT32, + onert_tflite::TensorType out_data_type = onert_tflite::TensorType_FLOAT32) { CastOptionsBuilder builder_(_fbb); builder_.add_out_data_type(out_data_type); @@ -5099,6 +5745,7 @@ CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef DequantizeOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5107,13 +5754,13 @@ struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : 
private flatbuffers::Table struct DequantizeOptionsBuilder { + typedef DequantizeOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &); flatbuffers::Offset<DequantizeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5131,6 +5778,7 @@ CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb) struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MaximumMinimumOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5139,13 +5787,13 @@ struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl struct MaximumMinimumOptionsBuilder { + typedef MaximumMinimumOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &); flatbuffers::Offset<MaximumMinimumOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5163,6 +5811,7 @@ CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb) struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TileOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5171,13 +5820,13 @@ struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct TileOptionsBuilder { + typedef TileOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - TileOptionsBuilder 
&operator=(const TileOptionsBuilder &); flatbuffers::Offset<TileOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5194,13 +5843,14 @@ inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBuffe struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef ArgMaxOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_OUTPUT_TYPE = 4 }; - TensorType output_type() const + onert_tflite::TensorType output_type() const { - return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0)); + return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -5211,9 +5861,10 @@ struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct ArgMaxOptionsBuilder { + typedef ArgMaxOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_output_type(TensorType output_type) + void add_output_type(onert_tflite::TensorType output_type) { fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0); } @@ -5221,7 +5872,6 @@ struct ArgMaxOptionsBuilder { start_ = fbb_.StartTable(); } - ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &); flatbuffers::Offset<ArgMaxOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5232,7 +5882,7 @@ struct ArgMaxOptionsBuilder inline flatbuffers::Offset<ArgMaxOptions> CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, - TensorType output_type = TensorType_FLOAT32) + onert_tflite::TensorType output_type = onert_tflite::TensorType_FLOAT32) { ArgMaxOptionsBuilder builder_(_fbb); builder_.add_output_type(output_type); @@ -5241,13 +5891,14 @@ CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef ArgMinOptionsBuilder Builder; + enum FlatBuffersVTableOffset 
FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_OUTPUT_TYPE = 4 }; - TensorType output_type() const + onert_tflite::TensorType output_type() const { - return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0)); + return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -5258,9 +5909,10 @@ struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct ArgMinOptionsBuilder { + typedef ArgMinOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_output_type(TensorType output_type) + void add_output_type(onert_tflite::TensorType output_type) { fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0); } @@ -5268,7 +5920,6 @@ struct ArgMinOptionsBuilder { start_ = fbb_.StartTable(); } - ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &); flatbuffers::Offset<ArgMinOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5279,7 +5930,7 @@ struct ArgMinOptionsBuilder inline flatbuffers::Offset<ArgMinOptions> CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, - TensorType output_type = TensorType_FLOAT32) + onert_tflite::TensorType output_type = onert_tflite::TensorType_FLOAT32) { ArgMinOptionsBuilder builder_(_fbb); builder_.add_output_type(output_type); @@ -5288,6 +5939,7 @@ CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef GreaterOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5296,13 +5948,13 @@ struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct GreaterOptionsBuilder { + typedef GreaterOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = 
fbb_.StartTable(); } - GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &); flatbuffers::Offset<GreaterOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5320,6 +5972,7 @@ CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb) struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef GreaterEqualOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5328,13 +5981,13 @@ struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct GreaterEqualOptionsBuilder { + typedef GreaterEqualOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &); flatbuffers::Offset<GreaterEqualOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5352,6 +6005,7 @@ CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb) struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LessOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5360,13 +6014,13 @@ struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct LessOptionsBuilder { + typedef LessOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - LessOptionsBuilder &operator=(const LessOptionsBuilder &); flatbuffers::Offset<LessOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5383,6 +6037,7 @@ inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBuffe struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + 
typedef LessEqualOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5391,13 +6046,13 @@ struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct LessEqualOptionsBuilder { + typedef LessEqualOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &); flatbuffers::Offset<LessEqualOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5415,6 +6070,7 @@ CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb) struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef NegOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5423,13 +6079,13 @@ struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct NegOptionsBuilder { + typedef NegOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - NegOptionsBuilder &operator=(const NegOptionsBuilder &); flatbuffers::Offset<NegOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5446,6 +6102,7 @@ inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferB struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SelectOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5454,13 +6111,13 @@ struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SelectOptionsBuilder { + typedef SelectOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; 
explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - SelectOptionsBuilder &operator=(const SelectOptionsBuilder &); flatbuffers::Offset<SelectOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5477,6 +6134,7 @@ inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatB struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SliceOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5485,13 +6143,13 @@ struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SliceOptionsBuilder { + typedef SliceOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - SliceOptionsBuilder &operator=(const SliceOptionsBuilder &); flatbuffers::Offset<SliceOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5508,13 +6166,17 @@ inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBuf struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef TransposeConvOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_PADDING = 4, VT_STRIDE_W = 6, VT_STRIDE_H = 8 }; - Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); } + onert_tflite::Padding padding() const + { + return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0)); + } int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); } int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); } bool Verify(flatbuffers::Verifier &verifier) const @@ -5527,9 +6189,10 @@ struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct TransposeConvOptionsBuilder { + 
typedef TransposeConvOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_padding(Padding padding) + void add_padding(onert_tflite::Padding padding) { fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0); } @@ -5545,7 +6208,6 @@ struct TransposeConvOptionsBuilder { start_ = fbb_.StartTable(); } - TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &); flatbuffers::Offset<TransposeConvOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5555,7 +6217,8 @@ struct TransposeConvOptionsBuilder }; inline flatbuffers::Offset<TransposeConvOptions> -CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, +CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, + onert_tflite::Padding padding = onert_tflite::Padding_SAME, int32_t stride_w = 0, int32_t stride_h = 0) { TransposeConvOptionsBuilder builder_(_fbb); @@ -5567,6 +6230,7 @@ CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ExpandDimsOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5575,13 +6239,13 @@ struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct ExpandDimsOptionsBuilder { + typedef ExpandDimsOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &); flatbuffers::Offset<ExpandDimsOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5599,7 +6263,8 @@ CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb) struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - 
enum + typedef SparseToDenseOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_VALIDATE_INDICES = 4 }; @@ -5613,6 +6278,7 @@ struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SparseToDenseOptionsBuilder { + typedef SparseToDenseOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_validate_indices(bool validate_indices) @@ -5624,7 +6290,6 @@ struct SparseToDenseOptionsBuilder { start_ = fbb_.StartTable(); } - SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &); flatbuffers::Offset<SparseToDenseOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5643,6 +6308,7 @@ CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_i struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef EqualOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5651,13 +6317,13 @@ struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct EqualOptionsBuilder { + typedef EqualOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - EqualOptionsBuilder &operator=(const EqualOptionsBuilder &); flatbuffers::Offset<EqualOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5674,6 +6340,7 @@ inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBuf struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef NotEqualOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5682,13 +6349,13 @@ struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct NotEqualOptionsBuilder { + typedef 
NotEqualOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &); flatbuffers::Offset<NotEqualOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5706,11 +6373,15 @@ CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb) struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef ShapeOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_OUT_TYPE = 4 }; - TensorType out_type() const { return static_cast<TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); } + onert_tflite::TensorType out_type() const + { + return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) && @@ -5720,9 +6391,10 @@ struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct ShapeOptionsBuilder { + typedef ShapeOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_out_type(TensorType out_type) + void add_out_type(onert_tflite::TensorType out_type) { fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0); } @@ -5730,7 +6402,6 @@ struct ShapeOptionsBuilder { start_ = fbb_.StartTable(); } - ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &); flatbuffers::Offset<ShapeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5740,7 +6411,8 @@ struct ShapeOptionsBuilder }; inline flatbuffers::Offset<ShapeOptions> -CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = TensorType_FLOAT32) +CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, + onert_tflite::TensorType out_type = onert_tflite::TensorType_FLOAT32) { 
ShapeOptionsBuilder builder_(_fbb); builder_.add_out_type(out_type); @@ -5749,6 +6421,7 @@ CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = T struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef RankOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5757,13 +6430,13 @@ struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct RankOptionsBuilder { + typedef RankOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - RankOptionsBuilder &operator=(const RankOptionsBuilder &); flatbuffers::Offset<RankOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5780,6 +6453,7 @@ inline flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBuffe struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef PowOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5788,13 +6462,13 @@ struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct PowOptionsBuilder { + typedef PowOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - PowOptionsBuilder &operator=(const PowOptionsBuilder &); flatbuffers::Offset<PowOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5811,7 +6485,8 @@ inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferB struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef FakeQuantOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_MIN = 4, VT_MAX = 6, @@ 
-5832,6 +6507,7 @@ struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct FakeQuantOptionsBuilder { + typedef FakeQuantOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_min(float min) { fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); } @@ -5849,7 +6525,6 @@ struct FakeQuantOptionsBuilder { start_ = fbb_.StartTable(); } - FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &); flatbuffers::Offset<FakeQuantOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5872,7 +6547,8 @@ CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, f struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef PackOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_VALUES_COUNT = 4, VT_AXIS = 6 @@ -5888,6 +6564,7 @@ struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct PackOptionsBuilder { + typedef PackOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_values_count(int32_t values_count) @@ -5899,7 +6576,6 @@ struct PackOptionsBuilder { start_ = fbb_.StartTable(); } - PackOptionsBuilder &operator=(const PackOptionsBuilder &); flatbuffers::Offset<PackOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5919,6 +6595,7 @@ CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0 struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LogicalOrOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5927,13 +6604,13 @@ struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct LogicalOrOptionsBuilder { + typedef LogicalOrOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit 
LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &); flatbuffers::Offset<LogicalOrOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5951,7 +6628,8 @@ CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb) struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef OneHotOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_AXIS = 4 }; @@ -5965,6 +6643,7 @@ struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct OneHotOptionsBuilder { + typedef OneHotOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); } @@ -5972,7 +6651,6 @@ struct OneHotOptionsBuilder { start_ = fbb_.StartTable(); } - OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &); flatbuffers::Offset<OneHotOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -5991,6 +6669,7 @@ inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatB struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef AbsOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -5999,13 +6678,13 @@ struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct AbsOptionsBuilder { + typedef AbsOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - AbsOptionsBuilder &operator=(const AbsOptionsBuilder &); flatbuffers::Offset<AbsOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6022,6 +6701,7 @@ inline flatbuffers::Offset<AbsOptions> 
CreateAbsOptions(flatbuffers::FlatBufferB struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef HardSwishOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6030,13 +6710,13 @@ struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct HardSwishOptionsBuilder { + typedef HardSwishOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit HardSwishOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - HardSwishOptionsBuilder &operator=(const HardSwishOptionsBuilder &); flatbuffers::Offset<HardSwishOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6054,6 +6734,7 @@ CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb) struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LogicalAndOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6062,13 +6743,13 @@ struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct LogicalAndOptionsBuilder { + typedef LogicalAndOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &); flatbuffers::Offset<LogicalAndOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6086,6 +6767,7 @@ CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb) struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LogicalNotOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6094,13 +6776,13 @@ struct LogicalNotOptions 
FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct LogicalNotOptionsBuilder { + typedef LogicalNotOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &); flatbuffers::Offset<LogicalNotOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6118,7 +6800,8 @@ CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb) struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef UnpackOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_NUM = 4, VT_AXIS = 6 @@ -6134,6 +6817,7 @@ struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct UnpackOptionsBuilder { + typedef UnpackOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); } @@ -6142,7 +6826,6 @@ struct UnpackOptionsBuilder { start_ = fbb_.StartTable(); } - UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &); flatbuffers::Offset<UnpackOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6162,6 +6845,7 @@ inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatB struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FloorDivOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6170,13 +6854,13 @@ struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct FloorDivOptionsBuilder { + typedef FloorDivOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } 
- FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &); flatbuffers::Offset<FloorDivOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6194,6 +6878,7 @@ CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb) struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SquareOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6202,13 +6887,13 @@ struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SquareOptionsBuilder { + typedef SquareOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - SquareOptionsBuilder &operator=(const SquareOptionsBuilder &); flatbuffers::Offset<SquareOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6225,6 +6910,7 @@ inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatB struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ZerosLikeOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6233,13 +6919,13 @@ struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct ZerosLikeOptionsBuilder { + typedef ZerosLikeOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &); flatbuffers::Offset<ZerosLikeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6257,6 +6943,7 @@ CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb) struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FillOptionsBuilder 
Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6265,13 +6952,13 @@ struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct FillOptionsBuilder { + typedef FillOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - FillOptionsBuilder &operator=(const FillOptionsBuilder &); flatbuffers::Offset<FillOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6288,6 +6975,7 @@ inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBuffe struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FloorModOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6296,13 +6984,13 @@ struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct FloorModOptionsBuilder { + typedef FloorModOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &); flatbuffers::Offset<FloorModOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6320,6 +7008,7 @@ CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb) struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef RangeOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6328,13 +7017,13 @@ struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct RangeOptionsBuilder { + typedef RangeOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit 
RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - RangeOptionsBuilder &operator=(const RangeOptionsBuilder &); flatbuffers::Offset<RangeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6351,7 +7040,8 @@ inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBuf struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef LeakyReluOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_ALPHA = 4 }; @@ -6365,6 +7055,7 @@ struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct LeakyReluOptionsBuilder { + typedef LeakyReluOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); } @@ -6372,7 +7063,6 @@ struct LeakyReluOptionsBuilder { start_ = fbb_.StartTable(); } - LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &); flatbuffers::Offset<LeakyReluOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6391,6 +7081,7 @@ CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f) struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SquaredDifferenceOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6399,13 +7090,13 @@ struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::T struct SquaredDifferenceOptionsBuilder { + typedef SquaredDifferenceOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &); 
flatbuffers::Offset<SquaredDifferenceOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6423,11 +7114,15 @@ CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb) struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef MirrorPadOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_MODE = 4 }; - MirrorPadMode mode() const { return static_cast<MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); } + onert_tflite::MirrorPadMode mode() const + { + return static_cast<onert_tflite::MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) && @@ -6437,9 +7132,10 @@ struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct MirrorPadOptionsBuilder { + typedef MirrorPadOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_mode(MirrorPadMode mode) + void add_mode(onert_tflite::MirrorPadMode mode) { fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0); } @@ -6447,7 +7143,6 @@ struct MirrorPadOptionsBuilder { start_ = fbb_.StartTable(); } - MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &); flatbuffers::Offset<MirrorPadOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6458,7 +7153,7 @@ struct MirrorPadOptionsBuilder inline flatbuffers::Offset<MirrorPadOptions> CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb, - MirrorPadMode mode = MirrorPadMode_REFLECT) + onert_tflite::MirrorPadMode mode = onert_tflite::MirrorPadMode_REFLECT) { MirrorPadOptionsBuilder builder_(_fbb); builder_.add_mode(mode); @@ -6467,13 +7162,14 @@ CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb, struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef UniqueOptionsBuilder Builder; + enum 
FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_IDX_OUT_TYPE = 4 }; - TensorType idx_out_type() const + onert_tflite::TensorType idx_out_type() const { - return static_cast<TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2)); + return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -6484,9 +7180,10 @@ struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct UniqueOptionsBuilder { + typedef UniqueOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_idx_out_type(TensorType idx_out_type) + void add_idx_out_type(onert_tflite::TensorType idx_out_type) { fbb_.AddElement<int8_t>(UniqueOptions::VT_IDX_OUT_TYPE, static_cast<int8_t>(idx_out_type), 2); } @@ -6494,7 +7191,6 @@ struct UniqueOptionsBuilder { start_ = fbb_.StartTable(); } - UniqueOptionsBuilder &operator=(const UniqueOptionsBuilder &); flatbuffers::Offset<UniqueOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6505,7 +7201,7 @@ struct UniqueOptionsBuilder inline flatbuffers::Offset<UniqueOptions> CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb, - TensorType idx_out_type = TensorType_INT32) + onert_tflite::TensorType idx_out_type = onert_tflite::TensorType_INT32) { UniqueOptionsBuilder builder_(_fbb); builder_.add_idx_out_type(idx_out_type); @@ -6514,6 +7210,7 @@ CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb, struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ReverseV2OptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6522,13 +7219,13 @@ struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct ReverseV2OptionsBuilder { + typedef ReverseV2Options Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit 
ReverseV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - ReverseV2OptionsBuilder &operator=(const ReverseV2OptionsBuilder &); flatbuffers::Offset<ReverseV2Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -6546,6 +7243,7 @@ CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb) struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef AddNOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6554,13 +7252,13 @@ struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct AddNOptionsBuilder { + typedef AddNOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit AddNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - AddNOptionsBuilder &operator=(const AddNOptionsBuilder &); flatbuffers::Offset<AddNOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6577,6 +7275,7 @@ inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBuffe struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef GatherNdOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6585,13 +7284,13 @@ struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct GatherNdOptionsBuilder { + typedef GatherNdOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &); flatbuffers::Offset<GatherNdOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6609,6 +7308,7 @@ CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb) struct WhereOptions 
FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef WhereOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6617,13 +7317,13 @@ struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct WhereOptionsBuilder { + typedef WhereOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - WhereOptionsBuilder &operator=(const WhereOptionsBuilder &); flatbuffers::Offset<WhereOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6640,7 +7340,8 @@ inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBuf struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef ReverseSequenceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_SEQ_DIM = 4, VT_BATCH_DIM = 6 @@ -6656,6 +7357,7 @@ struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab struct ReverseSequenceOptionsBuilder { + typedef ReverseSequenceOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_seq_dim(int32_t seq_dim) @@ -6670,7 +7372,6 @@ struct ReverseSequenceOptionsBuilder { start_ = fbb_.StartTable(); } - ReverseSequenceOptionsBuilder &operator=(const ReverseSequenceOptionsBuilder &); flatbuffers::Offset<ReverseSequenceOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6691,6 +7392,7 @@ CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t seq_d struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MatrixDiagOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6699,13 +7401,13 @@ struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS 
: private flatbuffers::Table struct MatrixDiagOptionsBuilder { + typedef MatrixDiagOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit MatrixDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - MatrixDiagOptionsBuilder &operator=(const MatrixDiagOptionsBuilder &); flatbuffers::Offset<MatrixDiagOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6723,6 +7425,7 @@ CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb) struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef QuantizeOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6731,13 +7434,13 @@ struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct QuantizeOptionsBuilder { + typedef QuantizeOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit QuantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - QuantizeOptionsBuilder &operator=(const QuantizeOptionsBuilder &); flatbuffers::Offset<QuantizeOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6755,6 +7458,7 @@ CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb) struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MatrixSetDiagOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6763,13 +7467,13 @@ struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct MatrixSetDiagOptionsBuilder { + typedef MatrixSetDiagOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit MatrixSetDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - MatrixSetDiagOptionsBuilder 
&operator=(const MatrixSetDiagOptionsBuilder &); flatbuffers::Offset<MatrixSetDiagOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6787,7 +7491,8 @@ CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb) struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef IfOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_THEN_SUBGRAPH_INDEX = 4, VT_ELSE_SUBGRAPH_INDEX = 6 @@ -6803,6 +7508,7 @@ struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct IfOptionsBuilder { + typedef IfOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_then_subgraph_index(int32_t then_subgraph_index) @@ -6817,7 +7523,6 @@ struct IfOptionsBuilder { start_ = fbb_.StartTable(); } - IfOptionsBuilder &operator=(const IfOptionsBuilder &); flatbuffers::Offset<IfOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6836,9 +7541,54 @@ inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBui return builder_.Finish(); } +struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef CallOnceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_INIT_SUBGRAPH_INDEX = 4 + }; + int32_t init_subgraph_index() const { return GetField<int32_t>(VT_INIT_SUBGRAPH_INDEX, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INIT_SUBGRAPH_INDEX) && + verifier.EndTable(); + } +}; + +struct CallOnceOptionsBuilder +{ + typedef CallOnceOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_init_subgraph_index(int32_t init_subgraph_index) + { + fbb_.AddElement<int32_t>(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0); + } + explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = 
fbb_.StartTable(); + } + flatbuffers::Offset<CallOnceOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<CallOnceOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<CallOnceOptions> +CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t init_subgraph_index = 0) +{ + CallOnceOptionsBuilder builder_(_fbb); + builder_.add_init_subgraph_index(init_subgraph_index); + return builder_.Finish(); +} + struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef WhileOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_COND_SUBGRAPH_INDEX = 4, VT_BODY_SUBGRAPH_INDEX = 6 @@ -6854,6 +7604,7 @@ struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct WhileOptionsBuilder { + typedef WhileOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_cond_subgraph_index(int32_t cond_subgraph_index) @@ -6868,7 +7619,6 @@ struct WhileOptionsBuilder { start_ = fbb_.StartTable(); } - WhileOptionsBuilder &operator=(const WhileOptionsBuilder &); flatbuffers::Offset<WhileOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6889,6 +7639,7 @@ inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBuf struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef NonMaxSuppressionV4OptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6897,13 +7648,13 @@ struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers: struct NonMaxSuppressionV4OptionsBuilder { + typedef NonMaxSuppressionV4Options Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit NonMaxSuppressionV4OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - NonMaxSuppressionV4OptionsBuilder 
&operator=(const NonMaxSuppressionV4OptionsBuilder &); flatbuffers::Offset<NonMaxSuppressionV4Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -6921,6 +7672,7 @@ CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb) struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef NonMaxSuppressionV5OptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6929,13 +7681,13 @@ struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers: struct NonMaxSuppressionV5OptionsBuilder { + typedef NonMaxSuppressionV5Options Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit NonMaxSuppressionV5OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - NonMaxSuppressionV5OptionsBuilder &operator=(const NonMaxSuppressionV5OptionsBuilder &); flatbuffers::Offset<NonMaxSuppressionV5Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -6953,6 +7705,7 @@ CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb) struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ScatterNdOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6961,13 +7714,13 @@ struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct ScatterNdOptionsBuilder { + typedef ScatterNdOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit ScatterNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - ScatterNdOptionsBuilder &operator=(const ScatterNdOptionsBuilder &); flatbuffers::Offset<ScatterNdOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -6985,6 +7738,7 @@ CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb) 
struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SelectV2OptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -6993,13 +7747,13 @@ struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SelectV2OptionsBuilder { + typedef SelectV2Options Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit SelectV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - SelectV2OptionsBuilder &operator=(const SelectV2OptionsBuilder &); flatbuffers::Offset<SelectV2Options> Finish() { const auto end = fbb_.EndTable(start_); @@ -7017,6 +7771,7 @@ CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb) struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef DensifyOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -7025,13 +7780,13 @@ struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct DensifyOptionsBuilder { + typedef DensifyOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit DensifyOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - DensifyOptionsBuilder &operator=(const DensifyOptionsBuilder &); flatbuffers::Offset<DensifyOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7049,6 +7804,7 @@ CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb) struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SegmentSumOptionsBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -7057,13 +7813,13 @@ struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SegmentSumOptionsBuilder { 
+ typedef SegmentSumOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit SegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - SegmentSumOptionsBuilder &operator=(const SegmentSumOptionsBuilder &); flatbuffers::Offset<SegmentSumOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7081,39 +7837,49 @@ CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb) struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef BatchMatMulOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ADJOINT_LHS = 4, - VT_ADJOINT_RHS = 6 + VT_ADJ_X = 4, + VT_ADJ_Y = 6, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 8 }; - bool adjoint_lhs() const { return GetField<uint8_t>(VT_ADJOINT_LHS, 0) != 0; } - bool adjoint_rhs() const { return GetField<uint8_t>(VT_ADJOINT_RHS, 0) != 0; } + bool adj_x() const { return GetField<uint8_t>(VT_ADJ_X, 0) != 0; } + bool adj_y() const { return GetField<uint8_t>(VT_ADJ_Y, 0) != 0; } + bool asymmetric_quantize_inputs() const + { + return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJOINT_LHS) && - VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) && verifier.EndTable(); + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJ_X) && + VerifyField<uint8_t>(verifier, VT_ADJ_Y) && + VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable(); } }; struct BatchMatMulOptionsBuilder { + typedef BatchMatMulOptions Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_adjoint_lhs(bool adjoint_lhs) + void add_adj_x(bool adj_x) { - fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_LHS, static_cast<uint8_t>(adjoint_lhs), - 0); + 
fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJ_X, static_cast<uint8_t>(adj_x), 0); } - void add_adjoint_rhs(bool adjoint_rhs) + void add_adj_y(bool adj_y) { - fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_RHS, static_cast<uint8_t>(adjoint_rhs), - 0); + fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJ_Y, static_cast<uint8_t>(adj_y), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) + { + fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, + static_cast<uint8_t>(asymmetric_quantize_inputs), 0); } explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - BatchMatMulOptionsBuilder &operator=(const BatchMatMulOptionsBuilder &); flatbuffers::Offset<BatchMatMulOptions> Finish() { const auto end = fbb_.EndTable(start_); @@ -7123,47 +7889,517 @@ struct BatchMatMulOptionsBuilder }; inline flatbuffers::Offset<BatchMatMulOptions> -CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adjoint_lhs = false, - bool adjoint_rhs = false) +CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adj_x = false, + bool adj_y = false, bool asymmetric_quantize_inputs = false) { BatchMatMulOptionsBuilder builder_(_fbb); - builder_.add_adjoint_rhs(adjoint_rhs); - builder_.add_adjoint_lhs(adjoint_lhs); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_adj_y(adj_y); + builder_.add_adj_x(adj_x); return builder_.Finish(); } -struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef CumsumOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_BUILTIN_CODE = 4, - VT_CUSTOM_CODE = 6, - VT_VERSION = 8 + VT_EXCLUSIVE = 4, + VT_REVERSE = 6 + }; + bool exclusive() const { return GetField<uint8_t>(VT_EXCLUSIVE, 0) != 0; } + bool reverse() const { return 
GetField<uint8_t>(VT_REVERSE, 0) != 0; } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_EXCLUSIVE) && + VerifyField<uint8_t>(verifier, VT_REVERSE) && verifier.EndTable(); + } +}; + +struct CumsumOptionsBuilder +{ + typedef CumsumOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_exclusive(bool exclusive) + { + fbb_.AddElement<uint8_t>(CumsumOptions::VT_EXCLUSIVE, static_cast<uint8_t>(exclusive), 0); + } + void add_reverse(bool reverse) + { + fbb_.AddElement<uint8_t>(CumsumOptions::VT_REVERSE, static_cast<uint8_t>(reverse), 0); + } + explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<CumsumOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<CumsumOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb, + bool exclusive = false, + bool reverse = false) +{ + CumsumOptionsBuilder builder_(_fbb); + builder_.add_reverse(reverse); + builder_.add_exclusive(exclusive); + return builder_.Finish(); +} + +struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef BroadcastToOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct BroadcastToOptionsBuilder +{ + typedef BroadcastToOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<BroadcastToOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<BroadcastToOptions>(end); + return o; + } +}; + +inline 
flatbuffers::Offset<BroadcastToOptions> +CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + BroadcastToOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef Rfft2dOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct Rfft2dOptionsBuilder +{ + typedef Rfft2dOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<Rfft2dOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<Rfft2dOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + Rfft2dOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef HashtableOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_TABLE_ID = 4, + VT_KEY_DTYPE = 6, + VT_VALUE_DTYPE = 8 + }; + int32_t table_id() const { return GetField<int32_t>(VT_TABLE_ID, 0); } + onert_tflite::TensorType key_dtype() const + { + return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_KEY_DTYPE, 0)); + } + onert_tflite::TensorType value_dtype() const + { + return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_VALUE_DTYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_TABLE_ID) && + VerifyField<int8_t>(verifier, VT_KEY_DTYPE) && + VerifyField<int8_t>(verifier, VT_VALUE_DTYPE) && verifier.EndTable(); + } +}; + +struct HashtableOptionsBuilder +{ + typedef HashtableOptions Table; + 
flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_table_id(int32_t table_id) + { + fbb_.AddElement<int32_t>(HashtableOptions::VT_TABLE_ID, table_id, 0); + } + void add_key_dtype(onert_tflite::TensorType key_dtype) + { + fbb_.AddElement<int8_t>(HashtableOptions::VT_KEY_DTYPE, static_cast<int8_t>(key_dtype), 0); + } + void add_value_dtype(onert_tflite::TensorType value_dtype) + { + fbb_.AddElement<int8_t>(HashtableOptions::VT_VALUE_DTYPE, static_cast<int8_t>(value_dtype), 0); + } + explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<HashtableOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<HashtableOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<HashtableOptions> +CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t table_id = 0, + onert_tflite::TensorType key_dtype = onert_tflite::TensorType_FLOAT32, + onert_tflite::TensorType value_dtype = onert_tflite::TensorType_FLOAT32) +{ + HashtableOptionsBuilder builder_(_fbb); + builder_.add_table_id(table_id); + builder_.add_value_dtype(value_dtype); + builder_.add_key_dtype(key_dtype); + return builder_.Finish(); +} + +struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef HashtableFindOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct HashtableFindOptionsBuilder +{ + typedef HashtableFindOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<HashtableFindOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<HashtableFindOptions>(end); + return o; + } +}; + 
+inline flatbuffers::Offset<HashtableFindOptions> +CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + HashtableFindOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef HashtableImportOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct HashtableImportOptionsBuilder +{ + typedef HashtableImportOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<HashtableImportOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<HashtableImportOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<HashtableImportOptions> +CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + HashtableImportOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef HashtableSizeOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct HashtableSizeOptionsBuilder +{ + typedef HashtableSizeOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<HashtableSizeOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<HashtableSizeOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<HashtableSizeOptions> +CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + HashtableSizeOptionsBuilder 
builder_(_fbb); + return builder_.Finish(); +} + +struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef VarHandleOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_CONTAINER = 4, + VT_SHARED_NAME = 6 + }; + const flatbuffers::String *container() const + { + return GetPointer<const flatbuffers::String *>(VT_CONTAINER); + } + const flatbuffers::String *shared_name() const + { + return GetPointer<const flatbuffers::String *>(VT_SHARED_NAME); + } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CONTAINER) && + verifier.VerifyString(container()) && VerifyOffset(verifier, VT_SHARED_NAME) && + verifier.VerifyString(shared_name()) && verifier.EndTable(); + } +}; + +struct VarHandleOptionsBuilder +{ + typedef VarHandleOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_container(flatbuffers::Offset<flatbuffers::String> container) + { + fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container); + } + void add_shared_name(flatbuffers::Offset<flatbuffers::String> shared_name) + { + fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name); + } + explicit VarHandleOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<VarHandleOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<VarHandleOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<VarHandleOptions> +CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::String> container = 0, + flatbuffers::Offset<flatbuffers::String> shared_name = 0) +{ + VarHandleOptionsBuilder builder_(_fbb); + builder_.add_shared_name(shared_name); + builder_.add_container(container); + return builder_.Finish(); +} + +inline flatbuffers::Offset<VarHandleOptions> 
+CreateVarHandleOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *container = nullptr, + const char *shared_name = nullptr) +{ + auto container__ = container ? _fbb.CreateString(container) : 0; + auto shared_name__ = shared_name ? _fbb.CreateString(shared_name) : 0; + return onert_tflite::CreateVarHandleOptions(_fbb, container__, shared_name__); +} + +struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef ReadVariableOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct ReadVariableOptionsBuilder +{ + typedef ReadVariableOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ReadVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<ReadVariableOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<ReadVariableOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<ReadVariableOptions> +CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + ReadVariableOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef AssignVariableOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct AssignVariableOptionsBuilder +{ + typedef AssignVariableOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit AssignVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<AssignVariableOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<AssignVariableOptions>(end); + return o; + } +}; + 
+inline flatbuffers::Offset<AssignVariableOptions> +CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb) +{ + AssignVariableOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct RandomOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef RandomOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_SEED = 4, + VT_SEED2 = 6 }; - BuiltinOperator builtin_code() const + int32_t seed() const { return GetField<int32_t>(VT_SEED, 0); } + int32_t seed2() const { return GetField<int32_t>(VT_SEED2, 0); } + bool Verify(flatbuffers::Verifier &verifier) const { - return static_cast<BuiltinOperator>(GetField<int8_t>(VT_BUILTIN_CODE, 0)); + return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_SEED) && + VerifyField<int32_t>(verifier, VT_SEED2) && verifier.EndTable(); } +}; + +struct RandomOptionsBuilder +{ + typedef RandomOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_seed(int32_t seed) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED, seed, 0); } + void add_seed2(int32_t seed2) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED2, seed2, 0); } + explicit RandomOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<RandomOptions> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<RandomOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb, + int32_t seed = 0, int32_t seed2 = 0) +{ + RandomOptionsBuilder builder_(_fbb); + builder_.add_seed2(seed2); + builder_.add_seed(seed); + return builder_.Finish(); +} + +struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef OperatorCodeBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_DEPRECATED_BUILTIN_CODE = 4, + 
VT_CUSTOM_CODE = 6, + VT_VERSION = 8, + VT_BUILTIN_CODE = 10 + }; + int8_t deprecated_builtin_code() const { return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0); } const flatbuffers::String *custom_code() const { return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE); } int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); } + onert_tflite::BuiltinOperator builtin_code() const + { + return static_cast<onert_tflite::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0)); + } bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_BUILTIN_CODE) && + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE) && VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) && - VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable(); + VerifyField<int32_t>(verifier, VT_VERSION) && + VerifyField<int32_t>(verifier, VT_BUILTIN_CODE) && verifier.EndTable(); } }; struct OperatorCodeBuilder { + typedef OperatorCode Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_builtin_code(BuiltinOperator builtin_code) + void add_deprecated_builtin_code(int8_t deprecated_builtin_code) { - fbb_.AddElement<int8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int8_t>(builtin_code), 0); + fbb_.AddElement<int8_t>(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0); } void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code) { @@ -7173,11 +8409,14 @@ struct OperatorCodeBuilder { fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1); } + void add_builtin_code(onert_tflite::BuiltinOperator builtin_code) + { + fbb_.AddElement<int32_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int32_t>(builtin_code), 0); + } explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - OperatorCodeBuilder &operator=(const 
OperatorCodeBuilder &); flatbuffers::Offset<OperatorCode> Finish() { const auto end = fbb_.EndTable(start_); @@ -7187,29 +8426,32 @@ struct OperatorCodeBuilder }; inline flatbuffers::Offset<OperatorCode> -CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, - BuiltinOperator builtin_code = BuiltinOperator_ADD, - flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1) +CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0, + flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1, + onert_tflite::BuiltinOperator builtin_code = onert_tflite::BuiltinOperator_ADD) { OperatorCodeBuilder builder_(_fbb); + builder_.add_builtin_code(builtin_code); builder_.add_version(version); builder_.add_custom_code(custom_code); - builder_.add_builtin_code(builtin_code); + builder_.add_deprecated_builtin_code(deprecated_builtin_code); return builder_.Finish(); } -inline flatbuffers::Offset<OperatorCode> -CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb, - BuiltinOperator builtin_code = BuiltinOperator_ADD, - const char *custom_code = nullptr, int32_t version = 1) +inline flatbuffers::Offset<OperatorCode> CreateOperatorCodeDirect( + flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0, + const char *custom_code = nullptr, int32_t version = 1, + onert_tflite::BuiltinOperator builtin_code = onert_tflite::BuiltinOperator_ADD) { - return onert_tflite::CreateOperatorCode( - _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version); + auto custom_code__ = custom_code ? 
_fbb.CreateString(custom_code) : 0; + return onert_tflite::CreateOperatorCode(_fbb, deprecated_builtin_code, custom_code__, version, + builtin_code); } struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef OperatorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_OPCODE_INDEX = 4, VT_INPUTS = 6, @@ -7230,628 +8472,715 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS); } - BuiltinOptions builtin_options_type() const + onert_tflite::BuiltinOptions builtin_options_type() const { - return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0)); + return static_cast<onert_tflite::BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0)); } const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); } template <typename T> const T *builtin_options_as() const; - const Conv2DOptions *builtin_options_as_Conv2DOptions() const + const onert_tflite::Conv2DOptions *builtin_options_as_Conv2DOptions() const { - return builtin_options_type() == BuiltinOptions_Conv2DOptions - ? static_cast<const Conv2DOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_Conv2DOptions + ? static_cast<const onert_tflite::Conv2DOptions *>(builtin_options()) : nullptr; } - const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const + const onert_tflite::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const { - return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions - ? static_cast<const DepthwiseConv2DOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_DepthwiseConv2DOptions + ? 
static_cast<const onert_tflite::DepthwiseConv2DOptions *>(builtin_options()) : nullptr; } - const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const + const onert_tflite::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const { - return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions - ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ConcatEmbeddingsOptions + ? static_cast<const onert_tflite::ConcatEmbeddingsOptions *>(builtin_options()) : nullptr; } - const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const + const onert_tflite::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const { - return builtin_options_type() == BuiltinOptions_LSHProjectionOptions - ? static_cast<const LSHProjectionOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_LSHProjectionOptions + ? static_cast<const onert_tflite::LSHProjectionOptions *>(builtin_options()) : nullptr; } - const Pool2DOptions *builtin_options_as_Pool2DOptions() const + const onert_tflite::Pool2DOptions *builtin_options_as_Pool2DOptions() const { - return builtin_options_type() == BuiltinOptions_Pool2DOptions - ? static_cast<const Pool2DOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_Pool2DOptions + ? static_cast<const onert_tflite::Pool2DOptions *>(builtin_options()) : nullptr; } - const SVDFOptions *builtin_options_as_SVDFOptions() const + const onert_tflite::SVDFOptions *builtin_options_as_SVDFOptions() const { - return builtin_options_type() == BuiltinOptions_SVDFOptions - ? static_cast<const SVDFOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SVDFOptions + ? 
static_cast<const onert_tflite::SVDFOptions *>(builtin_options()) : nullptr; } - const RNNOptions *builtin_options_as_RNNOptions() const + const onert_tflite::RNNOptions *builtin_options_as_RNNOptions() const { - return builtin_options_type() == BuiltinOptions_RNNOptions - ? static_cast<const RNNOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_RNNOptions + ? static_cast<const onert_tflite::RNNOptions *>(builtin_options()) : nullptr; } - const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const + const onert_tflite::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const { - return builtin_options_type() == BuiltinOptions_FullyConnectedOptions - ? static_cast<const FullyConnectedOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_FullyConnectedOptions + ? static_cast<const onert_tflite::FullyConnectedOptions *>(builtin_options()) : nullptr; } - const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const + const onert_tflite::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const { - return builtin_options_type() == BuiltinOptions_SoftmaxOptions - ? static_cast<const SoftmaxOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SoftmaxOptions + ? static_cast<const onert_tflite::SoftmaxOptions *>(builtin_options()) : nullptr; } - const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const + const onert_tflite::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const { - return builtin_options_type() == BuiltinOptions_ConcatenationOptions - ? static_cast<const ConcatenationOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ConcatenationOptions + ? 
static_cast<const onert_tflite::ConcatenationOptions *>(builtin_options()) : nullptr; } - const AddOptions *builtin_options_as_AddOptions() const + const onert_tflite::AddOptions *builtin_options_as_AddOptions() const { - return builtin_options_type() == BuiltinOptions_AddOptions - ? static_cast<const AddOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_AddOptions + ? static_cast<const onert_tflite::AddOptions *>(builtin_options()) : nullptr; } - const L2NormOptions *builtin_options_as_L2NormOptions() const + const onert_tflite::L2NormOptions *builtin_options_as_L2NormOptions() const { - return builtin_options_type() == BuiltinOptions_L2NormOptions - ? static_cast<const L2NormOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_L2NormOptions + ? static_cast<const onert_tflite::L2NormOptions *>(builtin_options()) : nullptr; } - const LocalResponseNormalizationOptions * + const onert_tflite::LocalResponseNormalizationOptions * builtin_options_as_LocalResponseNormalizationOptions() const { - return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions - ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_LocalResponseNormalizationOptions + ? static_cast<const onert_tflite::LocalResponseNormalizationOptions *>( + builtin_options()) : nullptr; } - const LSTMOptions *builtin_options_as_LSTMOptions() const + const onert_tflite::LSTMOptions *builtin_options_as_LSTMOptions() const { - return builtin_options_type() == BuiltinOptions_LSTMOptions - ? static_cast<const LSTMOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_LSTMOptions + ? 
static_cast<const onert_tflite::LSTMOptions *>(builtin_options()) : nullptr; } - const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const + const onert_tflite::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const { - return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions - ? static_cast<const ResizeBilinearOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ResizeBilinearOptions + ? static_cast<const onert_tflite::ResizeBilinearOptions *>(builtin_options()) : nullptr; } - const CallOptions *builtin_options_as_CallOptions() const + const onert_tflite::CallOptions *builtin_options_as_CallOptions() const { - return builtin_options_type() == BuiltinOptions_CallOptions - ? static_cast<const CallOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_CallOptions + ? static_cast<const onert_tflite::CallOptions *>(builtin_options()) : nullptr; } - const ReshapeOptions *builtin_options_as_ReshapeOptions() const + const onert_tflite::ReshapeOptions *builtin_options_as_ReshapeOptions() const { - return builtin_options_type() == BuiltinOptions_ReshapeOptions - ? static_cast<const ReshapeOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ReshapeOptions + ? static_cast<const onert_tflite::ReshapeOptions *>(builtin_options()) : nullptr; } - const SkipGramOptions *builtin_options_as_SkipGramOptions() const + const onert_tflite::SkipGramOptions *builtin_options_as_SkipGramOptions() const { - return builtin_options_type() == BuiltinOptions_SkipGramOptions - ? static_cast<const SkipGramOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SkipGramOptions + ? 
static_cast<const onert_tflite::SkipGramOptions *>(builtin_options()) : nullptr; } - const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const + const onert_tflite::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const { - return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions - ? static_cast<const SpaceToDepthOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SpaceToDepthOptions + ? static_cast<const onert_tflite::SpaceToDepthOptions *>(builtin_options()) : nullptr; } - const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const + const onert_tflite::EmbeddingLookupSparseOptions * + builtin_options_as_EmbeddingLookupSparseOptions() const { - return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions - ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_EmbeddingLookupSparseOptions + ? static_cast<const onert_tflite::EmbeddingLookupSparseOptions *>(builtin_options()) : nullptr; } - const MulOptions *builtin_options_as_MulOptions() const + const onert_tflite::MulOptions *builtin_options_as_MulOptions() const { - return builtin_options_type() == BuiltinOptions_MulOptions - ? static_cast<const MulOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_MulOptions + ? static_cast<const onert_tflite::MulOptions *>(builtin_options()) : nullptr; } - const PadOptions *builtin_options_as_PadOptions() const + const onert_tflite::PadOptions *builtin_options_as_PadOptions() const { - return builtin_options_type() == BuiltinOptions_PadOptions - ? static_cast<const PadOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_PadOptions + ? 
static_cast<const onert_tflite::PadOptions *>(builtin_options()) : nullptr; } - const GatherOptions *builtin_options_as_GatherOptions() const + const onert_tflite::GatherOptions *builtin_options_as_GatherOptions() const { - return builtin_options_type() == BuiltinOptions_GatherOptions - ? static_cast<const GatherOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_GatherOptions + ? static_cast<const onert_tflite::GatherOptions *>(builtin_options()) : nullptr; } - const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const + const onert_tflite::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const { - return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions - ? static_cast<const BatchToSpaceNDOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_BatchToSpaceNDOptions + ? static_cast<const onert_tflite::BatchToSpaceNDOptions *>(builtin_options()) : nullptr; } - const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const + const onert_tflite::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const { - return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions - ? static_cast<const SpaceToBatchNDOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SpaceToBatchNDOptions + ? static_cast<const onert_tflite::SpaceToBatchNDOptions *>(builtin_options()) : nullptr; } - const TransposeOptions *builtin_options_as_TransposeOptions() const + const onert_tflite::TransposeOptions *builtin_options_as_TransposeOptions() const { - return builtin_options_type() == BuiltinOptions_TransposeOptions - ? static_cast<const TransposeOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_TransposeOptions + ? 
static_cast<const onert_tflite::TransposeOptions *>(builtin_options()) : nullptr; } - const ReducerOptions *builtin_options_as_ReducerOptions() const + const onert_tflite::ReducerOptions *builtin_options_as_ReducerOptions() const { - return builtin_options_type() == BuiltinOptions_ReducerOptions - ? static_cast<const ReducerOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ReducerOptions + ? static_cast<const onert_tflite::ReducerOptions *>(builtin_options()) : nullptr; } - const SubOptions *builtin_options_as_SubOptions() const + const onert_tflite::SubOptions *builtin_options_as_SubOptions() const { - return builtin_options_type() == BuiltinOptions_SubOptions - ? static_cast<const SubOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SubOptions + ? static_cast<const onert_tflite::SubOptions *>(builtin_options()) : nullptr; } - const DivOptions *builtin_options_as_DivOptions() const + const onert_tflite::DivOptions *builtin_options_as_DivOptions() const { - return builtin_options_type() == BuiltinOptions_DivOptions - ? static_cast<const DivOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_DivOptions + ? static_cast<const onert_tflite::DivOptions *>(builtin_options()) : nullptr; } - const SqueezeOptions *builtin_options_as_SqueezeOptions() const + const onert_tflite::SqueezeOptions *builtin_options_as_SqueezeOptions() const { - return builtin_options_type() == BuiltinOptions_SqueezeOptions - ? static_cast<const SqueezeOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SqueezeOptions + ? 
static_cast<const onert_tflite::SqueezeOptions *>(builtin_options()) : nullptr; } - const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const + const onert_tflite::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const { - return builtin_options_type() == BuiltinOptions_SequenceRNNOptions - ? static_cast<const SequenceRNNOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SequenceRNNOptions + ? static_cast<const onert_tflite::SequenceRNNOptions *>(builtin_options()) : nullptr; } - const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const + const onert_tflite::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const { - return builtin_options_type() == BuiltinOptions_StridedSliceOptions - ? static_cast<const StridedSliceOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_StridedSliceOptions + ? static_cast<const onert_tflite::StridedSliceOptions *>(builtin_options()) : nullptr; } - const ExpOptions *builtin_options_as_ExpOptions() const + const onert_tflite::ExpOptions *builtin_options_as_ExpOptions() const { - return builtin_options_type() == BuiltinOptions_ExpOptions - ? static_cast<const ExpOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ExpOptions + ? static_cast<const onert_tflite::ExpOptions *>(builtin_options()) : nullptr; } - const TopKV2Options *builtin_options_as_TopKV2Options() const + const onert_tflite::TopKV2Options *builtin_options_as_TopKV2Options() const { - return builtin_options_type() == BuiltinOptions_TopKV2Options - ? static_cast<const TopKV2Options *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_TopKV2Options + ? 
static_cast<const onert_tflite::TopKV2Options *>(builtin_options()) : nullptr; } - const SplitOptions *builtin_options_as_SplitOptions() const + const onert_tflite::SplitOptions *builtin_options_as_SplitOptions() const { - return builtin_options_type() == BuiltinOptions_SplitOptions - ? static_cast<const SplitOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SplitOptions + ? static_cast<const onert_tflite::SplitOptions *>(builtin_options()) : nullptr; } - const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const + const onert_tflite::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const { - return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions - ? static_cast<const LogSoftmaxOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_LogSoftmaxOptions + ? static_cast<const onert_tflite::LogSoftmaxOptions *>(builtin_options()) : nullptr; } - const CastOptions *builtin_options_as_CastOptions() const + const onert_tflite::CastOptions *builtin_options_as_CastOptions() const { - return builtin_options_type() == BuiltinOptions_CastOptions - ? static_cast<const CastOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_CastOptions + ? static_cast<const onert_tflite::CastOptions *>(builtin_options()) : nullptr; } - const DequantizeOptions *builtin_options_as_DequantizeOptions() const + const onert_tflite::DequantizeOptions *builtin_options_as_DequantizeOptions() const { - return builtin_options_type() == BuiltinOptions_DequantizeOptions - ? static_cast<const DequantizeOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_DequantizeOptions + ? 
static_cast<const onert_tflite::DequantizeOptions *>(builtin_options()) : nullptr; } - const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const + const onert_tflite::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const { - return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions - ? static_cast<const MaximumMinimumOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_MaximumMinimumOptions + ? static_cast<const onert_tflite::MaximumMinimumOptions *>(builtin_options()) : nullptr; } - const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const + const onert_tflite::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const { - return builtin_options_type() == BuiltinOptions_ArgMaxOptions - ? static_cast<const ArgMaxOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ArgMaxOptions + ? static_cast<const onert_tflite::ArgMaxOptions *>(builtin_options()) : nullptr; } - const LessOptions *builtin_options_as_LessOptions() const + const onert_tflite::LessOptions *builtin_options_as_LessOptions() const { - return builtin_options_type() == BuiltinOptions_LessOptions - ? static_cast<const LessOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_LessOptions + ? static_cast<const onert_tflite::LessOptions *>(builtin_options()) : nullptr; } - const NegOptions *builtin_options_as_NegOptions() const + const onert_tflite::NegOptions *builtin_options_as_NegOptions() const { - return builtin_options_type() == BuiltinOptions_NegOptions - ? static_cast<const NegOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_NegOptions + ? 
static_cast<const onert_tflite::NegOptions *>(builtin_options()) : nullptr; } - const PadV2Options *builtin_options_as_PadV2Options() const + const onert_tflite::PadV2Options *builtin_options_as_PadV2Options() const { - return builtin_options_type() == BuiltinOptions_PadV2Options - ? static_cast<const PadV2Options *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_PadV2Options + ? static_cast<const onert_tflite::PadV2Options *>(builtin_options()) : nullptr; } - const GreaterOptions *builtin_options_as_GreaterOptions() const + const onert_tflite::GreaterOptions *builtin_options_as_GreaterOptions() const { - return builtin_options_type() == BuiltinOptions_GreaterOptions - ? static_cast<const GreaterOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_GreaterOptions + ? static_cast<const onert_tflite::GreaterOptions *>(builtin_options()) : nullptr; } - const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const + const onert_tflite::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const { - return builtin_options_type() == BuiltinOptions_GreaterEqualOptions - ? static_cast<const GreaterEqualOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_GreaterEqualOptions + ? static_cast<const onert_tflite::GreaterEqualOptions *>(builtin_options()) : nullptr; } - const LessEqualOptions *builtin_options_as_LessEqualOptions() const + const onert_tflite::LessEqualOptions *builtin_options_as_LessEqualOptions() const { - return builtin_options_type() == BuiltinOptions_LessEqualOptions - ? static_cast<const LessEqualOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_LessEqualOptions + ? 
static_cast<const onert_tflite::LessEqualOptions *>(builtin_options()) : nullptr; } - const SelectOptions *builtin_options_as_SelectOptions() const + const onert_tflite::SelectOptions *builtin_options_as_SelectOptions() const { - return builtin_options_type() == BuiltinOptions_SelectOptions - ? static_cast<const SelectOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SelectOptions + ? static_cast<const onert_tflite::SelectOptions *>(builtin_options()) : nullptr; } - const SliceOptions *builtin_options_as_SliceOptions() const + const onert_tflite::SliceOptions *builtin_options_as_SliceOptions() const { - return builtin_options_type() == BuiltinOptions_SliceOptions - ? static_cast<const SliceOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SliceOptions + ? static_cast<const onert_tflite::SliceOptions *>(builtin_options()) : nullptr; } - const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const + const onert_tflite::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const { - return builtin_options_type() == BuiltinOptions_TransposeConvOptions - ? static_cast<const TransposeConvOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_TransposeConvOptions + ? static_cast<const onert_tflite::TransposeConvOptions *>(builtin_options()) : nullptr; } - const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const + const onert_tflite::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const { - return builtin_options_type() == BuiltinOptions_SparseToDenseOptions - ? static_cast<const SparseToDenseOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SparseToDenseOptions + ? 
static_cast<const onert_tflite::SparseToDenseOptions *>(builtin_options()) : nullptr; } - const TileOptions *builtin_options_as_TileOptions() const + const onert_tflite::TileOptions *builtin_options_as_TileOptions() const { - return builtin_options_type() == BuiltinOptions_TileOptions - ? static_cast<const TileOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_TileOptions + ? static_cast<const onert_tflite::TileOptions *>(builtin_options()) : nullptr; } - const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const + const onert_tflite::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const { - return builtin_options_type() == BuiltinOptions_ExpandDimsOptions - ? static_cast<const ExpandDimsOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ExpandDimsOptions + ? static_cast<const onert_tflite::ExpandDimsOptions *>(builtin_options()) : nullptr; } - const EqualOptions *builtin_options_as_EqualOptions() const + const onert_tflite::EqualOptions *builtin_options_as_EqualOptions() const { - return builtin_options_type() == BuiltinOptions_EqualOptions - ? static_cast<const EqualOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_EqualOptions + ? static_cast<const onert_tflite::EqualOptions *>(builtin_options()) : nullptr; } - const NotEqualOptions *builtin_options_as_NotEqualOptions() const + const onert_tflite::NotEqualOptions *builtin_options_as_NotEqualOptions() const { - return builtin_options_type() == BuiltinOptions_NotEqualOptions - ? static_cast<const NotEqualOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_NotEqualOptions + ? 
static_cast<const onert_tflite::NotEqualOptions *>(builtin_options()) : nullptr; } - const ShapeOptions *builtin_options_as_ShapeOptions() const + const onert_tflite::ShapeOptions *builtin_options_as_ShapeOptions() const { - return builtin_options_type() == BuiltinOptions_ShapeOptions - ? static_cast<const ShapeOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ShapeOptions + ? static_cast<const onert_tflite::ShapeOptions *>(builtin_options()) : nullptr; } - const PowOptions *builtin_options_as_PowOptions() const + const onert_tflite::PowOptions *builtin_options_as_PowOptions() const { - return builtin_options_type() == BuiltinOptions_PowOptions - ? static_cast<const PowOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_PowOptions + ? static_cast<const onert_tflite::PowOptions *>(builtin_options()) : nullptr; } - const ArgMinOptions *builtin_options_as_ArgMinOptions() const + const onert_tflite::ArgMinOptions *builtin_options_as_ArgMinOptions() const { - return builtin_options_type() == BuiltinOptions_ArgMinOptions - ? static_cast<const ArgMinOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ArgMinOptions + ? static_cast<const onert_tflite::ArgMinOptions *>(builtin_options()) : nullptr; } - const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const + const onert_tflite::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const { - return builtin_options_type() == BuiltinOptions_FakeQuantOptions - ? static_cast<const FakeQuantOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_FakeQuantOptions + ? static_cast<const onert_tflite::FakeQuantOptions *>(builtin_options()) : nullptr; } - const PackOptions *builtin_options_as_PackOptions() const + const onert_tflite::PackOptions *builtin_options_as_PackOptions() const { - return builtin_options_type() == BuiltinOptions_PackOptions - ? 
static_cast<const PackOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_PackOptions + ? static_cast<const onert_tflite::PackOptions *>(builtin_options()) : nullptr; } - const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const + const onert_tflite::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const { - return builtin_options_type() == BuiltinOptions_LogicalOrOptions - ? static_cast<const LogicalOrOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalOrOptions + ? static_cast<const onert_tflite::LogicalOrOptions *>(builtin_options()) : nullptr; } - const OneHotOptions *builtin_options_as_OneHotOptions() const + const onert_tflite::OneHotOptions *builtin_options_as_OneHotOptions() const { - return builtin_options_type() == BuiltinOptions_OneHotOptions - ? static_cast<const OneHotOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_OneHotOptions + ? static_cast<const onert_tflite::OneHotOptions *>(builtin_options()) : nullptr; } - const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const + const onert_tflite::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const { - return builtin_options_type() == BuiltinOptions_LogicalAndOptions - ? static_cast<const LogicalAndOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalAndOptions + ? static_cast<const onert_tflite::LogicalAndOptions *>(builtin_options()) : nullptr; } - const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const + const onert_tflite::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const { - return builtin_options_type() == BuiltinOptions_LogicalNotOptions - ? static_cast<const LogicalNotOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalNotOptions + ? 
static_cast<const onert_tflite::LogicalNotOptions *>(builtin_options()) : nullptr; } - const UnpackOptions *builtin_options_as_UnpackOptions() const + const onert_tflite::UnpackOptions *builtin_options_as_UnpackOptions() const { - return builtin_options_type() == BuiltinOptions_UnpackOptions - ? static_cast<const UnpackOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_UnpackOptions + ? static_cast<const onert_tflite::UnpackOptions *>(builtin_options()) : nullptr; } - const FloorDivOptions *builtin_options_as_FloorDivOptions() const + const onert_tflite::FloorDivOptions *builtin_options_as_FloorDivOptions() const { - return builtin_options_type() == BuiltinOptions_FloorDivOptions - ? static_cast<const FloorDivOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_FloorDivOptions + ? static_cast<const onert_tflite::FloorDivOptions *>(builtin_options()) : nullptr; } - const SquareOptions *builtin_options_as_SquareOptions() const + const onert_tflite::SquareOptions *builtin_options_as_SquareOptions() const { - return builtin_options_type() == BuiltinOptions_SquareOptions - ? static_cast<const SquareOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SquareOptions + ? static_cast<const onert_tflite::SquareOptions *>(builtin_options()) : nullptr; } - const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const + const onert_tflite::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const { - return builtin_options_type() == BuiltinOptions_ZerosLikeOptions - ? static_cast<const ZerosLikeOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ZerosLikeOptions + ? 
static_cast<const onert_tflite::ZerosLikeOptions *>(builtin_options()) : nullptr; } - const FillOptions *builtin_options_as_FillOptions() const + const onert_tflite::FillOptions *builtin_options_as_FillOptions() const { - return builtin_options_type() == BuiltinOptions_FillOptions - ? static_cast<const FillOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_FillOptions + ? static_cast<const onert_tflite::FillOptions *>(builtin_options()) : nullptr; } - const BidirectionalSequenceLSTMOptions * + const onert_tflite::BidirectionalSequenceLSTMOptions * builtin_options_as_BidirectionalSequenceLSTMOptions() const { - return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions - ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_BidirectionalSequenceLSTMOptions + ? static_cast<const onert_tflite::BidirectionalSequenceLSTMOptions *>( + builtin_options()) : nullptr; } - const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const + const onert_tflite::BidirectionalSequenceRNNOptions * + builtin_options_as_BidirectionalSequenceRNNOptions() const { - return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions - ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_BidirectionalSequenceRNNOptions + ? static_cast<const onert_tflite::BidirectionalSequenceRNNOptions *>(builtin_options()) : nullptr; } - const UnidirectionalSequenceLSTMOptions * + const onert_tflite::UnidirectionalSequenceLSTMOptions * builtin_options_as_UnidirectionalSequenceLSTMOptions() const { - return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions - ? 
static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_UnidirectionalSequenceLSTMOptions + ? static_cast<const onert_tflite::UnidirectionalSequenceLSTMOptions *>( + builtin_options()) : nullptr; } - const FloorModOptions *builtin_options_as_FloorModOptions() const + const onert_tflite::FloorModOptions *builtin_options_as_FloorModOptions() const { - return builtin_options_type() == BuiltinOptions_FloorModOptions - ? static_cast<const FloorModOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_FloorModOptions + ? static_cast<const onert_tflite::FloorModOptions *>(builtin_options()) : nullptr; } - const RangeOptions *builtin_options_as_RangeOptions() const + const onert_tflite::RangeOptions *builtin_options_as_RangeOptions() const { - return builtin_options_type() == BuiltinOptions_RangeOptions - ? static_cast<const RangeOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_RangeOptions + ? static_cast<const onert_tflite::RangeOptions *>(builtin_options()) : nullptr; } - const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const + const onert_tflite::ResizeNearestNeighborOptions * + builtin_options_as_ResizeNearestNeighborOptions() const { - return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions - ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ResizeNearestNeighborOptions + ? static_cast<const onert_tflite::ResizeNearestNeighborOptions *>(builtin_options()) : nullptr; } - const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const + const onert_tflite::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const { - return builtin_options_type() == BuiltinOptions_LeakyReluOptions - ? 
static_cast<const LeakyReluOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_LeakyReluOptions + ? static_cast<const onert_tflite::LeakyReluOptions *>(builtin_options()) : nullptr; } - const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const + const onert_tflite::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const { - return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions - ? static_cast<const SquaredDifferenceOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SquaredDifferenceOptions + ? static_cast<const onert_tflite::SquaredDifferenceOptions *>(builtin_options()) : nullptr; } - const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const + const onert_tflite::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const { - return builtin_options_type() == BuiltinOptions_MirrorPadOptions - ? static_cast<const MirrorPadOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_MirrorPadOptions + ? static_cast<const onert_tflite::MirrorPadOptions *>(builtin_options()) : nullptr; } - const AbsOptions *builtin_options_as_AbsOptions() const + const onert_tflite::AbsOptions *builtin_options_as_AbsOptions() const { - return builtin_options_type() == BuiltinOptions_AbsOptions - ? static_cast<const AbsOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_AbsOptions + ? static_cast<const onert_tflite::AbsOptions *>(builtin_options()) : nullptr; } - const SplitVOptions *builtin_options_as_SplitVOptions() const + const onert_tflite::SplitVOptions *builtin_options_as_SplitVOptions() const { - return builtin_options_type() == BuiltinOptions_SplitVOptions - ? static_cast<const SplitVOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SplitVOptions + ? 
static_cast<const onert_tflite::SplitVOptions *>(builtin_options()) : nullptr; } - const UniqueOptions *builtin_options_as_UniqueOptions() const + const onert_tflite::UniqueOptions *builtin_options_as_UniqueOptions() const { - return builtin_options_type() == BuiltinOptions_UniqueOptions - ? static_cast<const UniqueOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_UniqueOptions + ? static_cast<const onert_tflite::UniqueOptions *>(builtin_options()) : nullptr; } - const ReverseV2Options *builtin_options_as_ReverseV2Options() const + const onert_tflite::ReverseV2Options *builtin_options_as_ReverseV2Options() const { - return builtin_options_type() == BuiltinOptions_ReverseV2Options - ? static_cast<const ReverseV2Options *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ReverseV2Options + ? static_cast<const onert_tflite::ReverseV2Options *>(builtin_options()) : nullptr; } - const AddNOptions *builtin_options_as_AddNOptions() const + const onert_tflite::AddNOptions *builtin_options_as_AddNOptions() const { - return builtin_options_type() == BuiltinOptions_AddNOptions - ? static_cast<const AddNOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_AddNOptions + ? static_cast<const onert_tflite::AddNOptions *>(builtin_options()) : nullptr; } - const GatherNdOptions *builtin_options_as_GatherNdOptions() const + const onert_tflite::GatherNdOptions *builtin_options_as_GatherNdOptions() const { - return builtin_options_type() == BuiltinOptions_GatherNdOptions - ? static_cast<const GatherNdOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_GatherNdOptions + ? 
static_cast<const onert_tflite::GatherNdOptions *>(builtin_options()) : nullptr; } - const CosOptions *builtin_options_as_CosOptions() const + const onert_tflite::CosOptions *builtin_options_as_CosOptions() const { - return builtin_options_type() == BuiltinOptions_CosOptions - ? static_cast<const CosOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_CosOptions + ? static_cast<const onert_tflite::CosOptions *>(builtin_options()) : nullptr; } - const WhereOptions *builtin_options_as_WhereOptions() const + const onert_tflite::WhereOptions *builtin_options_as_WhereOptions() const { - return builtin_options_type() == BuiltinOptions_WhereOptions - ? static_cast<const WhereOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_WhereOptions + ? static_cast<const onert_tflite::WhereOptions *>(builtin_options()) : nullptr; } - const RankOptions *builtin_options_as_RankOptions() const + const onert_tflite::RankOptions *builtin_options_as_RankOptions() const { - return builtin_options_type() == BuiltinOptions_RankOptions - ? static_cast<const RankOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_RankOptions + ? static_cast<const onert_tflite::RankOptions *>(builtin_options()) : nullptr; } - const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const + const onert_tflite::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const { - return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions - ? static_cast<const ReverseSequenceOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ReverseSequenceOptions + ? 
static_cast<const onert_tflite::ReverseSequenceOptions *>(builtin_options()) : nullptr; } - const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const + const onert_tflite::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const { - return builtin_options_type() == BuiltinOptions_MatrixDiagOptions - ? static_cast<const MatrixDiagOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_MatrixDiagOptions + ? static_cast<const onert_tflite::MatrixDiagOptions *>(builtin_options()) : nullptr; } - const QuantizeOptions *builtin_options_as_QuantizeOptions() const + const onert_tflite::QuantizeOptions *builtin_options_as_QuantizeOptions() const { - return builtin_options_type() == BuiltinOptions_QuantizeOptions - ? static_cast<const QuantizeOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_QuantizeOptions + ? static_cast<const onert_tflite::QuantizeOptions *>(builtin_options()) : nullptr; } - const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const + const onert_tflite::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const { - return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions - ? static_cast<const MatrixSetDiagOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_MatrixSetDiagOptions + ? static_cast<const onert_tflite::MatrixSetDiagOptions *>(builtin_options()) : nullptr; } - const HardSwishOptions *builtin_options_as_HardSwishOptions() const + const onert_tflite::HardSwishOptions *builtin_options_as_HardSwishOptions() const { - return builtin_options_type() == BuiltinOptions_HardSwishOptions - ? static_cast<const HardSwishOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_HardSwishOptions + ? 
static_cast<const onert_tflite::HardSwishOptions *>(builtin_options()) : nullptr; } - const IfOptions *builtin_options_as_IfOptions() const + const onert_tflite::IfOptions *builtin_options_as_IfOptions() const { - return builtin_options_type() == BuiltinOptions_IfOptions - ? static_cast<const IfOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_IfOptions + ? static_cast<const onert_tflite::IfOptions *>(builtin_options()) : nullptr; } - const WhileOptions *builtin_options_as_WhileOptions() const + const onert_tflite::WhileOptions *builtin_options_as_WhileOptions() const { - return builtin_options_type() == BuiltinOptions_WhileOptions - ? static_cast<const WhileOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_WhileOptions + ? static_cast<const onert_tflite::WhileOptions *>(builtin_options()) : nullptr; } - const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const + const onert_tflite::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const { - return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions - ? static_cast<const DepthToSpaceOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_DepthToSpaceOptions + ? static_cast<const onert_tflite::DepthToSpaceOptions *>(builtin_options()) : nullptr; } - const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const + const onert_tflite::NonMaxSuppressionV4Options * + builtin_options_as_NonMaxSuppressionV4Options() const { - return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options - ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_NonMaxSuppressionV4Options + ? 
static_cast<const onert_tflite::NonMaxSuppressionV4Options *>(builtin_options()) : nullptr; } - const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const + const onert_tflite::NonMaxSuppressionV5Options * + builtin_options_as_NonMaxSuppressionV5Options() const { - return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options - ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_NonMaxSuppressionV5Options + ? static_cast<const onert_tflite::NonMaxSuppressionV5Options *>(builtin_options()) : nullptr; } - const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const + const onert_tflite::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const { - return builtin_options_type() == BuiltinOptions_ScatterNdOptions - ? static_cast<const ScatterNdOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_ScatterNdOptions + ? static_cast<const onert_tflite::ScatterNdOptions *>(builtin_options()) : nullptr; } - const SelectV2Options *builtin_options_as_SelectV2Options() const + const onert_tflite::SelectV2Options *builtin_options_as_SelectV2Options() const { - return builtin_options_type() == BuiltinOptions_SelectV2Options - ? static_cast<const SelectV2Options *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SelectV2Options + ? static_cast<const onert_tflite::SelectV2Options *>(builtin_options()) : nullptr; } - const DensifyOptions *builtin_options_as_DensifyOptions() const + const onert_tflite::DensifyOptions *builtin_options_as_DensifyOptions() const { - return builtin_options_type() == BuiltinOptions_DensifyOptions - ? static_cast<const DensifyOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_DensifyOptions + ? 
static_cast<const onert_tflite::DensifyOptions *>(builtin_options()) : nullptr; } - const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const + const onert_tflite::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const { - return builtin_options_type() == BuiltinOptions_SegmentSumOptions - ? static_cast<const SegmentSumOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_SegmentSumOptions + ? static_cast<const onert_tflite::SegmentSumOptions *>(builtin_options()) : nullptr; } - const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const + const onert_tflite::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const { - return builtin_options_type() == BuiltinOptions_BatchMatMulOptions - ? static_cast<const BatchMatMulOptions *>(builtin_options()) + return builtin_options_type() == onert_tflite::BuiltinOptions_BatchMatMulOptions + ? static_cast<const onert_tflite::BatchMatMulOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::CumsumOptions *builtin_options_as_CumsumOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_CumsumOptions + ? static_cast<const onert_tflite::CumsumOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::CallOnceOptions *builtin_options_as_CallOnceOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_CallOnceOptions + ? static_cast<const onert_tflite::CallOnceOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_BroadcastToOptions + ? static_cast<const onert_tflite::BroadcastToOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_Rfft2dOptions + ? 
static_cast<const onert_tflite::Rfft2dOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::Conv3DOptions *builtin_options_as_Conv3DOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_Conv3DOptions + ? static_cast<const onert_tflite::Conv3DOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::HashtableOptions *builtin_options_as_HashtableOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableOptions + ? static_cast<const onert_tflite::HashtableOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableFindOptions + ? static_cast<const onert_tflite::HashtableFindOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableImportOptions + ? static_cast<const onert_tflite::HashtableImportOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableSizeOptions + ? static_cast<const onert_tflite::HashtableSizeOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::VarHandleOptions *builtin_options_as_VarHandleOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_VarHandleOptions + ? static_cast<const onert_tflite::VarHandleOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_ReadVariableOptions + ? 
static_cast<const onert_tflite::ReadVariableOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_AssignVariableOptions + ? static_cast<const onert_tflite::AssignVariableOptions *>(builtin_options()) + : nullptr; + } + const onert_tflite::RandomOptions *builtin_options_as_RandomOptions() const + { + return builtin_options_type() == onert_tflite::BuiltinOptions_RandomOptions + ? static_cast<const onert_tflite::RandomOptions *>(builtin_options()) : nullptr; } const flatbuffers::Vector<uint8_t> *custom_options() const { return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS); } - CustomOptionsFormat custom_options_format() const + onert_tflite::CustomOptionsFormat custom_options_format() const { - return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0)); + return static_cast<onert_tflite::CustomOptionsFormat>( + GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0)); } const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const { @@ -7878,550 +9207,806 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table } }; -template <> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const +template <> +inline const onert_tflite::Conv2DOptions * +Operator::builtin_options_as<onert_tflite::Conv2DOptions>() const { return builtin_options_as_Conv2DOptions(); } template <> -inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const +inline const onert_tflite::DepthwiseConv2DOptions * +Operator::builtin_options_as<onert_tflite::DepthwiseConv2DOptions>() const { return builtin_options_as_DepthwiseConv2DOptions(); } template <> -inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const +inline const onert_tflite::ConcatEmbeddingsOptions * 
+Operator::builtin_options_as<onert_tflite::ConcatEmbeddingsOptions>() const { return builtin_options_as_ConcatEmbeddingsOptions(); } template <> -inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const +inline const onert_tflite::LSHProjectionOptions * +Operator::builtin_options_as<onert_tflite::LSHProjectionOptions>() const { return builtin_options_as_LSHProjectionOptions(); } -template <> inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const +template <> +inline const onert_tflite::Pool2DOptions * +Operator::builtin_options_as<onert_tflite::Pool2DOptions>() const { return builtin_options_as_Pool2DOptions(); } -template <> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const +template <> +inline const onert_tflite::SVDFOptions * +Operator::builtin_options_as<onert_tflite::SVDFOptions>() const { return builtin_options_as_SVDFOptions(); } -template <> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const +template <> +inline const onert_tflite::RNNOptions * +Operator::builtin_options_as<onert_tflite::RNNOptions>() const { return builtin_options_as_RNNOptions(); } template <> -inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const +inline const onert_tflite::FullyConnectedOptions * +Operator::builtin_options_as<onert_tflite::FullyConnectedOptions>() const { return builtin_options_as_FullyConnectedOptions(); } -template <> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const +template <> +inline const onert_tflite::SoftmaxOptions * +Operator::builtin_options_as<onert_tflite::SoftmaxOptions>() const { return builtin_options_as_SoftmaxOptions(); } template <> -inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const +inline const onert_tflite::ConcatenationOptions * +Operator::builtin_options_as<onert_tflite::ConcatenationOptions>() const { return 
builtin_options_as_ConcatenationOptions(); } -template <> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const +template <> +inline const onert_tflite::AddOptions * +Operator::builtin_options_as<onert_tflite::AddOptions>() const { return builtin_options_as_AddOptions(); } -template <> inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>() const +template <> +inline const onert_tflite::L2NormOptions * +Operator::builtin_options_as<onert_tflite::L2NormOptions>() const { return builtin_options_as_L2NormOptions(); } template <> -inline const LocalResponseNormalizationOptions * -Operator::builtin_options_as<LocalResponseNormalizationOptions>() const +inline const onert_tflite::LocalResponseNormalizationOptions * +Operator::builtin_options_as<onert_tflite::LocalResponseNormalizationOptions>() const { return builtin_options_as_LocalResponseNormalizationOptions(); } -template <> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const +template <> +inline const onert_tflite::LSTMOptions * +Operator::builtin_options_as<onert_tflite::LSTMOptions>() const { return builtin_options_as_LSTMOptions(); } template <> -inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const +inline const onert_tflite::ResizeBilinearOptions * +Operator::builtin_options_as<onert_tflite::ResizeBilinearOptions>() const { return builtin_options_as_ResizeBilinearOptions(); } -template <> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const +template <> +inline const onert_tflite::CallOptions * +Operator::builtin_options_as<onert_tflite::CallOptions>() const { return builtin_options_as_CallOptions(); } -template <> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const +template <> +inline const onert_tflite::ReshapeOptions * +Operator::builtin_options_as<onert_tflite::ReshapeOptions>() const { return builtin_options_as_ReshapeOptions(); } -template <> inline 
const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const +template <> +inline const onert_tflite::SkipGramOptions * +Operator::builtin_options_as<onert_tflite::SkipGramOptions>() const { return builtin_options_as_SkipGramOptions(); } template <> -inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const +inline const onert_tflite::SpaceToDepthOptions * +Operator::builtin_options_as<onert_tflite::SpaceToDepthOptions>() const { return builtin_options_as_SpaceToDepthOptions(); } template <> -inline const EmbeddingLookupSparseOptions * -Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const +inline const onert_tflite::EmbeddingLookupSparseOptions * +Operator::builtin_options_as<onert_tflite::EmbeddingLookupSparseOptions>() const { return builtin_options_as_EmbeddingLookupSparseOptions(); } -template <> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const +template <> +inline const onert_tflite::MulOptions * +Operator::builtin_options_as<onert_tflite::MulOptions>() const { return builtin_options_as_MulOptions(); } -template <> inline const PadOptions *Operator::builtin_options_as<PadOptions>() const +template <> +inline const onert_tflite::PadOptions * +Operator::builtin_options_as<onert_tflite::PadOptions>() const { return builtin_options_as_PadOptions(); } -template <> inline const GatherOptions *Operator::builtin_options_as<GatherOptions>() const +template <> +inline const onert_tflite::GatherOptions * +Operator::builtin_options_as<onert_tflite::GatherOptions>() const { return builtin_options_as_GatherOptions(); } template <> -inline const BatchToSpaceNDOptions *Operator::builtin_options_as<BatchToSpaceNDOptions>() const +inline const onert_tflite::BatchToSpaceNDOptions * +Operator::builtin_options_as<onert_tflite::BatchToSpaceNDOptions>() const { return builtin_options_as_BatchToSpaceNDOptions(); } template <> -inline const SpaceToBatchNDOptions 
*Operator::builtin_options_as<SpaceToBatchNDOptions>() const +inline const onert_tflite::SpaceToBatchNDOptions * +Operator::builtin_options_as<onert_tflite::SpaceToBatchNDOptions>() const { return builtin_options_as_SpaceToBatchNDOptions(); } -template <> inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>() const +template <> +inline const onert_tflite::TransposeOptions * +Operator::builtin_options_as<onert_tflite::TransposeOptions>() const { return builtin_options_as_TransposeOptions(); } -template <> inline const ReducerOptions *Operator::builtin_options_as<ReducerOptions>() const +template <> +inline const onert_tflite::ReducerOptions * +Operator::builtin_options_as<onert_tflite::ReducerOptions>() const { return builtin_options_as_ReducerOptions(); } -template <> inline const SubOptions *Operator::builtin_options_as<SubOptions>() const +template <> +inline const onert_tflite::SubOptions * +Operator::builtin_options_as<onert_tflite::SubOptions>() const { return builtin_options_as_SubOptions(); } -template <> inline const DivOptions *Operator::builtin_options_as<DivOptions>() const +template <> +inline const onert_tflite::DivOptions * +Operator::builtin_options_as<onert_tflite::DivOptions>() const { return builtin_options_as_DivOptions(); } -template <> inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>() const +template <> +inline const onert_tflite::SqueezeOptions * +Operator::builtin_options_as<onert_tflite::SqueezeOptions>() const { return builtin_options_as_SqueezeOptions(); } template <> -inline const SequenceRNNOptions *Operator::builtin_options_as<SequenceRNNOptions>() const +inline const onert_tflite::SequenceRNNOptions * +Operator::builtin_options_as<onert_tflite::SequenceRNNOptions>() const { return builtin_options_as_SequenceRNNOptions(); } template <> -inline const StridedSliceOptions *Operator::builtin_options_as<StridedSliceOptions>() const +inline const onert_tflite::StridedSliceOptions * 
+Operator::builtin_options_as<onert_tflite::StridedSliceOptions>() const { return builtin_options_as_StridedSliceOptions(); } -template <> inline const ExpOptions *Operator::builtin_options_as<ExpOptions>() const +template <> +inline const onert_tflite::ExpOptions * +Operator::builtin_options_as<onert_tflite::ExpOptions>() const { return builtin_options_as_ExpOptions(); } -template <> inline const TopKV2Options *Operator::builtin_options_as<TopKV2Options>() const +template <> +inline const onert_tflite::TopKV2Options * +Operator::builtin_options_as<onert_tflite::TopKV2Options>() const { return builtin_options_as_TopKV2Options(); } -template <> inline const SplitOptions *Operator::builtin_options_as<SplitOptions>() const +template <> +inline const onert_tflite::SplitOptions * +Operator::builtin_options_as<onert_tflite::SplitOptions>() const { return builtin_options_as_SplitOptions(); } -template <> inline const LogSoftmaxOptions *Operator::builtin_options_as<LogSoftmaxOptions>() const +template <> +inline const onert_tflite::LogSoftmaxOptions * +Operator::builtin_options_as<onert_tflite::LogSoftmaxOptions>() const { return builtin_options_as_LogSoftmaxOptions(); } -template <> inline const CastOptions *Operator::builtin_options_as<CastOptions>() const +template <> +inline const onert_tflite::CastOptions * +Operator::builtin_options_as<onert_tflite::CastOptions>() const { return builtin_options_as_CastOptions(); } -template <> inline const DequantizeOptions *Operator::builtin_options_as<DequantizeOptions>() const +template <> +inline const onert_tflite::DequantizeOptions * +Operator::builtin_options_as<onert_tflite::DequantizeOptions>() const { return builtin_options_as_DequantizeOptions(); } template <> -inline const MaximumMinimumOptions *Operator::builtin_options_as<MaximumMinimumOptions>() const +inline const onert_tflite::MaximumMinimumOptions * +Operator::builtin_options_as<onert_tflite::MaximumMinimumOptions>() const { return 
builtin_options_as_MaximumMinimumOptions(); } -template <> inline const ArgMaxOptions *Operator::builtin_options_as<ArgMaxOptions>() const +template <> +inline const onert_tflite::ArgMaxOptions * +Operator::builtin_options_as<onert_tflite::ArgMaxOptions>() const { return builtin_options_as_ArgMaxOptions(); } -template <> inline const LessOptions *Operator::builtin_options_as<LessOptions>() const +template <> +inline const onert_tflite::LessOptions * +Operator::builtin_options_as<onert_tflite::LessOptions>() const { return builtin_options_as_LessOptions(); } -template <> inline const NegOptions *Operator::builtin_options_as<NegOptions>() const +template <> +inline const onert_tflite::NegOptions * +Operator::builtin_options_as<onert_tflite::NegOptions>() const { return builtin_options_as_NegOptions(); } -template <> inline const PadV2Options *Operator::builtin_options_as<PadV2Options>() const +template <> +inline const onert_tflite::PadV2Options * +Operator::builtin_options_as<onert_tflite::PadV2Options>() const { return builtin_options_as_PadV2Options(); } -template <> inline const GreaterOptions *Operator::builtin_options_as<GreaterOptions>() const +template <> +inline const onert_tflite::GreaterOptions * +Operator::builtin_options_as<onert_tflite::GreaterOptions>() const { return builtin_options_as_GreaterOptions(); } template <> -inline const GreaterEqualOptions *Operator::builtin_options_as<GreaterEqualOptions>() const +inline const onert_tflite::GreaterEqualOptions * +Operator::builtin_options_as<onert_tflite::GreaterEqualOptions>() const { return builtin_options_as_GreaterEqualOptions(); } -template <> inline const LessEqualOptions *Operator::builtin_options_as<LessEqualOptions>() const +template <> +inline const onert_tflite::LessEqualOptions * +Operator::builtin_options_as<onert_tflite::LessEqualOptions>() const { return builtin_options_as_LessEqualOptions(); } -template <> inline const SelectOptions *Operator::builtin_options_as<SelectOptions>() const 
+template <> +inline const onert_tflite::SelectOptions * +Operator::builtin_options_as<onert_tflite::SelectOptions>() const { return builtin_options_as_SelectOptions(); } -template <> inline const SliceOptions *Operator::builtin_options_as<SliceOptions>() const +template <> +inline const onert_tflite::SliceOptions * +Operator::builtin_options_as<onert_tflite::SliceOptions>() const { return builtin_options_as_SliceOptions(); } template <> -inline const TransposeConvOptions *Operator::builtin_options_as<TransposeConvOptions>() const +inline const onert_tflite::TransposeConvOptions * +Operator::builtin_options_as<onert_tflite::TransposeConvOptions>() const { return builtin_options_as_TransposeConvOptions(); } template <> -inline const SparseToDenseOptions *Operator::builtin_options_as<SparseToDenseOptions>() const +inline const onert_tflite::SparseToDenseOptions * +Operator::builtin_options_as<onert_tflite::SparseToDenseOptions>() const { return builtin_options_as_SparseToDenseOptions(); } -template <> inline const TileOptions *Operator::builtin_options_as<TileOptions>() const +template <> +inline const onert_tflite::TileOptions * +Operator::builtin_options_as<onert_tflite::TileOptions>() const { return builtin_options_as_TileOptions(); } -template <> inline const ExpandDimsOptions *Operator::builtin_options_as<ExpandDimsOptions>() const +template <> +inline const onert_tflite::ExpandDimsOptions * +Operator::builtin_options_as<onert_tflite::ExpandDimsOptions>() const { return builtin_options_as_ExpandDimsOptions(); } -template <> inline const EqualOptions *Operator::builtin_options_as<EqualOptions>() const +template <> +inline const onert_tflite::EqualOptions * +Operator::builtin_options_as<onert_tflite::EqualOptions>() const { return builtin_options_as_EqualOptions(); } -template <> inline const NotEqualOptions *Operator::builtin_options_as<NotEqualOptions>() const +template <> +inline const onert_tflite::NotEqualOptions * 
+Operator::builtin_options_as<onert_tflite::NotEqualOptions>() const { return builtin_options_as_NotEqualOptions(); } -template <> inline const ShapeOptions *Operator::builtin_options_as<ShapeOptions>() const +template <> +inline const onert_tflite::ShapeOptions * +Operator::builtin_options_as<onert_tflite::ShapeOptions>() const { return builtin_options_as_ShapeOptions(); } -template <> inline const PowOptions *Operator::builtin_options_as<PowOptions>() const +template <> +inline const onert_tflite::PowOptions * +Operator::builtin_options_as<onert_tflite::PowOptions>() const { return builtin_options_as_PowOptions(); } -template <> inline const ArgMinOptions *Operator::builtin_options_as<ArgMinOptions>() const +template <> +inline const onert_tflite::ArgMinOptions * +Operator::builtin_options_as<onert_tflite::ArgMinOptions>() const { return builtin_options_as_ArgMinOptions(); } -template <> inline const FakeQuantOptions *Operator::builtin_options_as<FakeQuantOptions>() const +template <> +inline const onert_tflite::FakeQuantOptions * +Operator::builtin_options_as<onert_tflite::FakeQuantOptions>() const { return builtin_options_as_FakeQuantOptions(); } -template <> inline const PackOptions *Operator::builtin_options_as<PackOptions>() const +template <> +inline const onert_tflite::PackOptions * +Operator::builtin_options_as<onert_tflite::PackOptions>() const { return builtin_options_as_PackOptions(); } -template <> inline const LogicalOrOptions *Operator::builtin_options_as<LogicalOrOptions>() const +template <> +inline const onert_tflite::LogicalOrOptions * +Operator::builtin_options_as<onert_tflite::LogicalOrOptions>() const { return builtin_options_as_LogicalOrOptions(); } -template <> inline const OneHotOptions *Operator::builtin_options_as<OneHotOptions>() const +template <> +inline const onert_tflite::OneHotOptions * +Operator::builtin_options_as<onert_tflite::OneHotOptions>() const { return builtin_options_as_OneHotOptions(); } -template <> inline const 
LogicalAndOptions *Operator::builtin_options_as<LogicalAndOptions>() const +template <> +inline const onert_tflite::LogicalAndOptions * +Operator::builtin_options_as<onert_tflite::LogicalAndOptions>() const { return builtin_options_as_LogicalAndOptions(); } -template <> inline const LogicalNotOptions *Operator::builtin_options_as<LogicalNotOptions>() const +template <> +inline const onert_tflite::LogicalNotOptions * +Operator::builtin_options_as<onert_tflite::LogicalNotOptions>() const { return builtin_options_as_LogicalNotOptions(); } -template <> inline const UnpackOptions *Operator::builtin_options_as<UnpackOptions>() const +template <> +inline const onert_tflite::UnpackOptions * +Operator::builtin_options_as<onert_tflite::UnpackOptions>() const { return builtin_options_as_UnpackOptions(); } -template <> inline const FloorDivOptions *Operator::builtin_options_as<FloorDivOptions>() const +template <> +inline const onert_tflite::FloorDivOptions * +Operator::builtin_options_as<onert_tflite::FloorDivOptions>() const { return builtin_options_as_FloorDivOptions(); } -template <> inline const SquareOptions *Operator::builtin_options_as<SquareOptions>() const +template <> +inline const onert_tflite::SquareOptions * +Operator::builtin_options_as<onert_tflite::SquareOptions>() const { return builtin_options_as_SquareOptions(); } -template <> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const +template <> +inline const onert_tflite::ZerosLikeOptions * +Operator::builtin_options_as<onert_tflite::ZerosLikeOptions>() const { return builtin_options_as_ZerosLikeOptions(); } -template <> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const +template <> +inline const onert_tflite::FillOptions * +Operator::builtin_options_as<onert_tflite::FillOptions>() const { return builtin_options_as_FillOptions(); } template <> -inline const BidirectionalSequenceLSTMOptions * 
-Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const +inline const onert_tflite::BidirectionalSequenceLSTMOptions * +Operator::builtin_options_as<onert_tflite::BidirectionalSequenceLSTMOptions>() const { return builtin_options_as_BidirectionalSequenceLSTMOptions(); } template <> -inline const BidirectionalSequenceRNNOptions * -Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const +inline const onert_tflite::BidirectionalSequenceRNNOptions * +Operator::builtin_options_as<onert_tflite::BidirectionalSequenceRNNOptions>() const { return builtin_options_as_BidirectionalSequenceRNNOptions(); } template <> -inline const UnidirectionalSequenceLSTMOptions * -Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const +inline const onert_tflite::UnidirectionalSequenceLSTMOptions * +Operator::builtin_options_as<onert_tflite::UnidirectionalSequenceLSTMOptions>() const { return builtin_options_as_UnidirectionalSequenceLSTMOptions(); } -template <> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const +template <> +inline const onert_tflite::FloorModOptions * +Operator::builtin_options_as<onert_tflite::FloorModOptions>() const { return builtin_options_as_FloorModOptions(); } -template <> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const +template <> +inline const onert_tflite::RangeOptions * +Operator::builtin_options_as<onert_tflite::RangeOptions>() const { return builtin_options_as_RangeOptions(); } template <> -inline const ResizeNearestNeighborOptions * -Operator::builtin_options_as<ResizeNearestNeighborOptions>() const +inline const onert_tflite::ResizeNearestNeighborOptions * +Operator::builtin_options_as<onert_tflite::ResizeNearestNeighborOptions>() const { return builtin_options_as_ResizeNearestNeighborOptions(); } -template <> inline const LeakyReluOptions *Operator::builtin_options_as<LeakyReluOptions>() const +template <> +inline const 
onert_tflite::LeakyReluOptions * +Operator::builtin_options_as<onert_tflite::LeakyReluOptions>() const { return builtin_options_as_LeakyReluOptions(); } template <> -inline const SquaredDifferenceOptions * -Operator::builtin_options_as<SquaredDifferenceOptions>() const +inline const onert_tflite::SquaredDifferenceOptions * +Operator::builtin_options_as<onert_tflite::SquaredDifferenceOptions>() const { return builtin_options_as_SquaredDifferenceOptions(); } -template <> inline const MirrorPadOptions *Operator::builtin_options_as<MirrorPadOptions>() const +template <> +inline const onert_tflite::MirrorPadOptions * +Operator::builtin_options_as<onert_tflite::MirrorPadOptions>() const { return builtin_options_as_MirrorPadOptions(); } -template <> inline const AbsOptions *Operator::builtin_options_as<AbsOptions>() const +template <> +inline const onert_tflite::AbsOptions * +Operator::builtin_options_as<onert_tflite::AbsOptions>() const { return builtin_options_as_AbsOptions(); } -template <> inline const SplitVOptions *Operator::builtin_options_as<SplitVOptions>() const +template <> +inline const onert_tflite::SplitVOptions * +Operator::builtin_options_as<onert_tflite::SplitVOptions>() const { return builtin_options_as_SplitVOptions(); } -template <> inline const UniqueOptions *Operator::builtin_options_as<UniqueOptions>() const +template <> +inline const onert_tflite::UniqueOptions * +Operator::builtin_options_as<onert_tflite::UniqueOptions>() const { return builtin_options_as_UniqueOptions(); } -template <> inline const ReverseV2Options *Operator::builtin_options_as<ReverseV2Options>() const +template <> +inline const onert_tflite::ReverseV2Options * +Operator::builtin_options_as<onert_tflite::ReverseV2Options>() const { return builtin_options_as_ReverseV2Options(); } -template <> inline const AddNOptions *Operator::builtin_options_as<AddNOptions>() const +template <> +inline const onert_tflite::AddNOptions * +Operator::builtin_options_as<onert_tflite::AddNOptions>() 
const { return builtin_options_as_AddNOptions(); } -template <> inline const GatherNdOptions *Operator::builtin_options_as<GatherNdOptions>() const +template <> +inline const onert_tflite::GatherNdOptions * +Operator::builtin_options_as<onert_tflite::GatherNdOptions>() const { return builtin_options_as_GatherNdOptions(); } -template <> inline const CosOptions *Operator::builtin_options_as<CosOptions>() const +template <> +inline const onert_tflite::CosOptions * +Operator::builtin_options_as<onert_tflite::CosOptions>() const { return builtin_options_as_CosOptions(); } -template <> inline const WhereOptions *Operator::builtin_options_as<WhereOptions>() const +template <> +inline const onert_tflite::WhereOptions * +Operator::builtin_options_as<onert_tflite::WhereOptions>() const { return builtin_options_as_WhereOptions(); } -template <> inline const RankOptions *Operator::builtin_options_as<RankOptions>() const +template <> +inline const onert_tflite::RankOptions * +Operator::builtin_options_as<onert_tflite::RankOptions>() const { return builtin_options_as_RankOptions(); } template <> -inline const ReverseSequenceOptions *Operator::builtin_options_as<ReverseSequenceOptions>() const +inline const onert_tflite::ReverseSequenceOptions * +Operator::builtin_options_as<onert_tflite::ReverseSequenceOptions>() const { return builtin_options_as_ReverseSequenceOptions(); } -template <> inline const MatrixDiagOptions *Operator::builtin_options_as<MatrixDiagOptions>() const +template <> +inline const onert_tflite::MatrixDiagOptions * +Operator::builtin_options_as<onert_tflite::MatrixDiagOptions>() const { return builtin_options_as_MatrixDiagOptions(); } -template <> inline const QuantizeOptions *Operator::builtin_options_as<QuantizeOptions>() const +template <> +inline const onert_tflite::QuantizeOptions * +Operator::builtin_options_as<onert_tflite::QuantizeOptions>() const { return builtin_options_as_QuantizeOptions(); } template <> -inline const MatrixSetDiagOptions 
*Operator::builtin_options_as<MatrixSetDiagOptions>() const +inline const onert_tflite::MatrixSetDiagOptions * +Operator::builtin_options_as<onert_tflite::MatrixSetDiagOptions>() const { return builtin_options_as_MatrixSetDiagOptions(); } -template <> inline const HardSwishOptions *Operator::builtin_options_as<HardSwishOptions>() const +template <> +inline const onert_tflite::HardSwishOptions * +Operator::builtin_options_as<onert_tflite::HardSwishOptions>() const { return builtin_options_as_HardSwishOptions(); } -template <> inline const IfOptions *Operator::builtin_options_as<IfOptions>() const +template <> +inline const onert_tflite::IfOptions *Operator::builtin_options_as<onert_tflite::IfOptions>() const { return builtin_options_as_IfOptions(); } -template <> inline const WhileOptions *Operator::builtin_options_as<WhileOptions>() const +template <> +inline const onert_tflite::WhileOptions * +Operator::builtin_options_as<onert_tflite::WhileOptions>() const { return builtin_options_as_WhileOptions(); } template <> -inline const DepthToSpaceOptions *Operator::builtin_options_as<DepthToSpaceOptions>() const +inline const onert_tflite::DepthToSpaceOptions * +Operator::builtin_options_as<onert_tflite::DepthToSpaceOptions>() const { return builtin_options_as_DepthToSpaceOptions(); } template <> -inline const NonMaxSuppressionV4Options * -Operator::builtin_options_as<NonMaxSuppressionV4Options>() const +inline const onert_tflite::NonMaxSuppressionV4Options * +Operator::builtin_options_as<onert_tflite::NonMaxSuppressionV4Options>() const { return builtin_options_as_NonMaxSuppressionV4Options(); } template <> -inline const NonMaxSuppressionV5Options * -Operator::builtin_options_as<NonMaxSuppressionV5Options>() const +inline const onert_tflite::NonMaxSuppressionV5Options * +Operator::builtin_options_as<onert_tflite::NonMaxSuppressionV5Options>() const { return builtin_options_as_NonMaxSuppressionV5Options(); } -template <> inline const ScatterNdOptions 
*Operator::builtin_options_as<ScatterNdOptions>() const +template <> +inline const onert_tflite::ScatterNdOptions * +Operator::builtin_options_as<onert_tflite::ScatterNdOptions>() const { return builtin_options_as_ScatterNdOptions(); } -template <> inline const SelectV2Options *Operator::builtin_options_as<SelectV2Options>() const +template <> +inline const onert_tflite::SelectV2Options * +Operator::builtin_options_as<onert_tflite::SelectV2Options>() const { return builtin_options_as_SelectV2Options(); } -template <> inline const DensifyOptions *Operator::builtin_options_as<DensifyOptions>() const +template <> +inline const onert_tflite::DensifyOptions * +Operator::builtin_options_as<onert_tflite::DensifyOptions>() const { return builtin_options_as_DensifyOptions(); } -template <> inline const SegmentSumOptions *Operator::builtin_options_as<SegmentSumOptions>() const +template <> +inline const onert_tflite::SegmentSumOptions * +Operator::builtin_options_as<onert_tflite::SegmentSumOptions>() const { return builtin_options_as_SegmentSumOptions(); } template <> -inline const BatchMatMulOptions *Operator::builtin_options_as<BatchMatMulOptions>() const +inline const onert_tflite::BatchMatMulOptions * +Operator::builtin_options_as<onert_tflite::BatchMatMulOptions>() const { return builtin_options_as_BatchMatMulOptions(); } +template <> +inline const onert_tflite::CumsumOptions * +Operator::builtin_options_as<onert_tflite::CumsumOptions>() const +{ + return builtin_options_as_CumsumOptions(); +} + +template <> +inline const onert_tflite::CallOnceOptions * +Operator::builtin_options_as<onert_tflite::CallOnceOptions>() const +{ + return builtin_options_as_CallOnceOptions(); +} + +template <> +inline const onert_tflite::BroadcastToOptions * +Operator::builtin_options_as<onert_tflite::BroadcastToOptions>() const +{ + return builtin_options_as_BroadcastToOptions(); +} + +template <> +inline const onert_tflite::Rfft2dOptions * 
+Operator::builtin_options_as<onert_tflite::Rfft2dOptions>() const +{ + return builtin_options_as_Rfft2dOptions(); +} + +template <> +inline const onert_tflite::Conv3DOptions * +Operator::builtin_options_as<onert_tflite::Conv3DOptions>() const +{ + return builtin_options_as_Conv3DOptions(); +} + +template <> +inline const onert_tflite::HashtableOptions * +Operator::builtin_options_as<onert_tflite::HashtableOptions>() const +{ + return builtin_options_as_HashtableOptions(); +} + +template <> +inline const onert_tflite::HashtableFindOptions * +Operator::builtin_options_as<onert_tflite::HashtableFindOptions>() const +{ + return builtin_options_as_HashtableFindOptions(); +} + +template <> +inline const onert_tflite::HashtableImportOptions * +Operator::builtin_options_as<onert_tflite::HashtableImportOptions>() const +{ + return builtin_options_as_HashtableImportOptions(); +} + +template <> +inline const onert_tflite::HashtableSizeOptions * +Operator::builtin_options_as<onert_tflite::HashtableSizeOptions>() const +{ + return builtin_options_as_HashtableSizeOptions(); +} + +template <> +inline const onert_tflite::VarHandleOptions * +Operator::builtin_options_as<onert_tflite::VarHandleOptions>() const +{ + return builtin_options_as_VarHandleOptions(); +} + +template <> +inline const onert_tflite::ReadVariableOptions * +Operator::builtin_options_as<onert_tflite::ReadVariableOptions>() const +{ + return builtin_options_as_ReadVariableOptions(); +} + +template <> +inline const onert_tflite::AssignVariableOptions * +Operator::builtin_options_as<onert_tflite::AssignVariableOptions>() const +{ + return builtin_options_as_AssignVariableOptions(); +} + +template <> +inline const onert_tflite::RandomOptions * +Operator::builtin_options_as<onert_tflite::RandomOptions>() const +{ + return builtin_options_as_RandomOptions(); +} + struct OperatorBuilder { + typedef Operator Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_opcode_index(uint32_t 
opcode_index) @@ -8436,7 +10021,7 @@ struct OperatorBuilder { fbb_.AddOffset(Operator::VT_OUTPUTS, outputs); } - void add_builtin_options_type(BuiltinOptions builtin_options_type) + void add_builtin_options_type(onert_tflite::BuiltinOptions builtin_options_type) { fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE, static_cast<uint8_t>(builtin_options_type), 0); @@ -8449,7 +10034,7 @@ struct OperatorBuilder { fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options); } - void add_custom_options_format(CustomOptionsFormat custom_options_format) + void add_custom_options_format(onert_tflite::CustomOptionsFormat custom_options_format) { fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT, static_cast<int8_t>(custom_options_format), 0); @@ -8467,7 +10052,6 @@ struct OperatorBuilder { start_ = fbb_.StartTable(); } - OperatorBuilder &operator=(const OperatorBuilder &); flatbuffers::Offset<Operator> Finish() { const auto end = fbb_.EndTable(start_); @@ -8476,16 +10060,17 @@ struct OperatorBuilder } }; -inline flatbuffers::Offset<Operator> -CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, - BuiltinOptions builtin_options_type = BuiltinOptions_NONE, - flatbuffers::Offset<void> builtin_options = 0, - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0, - CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS, - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0) +inline flatbuffers::Offset<Operator> CreateOperator( + flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, + onert_tflite::BuiltinOptions builtin_options_type = 
onert_tflite::BuiltinOptions_NONE, + flatbuffers::Offset<void> builtin_options = 0, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0, + onert_tflite::CustomOptionsFormat custom_options_format = + onert_tflite::CustomOptionsFormat_FLEXBUFFERS, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0) { OperatorBuilder builder_(_fbb); builder_.add_intermediates(intermediates); @@ -8500,28 +10085,32 @@ CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, return builder_.Finish(); } -inline flatbuffers::Offset<Operator> -CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, - const std::vector<int32_t> *inputs = nullptr, - const std::vector<int32_t> *outputs = nullptr, - BuiltinOptions builtin_options_type = BuiltinOptions_NONE, - flatbuffers::Offset<void> builtin_options = 0, - const std::vector<uint8_t> *custom_options = nullptr, - CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS, - const std::vector<uint8_t> *mutating_variable_inputs = nullptr, - const std::vector<int32_t> *intermediates = nullptr) -{ - return onert_tflite::CreateOperator( - _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, - outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options, - custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format, - mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0, - intermediates ? 
_fbb.CreateVector<int32_t>(*intermediates) : 0); +inline flatbuffers::Offset<Operator> CreateOperatorDirect( + flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, + onert_tflite::BuiltinOptions builtin_options_type = onert_tflite::BuiltinOptions_NONE, + flatbuffers::Offset<void> builtin_options = 0, + const std::vector<uint8_t> *custom_options = nullptr, + onert_tflite::CustomOptionsFormat custom_options_format = + onert_tflite::CustomOptionsFormat_FLEXBUFFERS, + const std::vector<uint8_t> *mutating_variable_inputs = nullptr, + const std::vector<int32_t> *intermediates = nullptr) +{ + auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0; + auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0; + auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0; + auto mutating_variable_inputs__ = + mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0; + auto intermediates__ = intermediates ? 
_fbb.CreateVector<int32_t>(*intermediates) : 0; + return onert_tflite::CreateOperator(_fbb, opcode_index, inputs__, outputs__, builtin_options_type, + builtin_options, custom_options__, custom_options_format, + mutating_variable_inputs__, intermediates__); } struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef SubGraphBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_TENSORS = 4, VT_INPUTS = 6, @@ -8529,9 +10118,10 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table VT_OPERATORS = 10, VT_NAME = 12 }; - const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const + const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>> *tensors() const { - return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS); + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>> *>( + VT_TENSORS); } const flatbuffers::Vector<int32_t> *inputs() const { @@ -8541,9 +10131,10 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS); } - const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const + const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>> *operators() const { - return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS); + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>> *>( + VT_OPERATORS); } const flatbuffers::String *name() const { @@ -8563,9 +10154,11 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct SubGraphBuilder { + typedef SubGraph Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors) + void add_tensors( + 
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>>> tensors) { fbb_.AddOffset(SubGraph::VT_TENSORS, tensors); } @@ -8577,8 +10170,8 @@ struct SubGraphBuilder { fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs); } - void - add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators) + void add_operators( + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>>> operators) { fbb_.AddOffset(SubGraph::VT_OPERATORS, operators); } @@ -8590,7 +10183,6 @@ struct SubGraphBuilder { start_ = fbb_.StartTable(); } - SubGraphBuilder &operator=(const SubGraphBuilder &); flatbuffers::Offset<SubGraph> Finish() { const auto end = fbb_.EndTable(start_); @@ -8601,10 +10193,11 @@ struct SubGraphBuilder inline flatbuffers::Offset<SubGraph> CreateSubGraph( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>>> tensors = 0, flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>>> operators = + 0, flatbuffers::Offset<flatbuffers::String> name = 0) { SubGraphBuilder builder_(_fbb); @@ -8618,21 +10211,25 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraph( inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect( flatbuffers::FlatBufferBuilder &_fbb, - const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr, + const std::vector<flatbuffers::Offset<onert_tflite::Tensor>> *tensors = nullptr, const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, - const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr, const char *name = nullptr) + const 
std::vector<flatbuffers::Offset<onert_tflite::Operator>> *operators = nullptr, + const char *name = nullptr) { - return onert_tflite::CreateSubGraph( - _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0, - inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, - outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, - operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0, - name ? _fbb.CreateString(name) : 0); + auto tensors__ = + tensors ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Tensor>>(*tensors) : 0; + auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0; + auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0; + auto operators__ = + operators ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Operator>>(*operators) : 0; + auto name__ = name ? _fbb.CreateString(name) : 0; + return onert_tflite::CreateSubGraph(_fbb, tensors__, inputs__, outputs__, operators__, name__); } struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef BufferBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_DATA = 4 }; @@ -8649,6 +10246,7 @@ struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct BufferBuilder { + typedef Buffer Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data) @@ -8659,7 +10257,6 @@ struct BufferBuilder { start_ = fbb_.StartTable(); } - BufferBuilder &operator=(const BufferBuilder &); flatbuffers::Offset<Buffer> Finish() { const auto end = fbb_.EndTable(start_); @@ -8680,12 +10277,18 @@ CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb, const std::vector<uint8_t> *data = nullptr) { - return onert_tflite::CreateBuffer(_fbb, data ? 
_fbb.CreateVector<uint8_t>(*data) : 0); + if (data) + { + _fbb.ForceVectorAlignment(data->size(), sizeof(uint8_t), 16); + } + auto data__ = data ? _fbb.CreateVector<uint8_t>(*data) : 0; + return onert_tflite::CreateBuffer(_fbb, data__); } struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef MetadataBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_NAME = 4, VT_BUFFER = 6 @@ -8705,6 +10308,7 @@ struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table struct MetadataBuilder { + typedef Metadata Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_name(flatbuffers::Offset<flatbuffers::String> name) @@ -8716,7 +10320,6 @@ struct MetadataBuilder { start_ = fbb_.StartTable(); } - MetadataBuilder &operator=(const MetadataBuilder &); flatbuffers::Offset<Metadata> Finish() { const auto end = fbb_.EndTable(start_); @@ -8739,12 +10342,179 @@ inline flatbuffers::Offset<Metadata> CreateMetadataDirect(flatbuffers::FlatBuffe const char *name = nullptr, uint32_t buffer = 0) { - return onert_tflite::CreateMetadata(_fbb, name ? _fbb.CreateString(name) : 0, buffer); + auto name__ = name ? 
_fbb.CreateString(name) : 0; + return onert_tflite::CreateMetadata(_fbb, name__, buffer); +} + +struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef TensorMapBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_NAME = 4, + VT_TENSOR_INDEX = 6 + }; + const flatbuffers::String *name() const + { + return GetPointer<const flatbuffers::String *>(VT_NAME); + } + uint32_t tensor_index() const { return GetField<uint32_t>(VT_TENSOR_INDEX, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_TENSOR_INDEX) && + verifier.EndTable(); + } +}; + +struct TensorMapBuilder +{ + typedef TensorMap Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_name(flatbuffers::Offset<flatbuffers::String> name) + { + fbb_.AddOffset(TensorMap::VT_NAME, name); + } + void add_tensor_index(uint32_t tensor_index) + { + fbb_.AddElement<uint32_t>(TensorMap::VT_TENSOR_INDEX, tensor_index, 0); + } + explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<TensorMap> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<TensorMap>(end); + return o; + } +}; + +inline flatbuffers::Offset<TensorMap> +CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t tensor_index = 0) +{ + TensorMapBuilder builder_(_fbb); + builder_.add_tensor_index(tensor_index); + builder_.add_name(name); + return builder_.Finish(); +} + +inline flatbuffers::Offset<TensorMap> CreateTensorMapDirect(flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + uint32_t tensor_index = 0) +{ + auto name__ = name ? 
_fbb.CreateString(name) : 0; + return onert_tflite::CreateTensorMap(_fbb, name__, tensor_index); +} + +struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table +{ + typedef SignatureDefBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_INPUTS = 4, + VT_OUTPUTS = 6, + VT_SIGNATURE_KEY = 8, + VT_SUBGRAPH_INDEX = 12 + }; + const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *inputs() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *>( + VT_INPUTS); + } + const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *outputs() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *>( + VT_OUTPUTS); + } + const flatbuffers::String *signature_key() const + { + return GetPointer<const flatbuffers::String *>(VT_SIGNATURE_KEY); + } + uint32_t subgraph_index() const { return GetField<uint32_t>(VT_SUBGRAPH_INDEX, 0); } + bool Verify(flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_INPUTS) && + verifier.VerifyVector(inputs()) && verifier.VerifyVectorOfTables(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) && + verifier.VerifyVectorOfTables(outputs()) && VerifyOffset(verifier, VT_SIGNATURE_KEY) && + verifier.VerifyString(signature_key()) && + VerifyField<uint32_t>(verifier, VT_SUBGRAPH_INDEX) && verifier.EndTable(); + } +}; + +struct SignatureDefBuilder +{ + typedef SignatureDef Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_inputs( + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> inputs) + { + fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs); + } + void add_outputs( + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> outputs) + { + fbb_.AddOffset(SignatureDef::VT_OUTPUTS, 
outputs); + } + void add_signature_key(flatbuffers::Offset<flatbuffers::String> signature_key) + { + fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key); + } + void add_subgraph_index(uint32_t subgraph_index) + { + fbb_.AddElement<uint32_t>(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0); + } + explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<SignatureDef> Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SignatureDef>(end); + return o; + } +}; + +inline flatbuffers::Offset<SignatureDef> CreateSignatureDef( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> outputs = + 0, + flatbuffers::Offset<flatbuffers::String> signature_key = 0, uint32_t subgraph_index = 0) +{ + SignatureDefBuilder builder_(_fbb); + builder_.add_subgraph_index(subgraph_index); + builder_.add_signature_key(signature_key); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + return builder_.Finish(); +} + +inline flatbuffers::Offset<SignatureDef> CreateSignatureDefDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<flatbuffers::Offset<onert_tflite::TensorMap>> *inputs = nullptr, + const std::vector<flatbuffers::Offset<onert_tflite::TensorMap>> *outputs = nullptr, + const char *signature_key = nullptr, uint32_t subgraph_index = 0) +{ + auto inputs__ = + inputs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::TensorMap>>(*inputs) : 0; + auto outputs__ = + outputs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::TensorMap>>(*outputs) : 0; + auto signature_key__ = signature_key ? 
_fbb.CreateString(signature_key) : 0; + return onert_tflite::CreateSignatureDef(_fbb, inputs__, outputs__, signature_key__, + subgraph_index); } struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum + typedef ModelBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_VERSION = 4, VT_OPERATOR_CODES = 6, @@ -8752,33 +10522,42 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table VT_DESCRIPTION = 10, VT_BUFFERS = 12, VT_METADATA_BUFFER = 14, - VT_METADATA = 16 + VT_METADATA = 16, + VT_SIGNATURE_DEFS = 18 }; uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); } - const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const + const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *operator_codes() const { - return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>( + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *>( VT_OPERATOR_CODES); } - const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const + const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>> *subgraphs() const { - return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS); + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>> *>( + VT_SUBGRAPHS); } const flatbuffers::String *description() const { return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION); } - const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const + const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>> *buffers() const { - return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS); + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>> *>( + VT_BUFFERS); } const flatbuffers::Vector<int32_t> *metadata_buffer() const { return GetPointer<const 
flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER); } - const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *metadata() const + const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>> *metadata() const { - return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *>(VT_METADATA); + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>> *>( + VT_METADATA); + } + const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *signature_defs() const + { + return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *>( + VT_SIGNATURE_DEFS); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -8791,22 +10570,26 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) && VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) && VerifyOffset(verifier, VT_METADATA) && verifier.VerifyVector(metadata()) && - verifier.VerifyVectorOfTables(metadata()) && verifier.EndTable(); + verifier.VerifyVectorOfTables(metadata()) && VerifyOffset(verifier, VT_SIGNATURE_DEFS) && + verifier.VerifyVector(signature_defs()) && + verifier.VerifyVectorOfTables(signature_defs()) && verifier.EndTable(); } }; struct ModelBuilder { + typedef Model Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); } void add_operator_codes( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>>> + operator_codes) { fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); } - void - add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs) + void add_subgraphs( + 
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>>> subgraphs) { fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs); } @@ -8814,7 +10597,8 @@ struct ModelBuilder { fbb_.AddOffset(Model::VT_DESCRIPTION, description); } - void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers) + void add_buffers( + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>>> buffers) { fbb_.AddOffset(Model::VT_BUFFERS, buffers); } @@ -8822,16 +10606,21 @@ struct ModelBuilder { fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer); } - void - add_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata) + void add_metadata( + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>>> metadata) { fbb_.AddOffset(Model::VT_METADATA, metadata); } + void add_signature_defs( + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>>> + signature_defs) + { + fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs); + } explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); } - ModelBuilder &operator=(const ModelBuilder &); flatbuffers::Offset<Model> Finish() { const auto end = fbb_.EndTable(start_); @@ -8842,14 +10631,20 @@ struct ModelBuilder inline flatbuffers::Offset<Model> CreateModel( flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>>> + operator_codes = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>>> subgraphs = + 0, flatbuffers::Offset<flatbuffers::String> description = 0, - 
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>>> buffers = 0, flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>>> metadata = + 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>>> + signature_defs = 0) { ModelBuilder builder_(_fbb); + builder_.add_signature_defs(signature_defs); builder_.add_metadata(metadata); builder_.add_metadata_buffer(metadata_buffer); builder_.add_buffers(buffers); @@ -8860,23 +10655,34 @@ inline flatbuffers::Offset<Model> CreateModel( return builder_.Finish(); } -inline flatbuffers::Offset<Model> -CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr, - const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr, - const char *description = nullptr, - const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr, - const std::vector<int32_t> *metadata_buffer = nullptr, - const std::vector<flatbuffers::Offset<Metadata>> *metadata = nullptr) -{ - return onert_tflite::CreateModel( - _fbb, version, - operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0, - subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0, - description ? _fbb.CreateString(description) : 0, - buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0, - metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0, - metadata ? 
_fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0); +inline flatbuffers::Offset<Model> CreateModelDirect( + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + const std::vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *operator_codes = nullptr, + const std::vector<flatbuffers::Offset<onert_tflite::SubGraph>> *subgraphs = nullptr, + const char *description = nullptr, + const std::vector<flatbuffers::Offset<onert_tflite::Buffer>> *buffers = nullptr, + const std::vector<int32_t> *metadata_buffer = nullptr, + const std::vector<flatbuffers::Offset<onert_tflite::Metadata>> *metadata = nullptr, + const std::vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *signature_defs = nullptr) +{ + auto operator_codes__ = + operator_codes + ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::OperatorCode>>(*operator_codes) + : 0; + auto subgraphs__ = + subgraphs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::SubGraph>>(*subgraphs) : 0; + auto description__ = description ? _fbb.CreateString(description) : 0; + auto buffers__ = + buffers ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Buffer>>(*buffers) : 0; + auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0; + auto metadata__ = + metadata ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Metadata>>(*metadata) : 0; + auto signature_defs__ = + signature_defs + ? 
_fbb.CreateVector<flatbuffers::Offset<onert_tflite::SignatureDef>>(*signature_defs) + : 0; + return onert_tflite::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__, + buffers__, metadata_buffer__, metadata__, signature_defs__); } inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, @@ -8890,11 +10696,11 @@ inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const voi } case QuantizationDetails_CustomQuantization: { - auto ptr = reinterpret_cast<const CustomQuantization *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::CustomQuantization *>(obj); return verifier.VerifyTable(ptr); } default: - return false; + return true; } } @@ -8929,21 +10735,21 @@ inline bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void } case SparseIndexVector_Int32Vector: { - auto ptr = reinterpret_cast<const Int32Vector *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::Int32Vector *>(obj); return verifier.VerifyTable(ptr); } case SparseIndexVector_Uint16Vector: { - auto ptr = reinterpret_cast<const Uint16Vector *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::Uint16Vector *>(obj); return verifier.VerifyTable(ptr); } case SparseIndexVector_Uint8Vector: { - auto ptr = reinterpret_cast<const Uint8Vector *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::Uint8Vector *>(obj); return verifier.VerifyTable(ptr); } default: - return false; + return true; } } @@ -8977,511 +10783,576 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob } case BuiltinOptions_Conv2DOptions: { - auto ptr = reinterpret_cast<const Conv2DOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::Conv2DOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_DepthwiseConv2DOptions: { - auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::DepthwiseConv2DOptions *>(obj); return 
verifier.VerifyTable(ptr); } case BuiltinOptions_ConcatEmbeddingsOptions: { - auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ConcatEmbeddingsOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_LSHProjectionOptions: { - auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::LSHProjectionOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_Pool2DOptions: { - auto ptr = reinterpret_cast<const Pool2DOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::Pool2DOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SVDFOptions: { - auto ptr = reinterpret_cast<const SVDFOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SVDFOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_RNNOptions: { - auto ptr = reinterpret_cast<const RNNOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::RNNOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_FullyConnectedOptions: { - auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::FullyConnectedOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SoftmaxOptions: { - auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SoftmaxOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ConcatenationOptions: { - auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ConcatenationOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_AddOptions: { - auto ptr = reinterpret_cast<const AddOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::AddOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_L2NormOptions: { - auto ptr = 
reinterpret_cast<const L2NormOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::L2NormOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_LocalResponseNormalizationOptions: { - auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::LocalResponseNormalizationOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_LSTMOptions: { - auto ptr = reinterpret_cast<const LSTMOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::LSTMOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ResizeBilinearOptions: { - auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ResizeBilinearOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_CallOptions: { - auto ptr = reinterpret_cast<const CallOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::CallOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ReshapeOptions: { - auto ptr = reinterpret_cast<const ReshapeOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ReshapeOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SkipGramOptions: { - auto ptr = reinterpret_cast<const SkipGramOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SkipGramOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SpaceToDepthOptions: { - auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SpaceToDepthOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_EmbeddingLookupSparseOptions: { - auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::EmbeddingLookupSparseOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_MulOptions: { - auto ptr = 
reinterpret_cast<const MulOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::MulOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_PadOptions: { - auto ptr = reinterpret_cast<const PadOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::PadOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_GatherOptions: { - auto ptr = reinterpret_cast<const GatherOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::GatherOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_BatchToSpaceNDOptions: { - auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::BatchToSpaceNDOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SpaceToBatchNDOptions: { - auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SpaceToBatchNDOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_TransposeOptions: { - auto ptr = reinterpret_cast<const TransposeOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::TransposeOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ReducerOptions: { - auto ptr = reinterpret_cast<const ReducerOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ReducerOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SubOptions: { - auto ptr = reinterpret_cast<const SubOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SubOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_DivOptions: { - auto ptr = reinterpret_cast<const DivOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::DivOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SqueezeOptions: { - auto ptr = reinterpret_cast<const SqueezeOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SqueezeOptions *>(obj); 
return verifier.VerifyTable(ptr); } case BuiltinOptions_SequenceRNNOptions: { - auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SequenceRNNOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_StridedSliceOptions: { - auto ptr = reinterpret_cast<const StridedSliceOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::StridedSliceOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ExpOptions: { - auto ptr = reinterpret_cast<const ExpOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ExpOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_TopKV2Options: { - auto ptr = reinterpret_cast<const TopKV2Options *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::TopKV2Options *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SplitOptions: { - auto ptr = reinterpret_cast<const SplitOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SplitOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_LogSoftmaxOptions: { - auto ptr = reinterpret_cast<const LogSoftmaxOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::LogSoftmaxOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_CastOptions: { - auto ptr = reinterpret_cast<const CastOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::CastOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_DequantizeOptions: { - auto ptr = reinterpret_cast<const DequantizeOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::DequantizeOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_MaximumMinimumOptions: { - auto ptr = reinterpret_cast<const MaximumMinimumOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::MaximumMinimumOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ArgMaxOptions: { - auto ptr = 
reinterpret_cast<const ArgMaxOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ArgMaxOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_LessOptions: { - auto ptr = reinterpret_cast<const LessOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::LessOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_NegOptions: { - auto ptr = reinterpret_cast<const NegOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::NegOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_PadV2Options: { - auto ptr = reinterpret_cast<const PadV2Options *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::PadV2Options *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_GreaterOptions: { - auto ptr = reinterpret_cast<const GreaterOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::GreaterOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_GreaterEqualOptions: { - auto ptr = reinterpret_cast<const GreaterEqualOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::GreaterEqualOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_LessEqualOptions: { - auto ptr = reinterpret_cast<const LessEqualOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::LessEqualOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SelectOptions: { - auto ptr = reinterpret_cast<const SelectOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SelectOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SliceOptions: { - auto ptr = reinterpret_cast<const SliceOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SliceOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_TransposeConvOptions: { - auto ptr = reinterpret_cast<const TransposeConvOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::TransposeConvOptions *>(obj); 
return verifier.VerifyTable(ptr); } case BuiltinOptions_SparseToDenseOptions: { - auto ptr = reinterpret_cast<const SparseToDenseOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SparseToDenseOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_TileOptions: { - auto ptr = reinterpret_cast<const TileOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::TileOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ExpandDimsOptions: { - auto ptr = reinterpret_cast<const ExpandDimsOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ExpandDimsOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_EqualOptions: { - auto ptr = reinterpret_cast<const EqualOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::EqualOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_NotEqualOptions: { - auto ptr = reinterpret_cast<const NotEqualOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::NotEqualOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ShapeOptions: { - auto ptr = reinterpret_cast<const ShapeOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ShapeOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_PowOptions: { - auto ptr = reinterpret_cast<const PowOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::PowOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ArgMinOptions: { - auto ptr = reinterpret_cast<const ArgMinOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ArgMinOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_FakeQuantOptions: { - auto ptr = reinterpret_cast<const FakeQuantOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::FakeQuantOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_PackOptions: { - auto ptr = reinterpret_cast<const PackOptions *>(obj); 
+ auto ptr = reinterpret_cast<const onert_tflite::PackOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_LogicalOrOptions: { - auto ptr = reinterpret_cast<const LogicalOrOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::LogicalOrOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_OneHotOptions: { - auto ptr = reinterpret_cast<const OneHotOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::OneHotOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_LogicalAndOptions: { - auto ptr = reinterpret_cast<const LogicalAndOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::LogicalAndOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_LogicalNotOptions: { - auto ptr = reinterpret_cast<const LogicalNotOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::LogicalNotOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_UnpackOptions: { - auto ptr = reinterpret_cast<const UnpackOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::UnpackOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_FloorDivOptions: { - auto ptr = reinterpret_cast<const FloorDivOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::FloorDivOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SquareOptions: { - auto ptr = reinterpret_cast<const SquareOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SquareOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ZerosLikeOptions: { - auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ZerosLikeOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_FillOptions: { - auto ptr = reinterpret_cast<const FillOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::FillOptions *>(obj); return verifier.VerifyTable(ptr); } case 
BuiltinOptions_BidirectionalSequenceLSTMOptions: { - auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::BidirectionalSequenceLSTMOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_BidirectionalSequenceRNNOptions: { - auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::BidirectionalSequenceRNNOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { - auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::UnidirectionalSequenceLSTMOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_FloorModOptions: { - auto ptr = reinterpret_cast<const FloorModOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::FloorModOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_RangeOptions: { - auto ptr = reinterpret_cast<const RangeOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::RangeOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ResizeNearestNeighborOptions: { - auto ptr = reinterpret_cast<const ResizeNearestNeighborOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ResizeNearestNeighborOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_LeakyReluOptions: { - auto ptr = reinterpret_cast<const LeakyReluOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::LeakyReluOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SquaredDifferenceOptions: { - auto ptr = reinterpret_cast<const SquaredDifferenceOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SquaredDifferenceOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_MirrorPadOptions: { - auto ptr = reinterpret_cast<const MirrorPadOptions *>(obj); + 
auto ptr = reinterpret_cast<const onert_tflite::MirrorPadOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_AbsOptions: { - auto ptr = reinterpret_cast<const AbsOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::AbsOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SplitVOptions: { - auto ptr = reinterpret_cast<const SplitVOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SplitVOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_UniqueOptions: { - auto ptr = reinterpret_cast<const UniqueOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::UniqueOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ReverseV2Options: { - auto ptr = reinterpret_cast<const ReverseV2Options *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ReverseV2Options *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_AddNOptions: { - auto ptr = reinterpret_cast<const AddNOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::AddNOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_GatherNdOptions: { - auto ptr = reinterpret_cast<const GatherNdOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::GatherNdOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_CosOptions: { - auto ptr = reinterpret_cast<const CosOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::CosOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_WhereOptions: { - auto ptr = reinterpret_cast<const WhereOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::WhereOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_RankOptions: { - auto ptr = reinterpret_cast<const RankOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::RankOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ReverseSequenceOptions: { - auto ptr = 
reinterpret_cast<const ReverseSequenceOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ReverseSequenceOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_MatrixDiagOptions: { - auto ptr = reinterpret_cast<const MatrixDiagOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::MatrixDiagOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_QuantizeOptions: { - auto ptr = reinterpret_cast<const QuantizeOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::QuantizeOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_MatrixSetDiagOptions: { - auto ptr = reinterpret_cast<const MatrixSetDiagOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::MatrixSetDiagOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_HardSwishOptions: { - auto ptr = reinterpret_cast<const HardSwishOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::HardSwishOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_IfOptions: { - auto ptr = reinterpret_cast<const IfOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::IfOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_WhileOptions: { - auto ptr = reinterpret_cast<const WhileOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::WhileOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_DepthToSpaceOptions: { - auto ptr = reinterpret_cast<const DepthToSpaceOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::DepthToSpaceOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_NonMaxSuppressionV4Options: { - auto ptr = reinterpret_cast<const NonMaxSuppressionV4Options *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::NonMaxSuppressionV4Options *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_NonMaxSuppressionV5Options: { - auto ptr = reinterpret_cast<const 
NonMaxSuppressionV5Options *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::NonMaxSuppressionV5Options *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_ScatterNdOptions: { - auto ptr = reinterpret_cast<const ScatterNdOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::ScatterNdOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SelectV2Options: { - auto ptr = reinterpret_cast<const SelectV2Options *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SelectV2Options *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_DensifyOptions: { - auto ptr = reinterpret_cast<const DensifyOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::DensifyOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_SegmentSumOptions: { - auto ptr = reinterpret_cast<const SegmentSumOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::SegmentSumOptions *>(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_BatchMatMulOptions: { - auto ptr = reinterpret_cast<const BatchMatMulOptions *>(obj); + auto ptr = reinterpret_cast<const onert_tflite::BatchMatMulOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CumsumOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::CumsumOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CallOnceOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::CallOnceOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BroadcastToOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::BroadcastToOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Rfft2dOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::Rfft2dOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Conv3DOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::Conv3DOptions *>(obj); + return 
verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::HashtableOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableFindOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::HashtableFindOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableImportOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::HashtableImportOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableSizeOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::HashtableSizeOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_VarHandleOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::VarHandleOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReadVariableOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::ReadVariableOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AssignVariableOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::AssignVariableOptions *>(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RandomOptions: + { + auto ptr = reinterpret_cast<const onert_tflite::RandomOptions *>(obj); return verifier.VerifyTable(ptr); } default: - return false; + return true; } } diff --git a/runtime/onert/frontend/trix/CMakeLists.txt b/runtime/onert/frontend/trix/CMakeLists.txt new file mode 100644 index 000000000..7a0df4eaa --- /dev/null +++ b/runtime/onert/frontend/trix/CMakeLists.txt @@ -0,0 +1,21 @@ +if (NOT BUILD_TRIX_LOADER) + return() +endif () + +nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET) +if(TRIXEngine_FOUND) + list(APPEND SOURCES src/trix_loader.cc) +else() + list(APPEND SOURCES src/trix_loader_dummy.cc) +endif(TRIXEngine_FOUND) + +add_library(trix_loader STATIC ${SOURCES}) +set_target_properties(trix_loader PROPERTIES POSITION_INDEPENDENT_CODE ON) 
+target_include_directories(trix_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_link_libraries(trix_loader PRIVATE onert_core) +target_link_libraries(trix_loader PRIVATE nnfw_common nnfw_coverage) + +if(TRIXEngine_FOUND) + target_include_directories(trix_loader PUBLIC ${TRIXEngine_INCLUDE_DIR}) + target_link_libraries(trix_loader PRIVATE trix_engine) +endif(TRIXEngine_FOUND) diff --git a/runtime/onert/frontend/trix/include/trix_loader.h b/runtime/onert/frontend/trix/include/trix_loader.h new file mode 100644 index 000000000..297d5ec28 --- /dev/null +++ b/runtime/onert/frontend/trix/include/trix_loader.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __TRIX_TRIX_LOADER_H__ +#define __TRIX_TRIX_LOADER_H__ + +#include "ir/Graph.h" +#include <memory> + +namespace onert +{ +namespace trix_loader +{ +/** + * @throw runtime_error when tvn path is wrong or tvn is invalid + */ +std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename); +} // namespace trix_loader +} // namespace onert + +#endif // __TRIX_TRIX_LOADER_H__ diff --git a/runtime/onert/frontend/trix/src/trix_loader.cc b/runtime/onert/frontend/trix/src/trix_loader.cc new file mode 100644 index 000000000..e2995bbd1 --- /dev/null +++ b/runtime/onert/frontend/trix/src/trix_loader.cc @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "trix_loader.h" + +#include "ir/Graph.h" +#include "ir/operation/Bulk.h" + +#include <libnpuhost.h> +#include <npubinfmt.h> +#include <typedef.h> + +namespace onert +{ +namespace trix_loader +{ + +/** + * @brief A tvn metadata reader + */ +class TrixMetaReader +{ +public: + TrixMetaReader() = default; + ~TrixMetaReader() { free(_meta); } + + void init(const char *path); + data_layout input_seg_layout(uint32_t n) const { return _meta->input_seg_layout[n]; } + data_layout output_seg_layout(uint32_t n) const { return _meta->output_seg_layout[n]; } + data_type input_seg_quant_type(uint32_t n) const { return _meta->input_seg_quant_type[n]; } + data_type output_seg_quant_type(uint32_t n) const { return _meta->output_seg_quant_type[n]; } + float input_seg_quant_scale(uint32_t n) const { return _meta->input_seg_quant_s[n]; } + float output_seg_quant_scale(uint32_t n) const { return _meta->output_seg_quant_s[n]; } + int32_t input_seg_quant_zp(uint32_t n) { return _meta->input_seg_quant_z[n]; } + int32_t output_seg_quant_zp(uint32_t n) { return _meta->output_seg_quant_z[n]; } + uint32_t input_seg_num() const { return _meta->input_seg_num; } + uint32_t output_seg_num() const { return _meta->output_seg_num; } + uint32_t input_seg_dims(uint32_t n, uint32_t axis) const + { + return _meta->input_seg_dims[n][axis]; + } + uint32_t output_seg_dims(uint32_t n, uint32_t axis) const + { + 
return _meta->output_seg_dims[n][axis]; + } + +private: + npubin_meta *_meta = nullptr; +}; + +void TrixMetaReader::init(const char *path) +{ + assert(path); + _meta = getNPUmodel_metadata(path, false); + if (_meta == nullptr) + { + throw std::runtime_error("Failed to get TRIV2 model metadata"); + } + if (NPUBIN_VERSION(_meta->magiccode) != 3) + { + throw std::runtime_error("TRIV2 model metadata version mismatched."); + } +} + +class TrixLoader +{ +public: + /** + * @brief Construct a new Loader object + * + * @param graph reference on subgraphs + */ + explicit TrixLoader(std::unique_ptr<ir::Subgraphs> &subgs) : _subgraphs(subgs) {} + + /** + * @brief Load a model from file + * @param file_path + */ + void loadFromFile(const std::string &file_path); + +private: + /* + * @brief Load actually + * @throw runtime_error when tvn path is wrong or tvn is invalid + */ + void loadModel(); + void loadSubgraphs(); + std::unique_ptr<ir::Graph> loadSubgraph(); + void loadOperands(ir::Graph &subg); + ir::OperandIndex loadOperandFromInput(uint32_t i, ir::Graph &subg); + ir::OperandIndex loadOperandFromOutput(uint32_t i, ir::Graph &subg); + void loadBulk(ir::Graph &subg); + void loadOperationIO(ir::OperandIndexSequence &inputs, ir::OperandIndexSequence &outputs); + ir::OperandIndex inputIdxToOperandIdx(uint32_t i) const; + ir::OperandIndex outputIdxToOperandIdx(uint32_t i) const; + ir::DataType toDataType(const data_type type) const; + +private: +protected: + /** path to model (e.g. 
tvn) */ + std::string _model_path; + /** Reference on loadable subgraphs */ + std::unique_ptr<ir::Subgraphs> &_subgraphs; + TrixMetaReader _meta; +}; + +ir::DataType TrixLoader::toDataType(const data_type type) const +{ + switch (type) + { + case DATA_TYPE_QASYMM8: + return ir::DataType::QUANT_UINT8_ASYMM; + case DATA_TYPE_QSYMM16: + return ir::DataType::QUANT_INT16_SYMM; + default: + throw std::runtime_error("Unsupported data type from trix model"); + } +} + +ir::OperandIndex TrixLoader::inputIdxToOperandIdx(uint32_t i) const { return ir::OperandIndex(i); } +ir::OperandIndex TrixLoader::outputIdxToOperandIdx(uint32_t i) const +{ + return ir::OperandIndex(_meta.input_seg_num() + i); +} + +void TrixLoader::loadOperationIO(ir::OperandIndexSequence &inputs, + ir::OperandIndexSequence &outputs) +{ + for (uint32_t i = 0; i < _meta.input_seg_num(); ++i) + { + inputs.append(inputIdxToOperandIdx(i)); + } + + for (uint32_t i = 0; i < _meta.output_seg_num(); ++i) + { + outputs.append(outputIdxToOperandIdx(i)); + } +} + +void TrixLoader::loadBulk(ir::Graph &subg) +{ + ir::operation::Bulk::Param param; + param.binary_path = _model_path; + + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + + loadOperationIO(inputs, outputs); + + std::unique_ptr<ir::operation::Bulk> bulk(new ir::operation::Bulk(inputs, outputs, param)); + subg.addOperation(std::move(bulk)); +} + +ir::OperandIndex TrixLoader::loadOperandFromInput(uint32_t idx, ir::Graph &subg) +{ + // Shape + ir::Shape shape; + for (uint32_t d = 0; d < MAX_RANK; ++d) + shape.append(_meta.input_seg_dims(idx, d)); + + // TypeInfo + ir::TypeInfo type_info(toDataType(_meta.input_seg_quant_type(idx)), + _meta.input_seg_quant_scale(idx), _meta.input_seg_quant_zp(idx)); + + // Create operand + const auto operand_index = subg.addOperand(shape, type_info); + return operand_index; +} + +ir::OperandIndex TrixLoader::loadOperandFromOutput(uint32_t idx, ir::Graph &subg) +{ + // Shape + ir::Shape shape; + for (uint32_t d 
= 0; d < MAX_RANK; ++d) + shape.append(_meta.output_seg_dims(idx, d)); + + // TypeInfo + ir::TypeInfo type_info(toDataType(_meta.output_seg_quant_type(idx)), + _meta.output_seg_quant_scale(idx), _meta.output_seg_quant_zp(idx)); + + // Create operand + const auto operand_index = subg.addOperand(shape, type_info); + return operand_index; +} + +void TrixLoader::loadOperands(ir::Graph &subg) +{ + auto in_num = _meta.input_seg_num(); + for (uint32_t i = 0; i < in_num; ++i) + { + loadOperandFromInput(i, subg); + } + auto out_num = _meta.output_seg_num(); + for (uint32_t i = 0; i < out_num; ++i) + { + loadOperandFromOutput(i, subg); + } +} + +std::unique_ptr<ir::Graph> TrixLoader::loadSubgraph() +{ + auto subg = std::make_unique<ir::Graph>(); + _meta.init(_model_path.c_str()); + + // Load tensors + loadOperands(*subg); + + // Set inputs + for (uint32_t i = 0; i < _meta.input_seg_num(); ++i) + { + subg->addInput(inputIdxToOperandIdx(i), "tvn_input" + std::to_string(i)); + } + // Set outputs + for (uint32_t i = 0; i < _meta.output_seg_num(); ++i) + { + subg->addOutput(outputIdxToOperandIdx(i), "tvn_out" + std::to_string(i)); + } + // Create operations + loadBulk(*subg); + + // TODO: NHWC only supported at this moment. 
+ subg->setLayout(ir::Layout::NHWC); + subg->verify(); + return subg; +} + +void TrixLoader::loadSubgraphs() +{ + // one subgraph only + auto subg = loadSubgraph(); + _subgraphs->push(ir::SubgraphIndex(0), std::move(subg)); +} + +void TrixLoader::loadModel() { loadSubgraphs(); } + +void TrixLoader::loadFromFile(const std::string &file_path) +{ + // model path will be used to set Bulk param + _model_path = file_path; + // metadata is initialized from model path since it is loadFromFile + _meta.init(_model_path.c_str()); + loadModel(); +} + +std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename) +{ + auto subgraphs = std::make_unique<ir::Subgraphs>(); + TrixLoader loader(subgraphs); + loader.loadFromFile(filename); + return subgraphs; +} +} // namespace trix_loader +} // namespace onert diff --git a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc new file mode 100644 index 000000000..9fc8e1ff2 --- /dev/null +++ b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "trix_loader.h" + +// Dummy implementation to avoid build error for target, which doesn't have trix_engine + +namespace onert +{ +namespace trix_loader +{ +std::unique_ptr<ir::Subgraphs> loadModel(const std::string &) +{ + auto subgraphs = std::make_unique<ir::Subgraphs>(); + return subgraphs; +} +} // namespace trix_loader +} // namespace onert |