diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2020-09-05 21:49:46 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2020-09-05 21:49:46 +0900 |
commit | 74476a2d0296bdad70a2f7f90bc7419a8b05bffd (patch) | |
tree | 3f991636c1e9423d38eb16a384c20b569b0d678e /compiler | |
parent | 042b262b3633b6c0f577aed6cb4b980ad0c1dcf3 (diff) | |
download | nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.tar.gz nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.tar.bz2 nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.zip |
Imported Upstream version 1.9.0 (refs: upstream/1.9.0, submit/tizen/20200905.125700, accepted/tizen/unified/20200906.032650)
Diffstat (limited to 'compiler')
181 files changed, 7136 insertions, 3280 deletions
diff --git a/compiler/circle-quantizer/CMakeLists.txt b/compiler/circle-quantizer/CMakeLists.txt index 009bfabea..5075b13d5 100644 --- a/compiler/circle-quantizer/CMakeLists.txt +++ b/compiler/circle-quantizer/CMakeLists.txt @@ -1,8 +1,6 @@ -file(GLOB_RECURSE SOURCES "src/*.cpp") +set (SOURCES src/CircleQuantizer.cpp) add_executable(circle-quantizer "${SOURCES}") -target_include_directories(circle-quantizer PRIVATE include) -target_include_directories(circle-quantizer PRIVATE src) target_link_libraries(circle-quantizer foder) target_link_libraries(circle-quantizer safemain) target_link_libraries(circle-quantizer oops) diff --git a/compiler/circle-quantizer/include/CircleExpContract.h b/compiler/circle-quantizer/include/CircleExpContract.h deleted file mode 100644 index e888e4a12..000000000 --- a/compiler/circle-quantizer/include/CircleExpContract.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__ -#define __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__ - -#include <loco.h> -#include <luci/CircleExporter.h> -#include <luci/IR/Module.h> - -#include <memory> -#include <string> - -struct CircleExpContract : public luci::CircleExporter::Contract -{ -public: - CircleExpContract(luci::Module *module, const std::string &filename) - : _module(module), _filepath(filename) - { - // NOTHING TO DO - } - virtual ~CircleExpContract() = default; - -public: - loco::Graph *graph(void) const final { return nullptr; } - luci::Module *module(void) const final { return _module; }; - -public: - bool store(const char *ptr, const size_t size) const final; - -private: - luci::Module *_module; - const std::string _filepath; -}; - -#endif // __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__ diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp index 8d3a80c91..54b38a170 100644 --- a/compiler/circle-quantizer/src/CircleQuantizer.cpp +++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp @@ -14,14 +14,13 @@ * limitations under the License. */ -#include "CircleExpContract.h" - #include <foder/FileLoader.h> #include <luci/Importer.h> #include <luci/CircleOptimizer.h> #include <luci/Service/Validate.h> #include <luci/CircleExporter.h> +#include <luci/CircleFileExpContract.h> #include <oops/InternalExn.h> #include <arser/arser.h> @@ -37,6 +36,14 @@ using OptionHook = std::function<int(const char **)>; using Algorithms = luci::CircleOptimizer::Options::Algorithm; using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters; +void print_exclusive_options(void) +{ + std::cout << "Use only one of the 3 options below." 
<< std::endl; + std::cout << " --quantize_dequantize_weights" << std::endl; + std::cout << " --quantize_with_minmax" << std::endl; + std::cout << " --requantize" << std::endl; +} + void print_version(void) { std::cout << "circle-quantizer version " << vconone::get_string() << std::endl; @@ -53,6 +60,7 @@ int entry(int argc, char **argv) const std::string qdqw = "--quantize_dequantize_weights"; const std::string qwmm = "--quantize_with_minmax"; + const std::string rq = "--requantize"; arser::Arser arser("circle-quantizer provides circle model quantization"); @@ -79,6 +87,14 @@ int entry(int argc, char **argv) "Three arguments required: input_dtype(float32) " "output_dtype(uint8) granularity(layer, channel)"); + arser.add_argument(rq) + .nargs(2) + .type(arser::DataType::STR_VEC) + .required(false) + .help("Requantize a quantized model. " + "Two arguments required: input_dtype(int8) " + "output_dtype(uint8)"); + arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); @@ -95,6 +111,11 @@ int entry(int argc, char **argv) if (arser[qdqw]) { + if (arser[qwmm] || arser[rq]) + { + print_exclusive_options(); + return 255; + } auto values = arser.get<std::vector<std::string>>(qdqw); if (values.size() != 3) { @@ -110,6 +131,11 @@ int entry(int argc, char **argv) if (arser[qwmm]) { + if (arser[qdqw] || arser[rq]) + { + print_exclusive_options(); + return 255; + } auto values = arser.get<std::vector<std::string>>(qwmm); if (values.size() != 3) { @@ -123,12 +149,40 @@ int entry(int argc, char **argv) options->param(AlgorithmParameters::Quantize_granularity, values.at(2)); } + if (arser[rq]) + { + if (arser[qwmm] || arser[qdqw]) + { + print_exclusive_options(); + return 255; + } + auto values = arser.get<std::vector<std::string>>(rq); + if (values.size() != 2) + { + std::cerr << arser; + return 255; + } + options->enable(Algorithms::Requantize); + + 
options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0)); + options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1)); + } + std::string input_path = arser.get<std::string>("input"); std::string output_path = arser.get<std::string>("output"); // Load model from the file foder::FileLoader file_loader{input_path}; std::vector<char> model_data = file_loader.load(); + + // Verify flatbuffers + flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()}; + if (!circle::VerifyModelBuffer(verifier)) + { + std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl; + return EXIT_FAILURE; + } + const circle::Model *circle_model = circle::GetModel(model_data.data()); if (circle_model == nullptr) { @@ -157,7 +211,7 @@ int entry(int argc, char **argv) // Export to output Circle file luci::CircleExporter exporter; - CircleExpContract contract(module.get(), output_path); + luci::CircleFileExpContract contract(module.get(), output_path); if (!exporter.invoke(&contract)) { diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst index 6328a64db..302c3a796 100644 --- a/compiler/circle2circle-dredd-recipe-test/test.lst +++ b/compiler/circle2circle-dredd-recipe-test/test.lst @@ -10,6 +10,7 @@ ## TFLITE RECIPE +Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv) Add(Net_InstanceNorm_001 PASS fuse_instnorm) Add(Net_InstanceNorm_002 PASS fuse_instnorm) Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul) diff --git a/compiler/circle2circle/include/CircleExpContract.h b/compiler/circle2circle/include/CircleExpContract.h deleted file mode 100644 index 313b16d22..000000000 --- a/compiler/circle2circle/include/CircleExpContract.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__ -#define __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__ - -#include <loco.h> -#include <luci/CircleExporter.h> -#include <luci/IR/Module.h> -#include <mio/circle/schema_generated.h> - -#include <memory> -#include <string> - -struct CircleExpContract : public luci::CircleExporter::Contract -{ -public: - CircleExpContract(luci::Module *module, const std::string &filename) - : _module(module), _filepath(filename) - { - // NOTHING TO DO - } - virtual ~CircleExpContract() = default; - -public: - loco::Graph *graph(void) const final { return nullptr; } - luci::Module *module(void) const final { return _module; }; - -public: - bool store(const char *ptr, const size_t size) const final; - -private: - luci::Module *_module; - const std::string _filepath; -}; - -#endif // __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__ diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp index 849597b46..39ceade3a 100644 --- a/compiler/circle2circle/src/Circle2Circle.cpp +++ b/compiler/circle2circle/src/Circle2Circle.cpp @@ -14,14 +14,13 @@ * limitations under the License. 
*/ -#include "CircleExpContract.h" - #include <foder/FileLoader.h> #include <luci/Importer.h> #include <luci/CircleOptimizer.h> #include <luci/Service/Validate.h> #include <luci/CircleExporter.h> +#include <luci/CircleFileExpContract.h> #include <luci/UserSettings.h> #include <oops/InternalExn.h> @@ -61,6 +60,12 @@ int entry(int argc, char **argv) arser.add_argument("--all").nargs(0).required(false).default_value(false).help( "Enable all optimize options"); + arser.add_argument("--fuse_batchnorm_with_tconv") + .nargs(0) + .required(false) + .default_value(false) + .help("This will fuse BatchNorm operators to Transposed Convolution operator"); + arser.add_argument("--fuse_bcq") .nargs(0) .required(false) @@ -101,7 +106,7 @@ int entry(int argc, char **argv) .nargs(0) .required(false) .default_value(false) - .help("This will turn off operator vaidations. May help input model investigation."); + .help("This will turn off operator validations. May help input model investigation."); arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); @@ -125,6 +130,8 @@ int entry(int argc, char **argv) options->enable(Algorithms::ResolveCustomOpBatchMatMul); options->enable(Algorithms::ResolveCustomOpMatMul); } + if (arser.get<bool>("--fuse_batchnorm_with_tconv")) + options->enable(Algorithms::FuseBatchNormWithTConv); if (arser.get<bool>("--fuse_bcq")) options->enable(Algorithms::FuseBCQ); if (arser.get<bool>("--fuse_instnorm")) @@ -157,6 +164,14 @@ int entry(int argc, char **argv) std::cerr << err.what() << std::endl; return EXIT_FAILURE; } + + flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()}; + if (!circle::VerifyModelBuffer(verifier)) + { + std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl; + return EXIT_FAILURE; + } + const circle::Model *circle_model = 
circle::GetModel(model_data.data()); if (circle_model == nullptr) { @@ -177,15 +192,20 @@ int entry(int argc, char **argv) if (!luci::validate(graph)) { - std::cerr << "ERROR: Optimized graph is invalid" << std::endl; - return 255; + if (settings->get(luci::UserSettings::Key::DisableValidation)) + std::cerr << "WARNING: Optimized graph is invalid" << std::endl; + else + { + std::cerr << "ERROR: Optimized graph is invalid" << std::endl; + return 255; + } } } // Export to output Circle file luci::CircleExporter exporter; - CircleExpContract contract(module.get(), output_path); + luci::CircleFileExpContract contract(module.get(), output_path); if (!exporter.invoke(&contract)) { diff --git a/compiler/circlechef/circle/CMakeLists.txt b/compiler/circlechef/circle/CMakeLists.txt index 75165ada3..2ca016b84 100644 --- a/compiler/circlechef/circle/CMakeLists.txt +++ b/compiler/circlechef/circle/CMakeLists.txt @@ -7,3 +7,4 @@ target_link_libraries(circlechef_circle circlechef_proto) target_link_libraries(circlechef_circle mio_circle) target_link_libraries(circlechef_circle stdex) target_link_libraries(circlechef_circle cwrap) +target_link_libraries(circlechef_circle souschef) diff --git a/compiler/circlechef/circle/src/CircleImport.h b/compiler/circlechef/circle/src/CircleImport.h index a8ef3ee44..23ca29beb 100644 --- a/compiler/circlechef/circle/src/CircleImport.h +++ b/compiler/circlechef/circle/src/CircleImport.h @@ -19,6 +19,8 @@ #include <mio/circle/schema_generated.h> +#include <souschef/TensorFiller.h> + #include <circlechef.pb.h> #include <map> @@ -40,7 +42,7 @@ bool is_custom(const circle::OperatorCode *opcode); /** * @brief Loads TF lite file and provides helpers to access attributes */ -class CircleImport +class CircleImport : public souschef::TensorFiller { public: CircleImport(const circle::Model *model); @@ -63,63 +65,6 @@ public: std::string opcode_name(const circle::Operator *op) const; size_t buffer_info(const circle::Tensor *tensor, const uint8_t 
**buff_data); - /** - * @brief This will record the tensor by index, if it needs filler option, - * such as kernel, bias. - */ - void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; } - - /** - * @brief This will store int32 filler values such as reshape information for the tensor - */ - void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues) - { - _tensor_filler_vint32[tensor_index] = expvalues; - } - - void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues) - { - _tensor_filler_vfloat[tensor_index] = expvalues; - } - - /** - * @brief This will return true if the tensor by index, needs a filler option. - */ - bool get_tensor_filler(uint32_t tensor_index) - { - auto it = _tensor_filler.find(tensor_index); - if (it != _tensor_filler.end()) - { - return it->second; - } - return false; - } - - /** - * @brief This will return true if the tensor by index, needs a int array filler option. - */ - bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues) - { - auto it = _tensor_filler_vint32.find(tensor_index); - if (it != _tensor_filler_vint32.end()) - { - expvalues = it->second; - return true; - } - return false; - } - - bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues) - { - auto it = _tensor_filler_vfloat.find(tensor_index); - if (it != _tensor_filler_vfloat.end()) - { - expvalues = it->second; - return true; - } - return false; - } - private: const CircleSubGraphs_t *_subgraphs{nullptr}; const CircleBuffers_t *_buffers{nullptr}; @@ -129,10 +74,6 @@ private: std::vector<const circle::OperatorCode *> _op_codes{}; std::vector<int32_t> _inputs{}; std::vector<int32_t> _outputs{}; - - std::map<uint32_t, bool> _tensor_filler{}; - std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{}; - std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{}; }; } // namespace circlechef diff --git a/compiler/circlechef/core/src/ModelChef.cpp 
b/compiler/circlechef/core/src/ModelChef.cpp index d81467d68..aa54678ec 100644 --- a/compiler/circlechef/core/src/ModelChef.cpp +++ b/compiler/circlechef/core/src/ModelChef.cpp @@ -26,6 +26,7 @@ #include "OpChefs.h" #include <souschef/Dataset.h> +#include <souschef/Dims.h> #include "Log.h" @@ -41,52 +42,8 @@ #include <sstream> #include <stdexcept> -namespace -{ - using namespace souschef; -template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field) -{ - std::vector<T> res; - for (const auto &elem : field) - { - res.emplace_back(elem); - } - return res; -} - -template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field) -{ - return Dataset<T>(as_vector<T>(field)); -} - -} // namespace - -namespace -{ - -template <typename T> using Dims = std::vector<T>; - -Dims<int32_t> as_dims(const circlechef::TensorShape &shape) -{ - std::vector<int32_t> res; - - for (auto &dim : shape.dim()) - { - res.emplace_back(static_cast<int32_t>(dim)); - } - - return res; -} - -int32_t element_count(const Dims<int32_t> &dims) -{ - return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>()); -} - -} // namespace - namespace { diff --git a/compiler/circledump/src/OpPrinter.cpp b/compiler/circledump/src/OpPrinter.cpp index 3294bb23d..a0a063e79 100644 --- a/compiler/circledump/src/OpPrinter.cpp +++ b/compiler/circledump/src/OpPrinter.cpp @@ -725,6 +725,7 @@ OpPrinterRegistry::OpPrinterRegistry() _op_map[circle::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>(); _op_map[circle::BuiltinOperator_MUL] = make_unique<MulPrinter>(); // There is no Option for NON_MAX_SUPPRESSION_V4 + // There is no Option for NON_MAX_SUPPRESSION_V5 _op_map[circle::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>(); _op_map[circle::BuiltinOperator_PACK] = make_unique<PackPrinter>(); // There is no Option for PAD diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt 
index ef50e8d43..ec9e3cf85 100644 --- a/compiler/common-artifacts/CMakeLists.txt +++ b/compiler/common-artifacts/CMakeLists.txt @@ -33,10 +33,12 @@ set(REQUIREMENTS_FILE "requirements.txt") set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}") set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}") +# TODO remove version number of '--upgrade pip==20.2.1 setuptools==49.3.0' +# NOTE adding version is for temporary hotfix of setuptools 50.x.y version add_custom_command( OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} - COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools + COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0 COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} --upgrade DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2} ) @@ -46,7 +48,7 @@ add_custom_command( COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.3.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} - COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools + COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0 COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} --upgrade DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0} ) @@ -233,10 +235,10 @@ foreach(RECIPE IN ITEMS ${RECIPES}) set(INPUT_HDF5_FILE "${RECIPE}${OPT_FORMAT}.input.h5") set(INPUT_BIN_PATH 
"${CMAKE_CURRENT_BINARY_DIR}/${INPUT_HDF5_FILE}") - + set(EXPECTED_HDF5_FILE "${RECIPE}${OPT_FORMAT}.expected.h5") set(EXPECTED_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${EXPECTED_HDF5_FILE}") - + if(NOT DEFINED NO_TCGEN_${RECIPE}) # Generate input.h5, expected.h5 add_custom_command(OUTPUT ${INPUT_BIN_PATH} ${EXPECTED_BIN_PATH} @@ -244,7 +246,7 @@ foreach(RECIPE IN ITEMS ${RECIPES}) DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE} COMMENT "Generate ${INPUT_BIN_PATH} and ${EXPECTED_BIN_PATH}" ) - + # Generate test directory set(TC_DIRECTORY "${NNPKG_PATH}/metadata/tc") add_custom_command(OUTPUT ${TC_DIRECTORY} @@ -252,7 +254,7 @@ foreach(RECIPE IN ITEMS ${RECIPES}) DEPENDS ${NNPKG_PATH} COMMENT "Generate ${RECIPE} nnpackage test directory" ) - + # Move input hdf5 file to test directory set(INPUT_NNPKG_PATH "${TC_DIRECTORY}/input.h5") add_custom_command(OUTPUT ${INPUT_NNPKG_PATH} @@ -260,7 +262,7 @@ foreach(RECIPE IN ITEMS ${RECIPES}) DEPENDS ${INPUT_BIN_PATH} ${TC_DIRECTORY} COMMENT "Move ${INPUT_HDF5_FILE} to nnpackage" ) - + # Move expected hdf5 file to test directory set(EXPECTED_NNPKG_PATH "${TC_DIRECTORY}/expected.h5") add_custom_command(OUTPUT ${EXPECTED_NNPKG_PATH} diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst index fe9933ae0..886f607cf 100644 --- a/compiler/common-artifacts/exclude.lst +++ b/compiler/common-artifacts/exclude.lst @@ -96,6 +96,8 @@ tcgenerate(Mean_U8_000) tcgenerate(Minimum_000) tcgenerate(NonMaxSuppressionV4_000) tcgenerate(NonMaxSuppressionV4_001) +tcgenerate(NonMaxSuppressionV5_000) +tcgenerate(NonMaxSuppressionV5_001) tcgenerate(MirrorPad_000) tcgenerate(Mul_U8_000) tcgenerate(Neg_000) diff --git a/compiler/locomotiv/src/Node/BiasAdd.cpp b/compiler/locomotiv/src/Node/BiasAdd.cpp index dfe32ca92..b84fa7e3c 100644 --- a/compiler/locomotiv/src/Node/BiasAdd.cpp +++ b/compiler/locomotiv/src/Node/BiasAdd.cpp @@ -41,10 +41,12 @@ std::unique_ptr<NodeData> calc(const NodeData *input_data, const 
NodeData *bias_ } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add) +using namespace locomotiv; + +void execute_node(loco::BiasAdd<loco::Domain::Tensor> *bias_add) { validate(bias_add, "BiasAdd is nullptr"); @@ -63,7 +65,7 @@ void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add) annot_domain(bias_add, annot_domain(bias_add->value())); } -void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add) +void execute_node(loco::BiasAdd<loco::Domain::Feature> *bias_add) { validate(bias_add, "BiasAdd is nullptr"); @@ -82,7 +84,7 @@ void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add) annot_domain(bias_add, loco::Domain::Feature); } -} // namespace locomotiv +} // namespace namespace { @@ -123,3 +125,18 @@ std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_ } } // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add) +{ + execute_node(bias_add); +} + +void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add) +{ + execute_node(bias_add); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/BiasEncode.cpp b/compiler/locomotiv/src/Node/BiasEncode.cpp index c2f2b44c0..21f00a495 100644 --- a/compiler/locomotiv/src/Node/BiasEncode.cpp +++ b/compiler/locomotiv/src/Node/BiasEncode.cpp @@ -23,10 +23,12 @@ #include <stdexcept> #include <cassert> -namespace locomotiv +namespace { -void NodeExecution::execute(loco::BiasEncode *bias_enc) +using namespace locomotiv; + +void execute_node(loco::BiasEncode *bias_enc) { auto input_data = annot_data(bias_enc->input()); @@ -60,4 +62,11 @@ void NodeExecution::execute(loco::BiasEncode *bias_enc) annot_domain(bias_enc, loco::Domain::Bias); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::BiasEncode *bias_enc) { execute_node(bias_enc); } + } // namespace 
locomotiv diff --git a/compiler/locomotiv/src/Node/ConstGen.cpp b/compiler/locomotiv/src/Node/ConstGen.cpp index 0360b9fef..96ffbc257 100644 --- a/compiler/locomotiv/src/Node/ConstGen.cpp +++ b/compiler/locomotiv/src/Node/ConstGen.cpp @@ -53,10 +53,12 @@ inline uint32_t offset_by_index(const Shape &shape, const Index &index) } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::ConstGen *constgen) +using namespace locomotiv; + +void execute_node(loco::ConstGen *constgen) { uint32_t volume = 1; @@ -113,4 +115,11 @@ void NodeExecution::execute(loco::ConstGen *constgen) annot_domain(constgen, loco::Domain::Tensor); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::ConstGen *constgen) { execute_node(constgen); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Conv2D.cpp b/compiler/locomotiv/src/Node/Conv2D.cpp index 2e4185574..cdf0dfd56 100644 --- a/compiler/locomotiv/src/Node/Conv2D.cpp +++ b/compiler/locomotiv/src/Node/Conv2D.cpp @@ -139,10 +139,12 @@ Buffer<RET_T> calc_conv2D(const loco::Conv2D *conv2d, const Buffer<IFM_T> *input } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::Conv2D *conv2d) +using namespace locomotiv; + +void execute_node(loco::Conv2D *conv2d) { auto ifm_data = annot_data(conv2d->ifm()); auto ker_data = annot_data(conv2d->ker()); @@ -176,4 +178,11 @@ void NodeExecution::execute(loco::Conv2D *conv2d) annot_domain(conv2d, loco::Domain::Feature); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::Conv2D *conv2d) { execute_node(conv2d); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp index 92d5aa161..f39cd177e 100644 --- a/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp +++ b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp @@ -143,10 +143,12 @@ Buffer<RET_T> calc_dw_conv2d(const loco::DepthwiseConv2D *dw_conv2d, 
const Buffe } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d) +using namespace locomotiv; + +void execute_node(loco::DepthwiseConv2D *dw_conv2d) { auto ifm_data = annot_data(dw_conv2d->ifm()); auto ker_data = annot_data(dw_conv2d->ker()); @@ -182,4 +184,11 @@ void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d) annot_domain(dw_conv2d, loco::Domain::Feature); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d) { execute_node(dw_conv2d); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp index 17004901f..03f5bf833 100644 --- a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp +++ b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp @@ -79,10 +79,12 @@ std::unique_ptr<locomotiv::NodeData> dw_filter_encode(const loco::DepthwiseFilte } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::DepthwiseFilterEncode *enc) +using namespace locomotiv; + +void execute_node(loco::DepthwiseFilterEncode *enc) { auto input_data = annot_data(enc->input()); @@ -110,4 +112,11 @@ void NodeExecution::execute(loco::DepthwiseFilterEncode *enc) annot_domain(enc, loco::Domain::DepthwiseFilter); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::DepthwiseFilterEncode *enc) { execute_node(enc); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/FeatureDecode.cpp b/compiler/locomotiv/src/Node/FeatureDecode.cpp index 8a56a56b2..8776e1b42 100644 --- a/compiler/locomotiv/src/Node/FeatureDecode.cpp +++ b/compiler/locomotiv/src/Node/FeatureDecode.cpp @@ -72,10 +72,12 @@ std::unique_ptr<locomotiv::NodeData> feature_decode(const loco::FeatureDecode *n } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::FeatureDecode *dec) +using namespace locomotiv; + +void 
execute_node(loco::FeatureDecode *dec) { auto input_data = annot_data(dec->input()); @@ -109,4 +111,11 @@ void NodeExecution::execute(loco::FeatureDecode *dec) annot_domain(dec, loco::Domain::Tensor); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::FeatureDecode *dec) { execute_node(dec); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/FilterEncode.cpp b/compiler/locomotiv/src/Node/FilterEncode.cpp index cd9d708dc..0e2ac918f 100644 --- a/compiler/locomotiv/src/Node/FilterEncode.cpp +++ b/compiler/locomotiv/src/Node/FilterEncode.cpp @@ -74,10 +74,12 @@ std::unique_ptr<locomotiv::NodeData> filter_encode(const loco::FilterEncode *nod } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::FilterEncode *enc) +using namespace locomotiv; + +void execute_node(loco::FilterEncode *enc) { auto input_data = annot_data(enc->input()); @@ -111,4 +113,11 @@ void NodeExecution::execute(loco::FilterEncode *enc) annot_domain(enc, loco::Domain::Filter); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::FilterEncode *enc) { execute_node(enc); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Forward.cpp b/compiler/locomotiv/src/Node/Forward.cpp index eb7d44a59..9095ecf00 100644 --- a/compiler/locomotiv/src/Node/Forward.cpp +++ b/compiler/locomotiv/src/Node/Forward.cpp @@ -23,10 +23,12 @@ #include <stdexcept> #include <cassert> -namespace locomotiv +namespace { -void NodeExecution::execute(loco::Forward *forward) +using namespace locomotiv; + +void execute_node(loco::Forward *forward) { auto input_data = annot_data(forward->input()); @@ -59,4 +61,11 @@ void NodeExecution::execute(loco::Forward *forward) annot_domain(forward, annot_domain(forward->input())); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::Forward *forward) { execute_node(forward); } + } // namespace locomotiv diff --git 
a/compiler/locomotiv/src/Node/MatMul.cpp b/compiler/locomotiv/src/Node/MatMul.cpp index 77b7315a9..e5d149ac5 100644 --- a/compiler/locomotiv/src/Node/MatMul.cpp +++ b/compiler/locomotiv/src/Node/MatMul.cpp @@ -82,10 +82,12 @@ template <typename T> Buffer<T> calc_mat_mul(const Buffer<T> *lhs_buf, const Buf } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::MatMul *mat_mul) +using namespace locomotiv; + +void execute_node(loco::MatMul *mat_mul) { auto lhs_data = annot_data(mat_mul->lhs()); auto rhs_data = annot_data(mat_mul->rhs()); @@ -130,4 +132,11 @@ void NodeExecution::execute(loco::MatMul *mat_mul) annot_domain(mat_mul, loco::Domain::Matrix); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::MatMul *mat_mul) { execute_node(mat_mul); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/MatrixDecode.cpp b/compiler/locomotiv/src/Node/MatrixDecode.cpp index c591676ae..0310015f1 100644 --- a/compiler/locomotiv/src/Node/MatrixDecode.cpp +++ b/compiler/locomotiv/src/Node/MatrixDecode.cpp @@ -68,10 +68,12 @@ std::unique_ptr<locomotiv::NodeData> matrix_decode(const loco::MatrixDecode *nod } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::MatrixDecode *matrix_dec) +using namespace locomotiv; + +void execute_node(loco::MatrixDecode *matrix_dec) { auto input_data = annot_data(matrix_dec->input()); @@ -106,4 +108,11 @@ void NodeExecution::execute(loco::MatrixDecode *matrix_dec) annot_domain(matrix_dec, loco::Domain::Tensor); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::MatrixDecode *matrix_dec) { execute_node(matrix_dec); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/MaxPool2D.cpp b/compiler/locomotiv/src/Node/MaxPool2D.cpp index 5d92f89f5..8dce1cb1e 100644 --- a/compiler/locomotiv/src/Node/MaxPool2D.cpp +++ b/compiler/locomotiv/src/Node/MaxPool2D.cpp @@ -129,10 +129,12 @@ 
nncc::core::ADT::tensor::Buffer<T> maxPool2D(const loco::MaxPool2D *maxpool2d, } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::MaxPool2D *maxpool2d) +using namespace locomotiv; + +void execute_node(loco::MaxPool2D *maxpool2d) { auto ifm_data = annot_data(maxpool2d->ifm()); @@ -164,4 +166,11 @@ void NodeExecution::execute(loco::MaxPool2D *maxpool2d) annot_domain(maxpool2d, loco::Domain::Feature); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::MaxPool2D *maxpool2d) { execute_node(maxpool2d); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Pull.cpp b/compiler/locomotiv/src/Node/Pull.cpp index c482d8b04..fe5d7c2e1 100644 --- a/compiler/locomotiv/src/Node/Pull.cpp +++ b/compiler/locomotiv/src/Node/Pull.cpp @@ -24,10 +24,12 @@ #include <cassert> #include <stdexcept> -namespace locomotiv +namespace { -void NodeExecution::execute(loco::Pull *pull) +using namespace locomotiv; + +void execute_node(loco::Pull *pull) { // TODO Remove deprecated code #if 0 @@ -69,4 +71,11 @@ void NodeExecution::execute(loco::Pull *pull) annot_domain(pull, loco::Domain::Tensor); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::Pull *pull) { execute_node(pull); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Push.cpp b/compiler/locomotiv/src/Node/Push.cpp index fc5808b15..4e1c6c3b8 100644 --- a/compiler/locomotiv/src/Node/Push.cpp +++ b/compiler/locomotiv/src/Node/Push.cpp @@ -23,10 +23,12 @@ #include <stdexcept> #include <cassert> -namespace locomotiv +namespace { -void NodeExecution::execute(loco::Push *push) +using namespace locomotiv; + +void execute_node(loco::Push *push) { auto from_data = annot_data(push->from()); @@ -58,4 +60,11 @@ void NodeExecution::execute(loco::Push *push) annot_domain(push, loco::Domain::Tensor); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::Push *push) { execute_node(push); } + } // 
namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Reshape.cpp b/compiler/locomotiv/src/Node/Reshape.cpp index ac1672024..a9c07bee1 100644 --- a/compiler/locomotiv/src/Node/Reshape.cpp +++ b/compiler/locomotiv/src/Node/Reshape.cpp @@ -36,10 +36,12 @@ using nncc::core::ADT::tensor::num_elements; #include <cstring> #include <vector> -namespace locomotiv +namespace { -void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape) +using namespace locomotiv; + +void execute_node(loco::Reshape<loco::ReshapeType::Fixed> *reshape) { auto input_data = annot_data(reshape->input()); @@ -87,4 +89,14 @@ void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape) annot_domain(reshape, annot_domain(reshape->input())); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape) +{ + execute_node(reshape); +} + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Softmax.cpp b/compiler/locomotiv/src/Node/Softmax.cpp index 352598b27..0018eb66f 100644 --- a/compiler/locomotiv/src/Node/Softmax.cpp +++ b/compiler/locomotiv/src/Node/Softmax.cpp @@ -65,10 +65,12 @@ Shape reduce_shape(const Shape &shape, uint32_t axis) } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::TensorSoftmax *softmax) +using namespace locomotiv; + +void execute_node(loco::TensorSoftmax *softmax) { auto input_data = annot_data(softmax->input()); @@ -119,4 +121,11 @@ void NodeExecution::execute(loco::TensorSoftmax *softmax) annot_domain(softmax, annot_domain(softmax->input())); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TensorSoftmax *softmax) { execute_node(softmax); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/TensorBroadcast.cpp b/compiler/locomotiv/src/Node/TensorBroadcast.cpp index 010ca6821..38e5a7aa9 100644 --- a/compiler/locomotiv/src/Node/TensorBroadcast.cpp +++ 
b/compiler/locomotiv/src/Node/TensorBroadcast.cpp @@ -34,10 +34,12 @@ using nncc::core::ADT::tensor::Shape; #include <cassert> #include <stdexcept> -namespace locomotiv +namespace { -void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast) +using namespace locomotiv; + +void execute_node(loco::TensorBroadcast *tensor_broadcast) { auto input_data = annot_data(tensor_broadcast->input()); @@ -103,4 +105,14 @@ void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast) annot_domain(tensor_broadcast, loco::Domain::Tensor); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast) +{ + execute_node(tensor_broadcast); +} + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/TensorConcat.cpp b/compiler/locomotiv/src/Node/TensorConcat.cpp index 3187a7f75..188bb635b 100644 --- a/compiler/locomotiv/src/Node/TensorConcat.cpp +++ b/compiler/locomotiv/src/Node/TensorConcat.cpp @@ -35,10 +35,12 @@ using nncc::core::ADT::tensor::Shape; #include <cassert> #include <stdexcept> -namespace locomotiv +namespace { -void NodeExecution::execute(loco::TensorConcat *tensor_concat) +using namespace locomotiv; + +void execute_node(loco::TensorConcat *tensor_concat) { validate(tensor_concat, "TensorConcat is nullptr"); @@ -112,4 +114,11 @@ void NodeExecution::execute(loco::TensorConcat *tensor_concat) annot_domain(tensor_concat, loco::Domain::Tensor); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TensorConcat *tensor_concat) { execute_node(tensor_concat); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/TensorConstantPad.cpp b/compiler/locomotiv/src/Node/TensorConstantPad.cpp index cd81a3a4d..5d4ad5d24 100644 --- a/compiler/locomotiv/src/Node/TensorConstantPad.cpp +++ b/compiler/locomotiv/src/Node/TensorConstantPad.cpp @@ -31,10 +31,12 @@ using nncc::core::ADT::tensor::IndexEnumerator; using nncc::core::ADT::tensor::LexicalLayout; using 
nncc::core::ADT::tensor::make_buffer; -namespace locomotiv +namespace { -void NodeExecution::execute(loco::TensorConstantPad *pad) +using namespace locomotiv; + +void execute_node(loco::TensorConstantPad *pad) { validate(pad, "TensorConstantPad is nullptr"); @@ -112,4 +114,11 @@ void NodeExecution::execute(loco::TensorConstantPad *pad) annot_domain(pad, annot_domain(pad->input())); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TensorConstantPad *pad) { execute_node(pad); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/TensorReduce.cpp b/compiler/locomotiv/src/Node/TensorReduce.cpp index a60ebd890..1f619a31a 100644 --- a/compiler/locomotiv/src/Node/TensorReduce.cpp +++ b/compiler/locomotiv/src/Node/TensorReduce.cpp @@ -115,10 +115,12 @@ void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorReduce &node) } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::TensorReduce *node) +using namespace locomotiv; + +void execute_node(loco::TensorReduce *node) { auto input_data = annot_data(node->input()); validate(input_data, "Input not ready"); @@ -149,4 +151,11 @@ void NodeExecution::execute(loco::TensorReduce *node) annot_domain(node, annot_domain(node->input())); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TensorReduce *node) { execute_node(node); } + } // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.cpp index 3ea4f071d..bec15a5df 100644 --- a/compiler/locomotiv/src/Node/TransposedConv2D.cpp +++ b/compiler/locomotiv/src/Node/TransposedConv2D.cpp @@ -147,10 +147,12 @@ Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d, } // namespace -namespace locomotiv +namespace { -void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d) +using namespace locomotiv; + +void execute_node(loco::TransposedConv2D *tr_conv2d) { auto ifm_data = 
annot_data(tr_conv2d->ifm()); auto ker_data = annot_data(tr_conv2d->ker()); @@ -186,4 +188,11 @@ void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d) annot_domain(tr_conv2d, loco::Domain::Feature); } +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d) { execute_node(tr_conv2d); } + } // namespace locomotiv diff --git a/compiler/luci-interpreter/src/CMakeLists.txt b/compiler/luci-interpreter/src/CMakeLists.txt index 6a66f1425..47b68fa40 100644 --- a/compiler/luci-interpreter/src/CMakeLists.txt +++ b/compiler/luci-interpreter/src/CMakeLists.txt @@ -1,6 +1,7 @@ -nnas_find_package(TensorFlowSource EXACT 2.1.0 QUIET) -nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.1.0 QUIET) -nnas_find_package(TensorFlowEigenSource EXACT 2.1.0 QUIET) +nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET) +nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET) +nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET) +nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET) if (NOT TensorFlowSource_FOUND) message(STATUS "Skipping luci-interpreter: TensorFlow not found") @@ -17,6 +18,11 @@ if (NOT TensorFlowEigenSource_FOUND) return() endif () +if (NOT TensorFlowRuySource_FOUND) + message(STATUS "Skipping luci-interpreter: Ruy not found") + return() +endif () + add_subdirectory(core) add_subdirectory(kernels) add_subdirectory(loader) diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp index 5ac3b2f7a..2ab7ff0da 100644 --- a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp +++ b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp @@ -93,6 +93,21 @@ TYPED_TEST(ArgMaxTest, MultiDimensions) /*dimension_data=*/{3}, /*output_data=*/{3, 1}); } +TEST(ArgMaxTest, UnsupportedType_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, { + 1, 2, 7, 8, 1, 9, 7, 3, + }); + Tensor dimension_tensor = 
makeInputTensor<DataType::S32>({}, {3}); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + ArgMaxParams params{}; + params.output_type = DataType::U8; + ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params); + kernel.configure(); + EXPECT_ANY_THROW(kernel.execute()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp index 6d1b8ead4..cdd81d7d6 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp @@ -35,6 +35,14 @@ AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DPa void AveragePool2D::configure() { + if (input()->element_type() != output()->element_type()) + { + throw std::runtime_error("Input Tensor and Output Tensor Type must be same"); + } + if (input()->shape().num_dims() != 4) + { + throw std::runtime_error("Input Tensor Shape must be 4-D"); + } const Shape &input_shape = input()->shape(); const int32_t batches = input_shape.dim(0); @@ -51,7 +59,14 @@ void AveragePool2D::configure() computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height); _padding_width = computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width); - + if (input()->element_type() == DataType::U8) + { + if (input()->scale() != output()->scale() || input()->zero_point() != output()->zero_point()) + { + throw std::runtime_error( + "Quantization param for Input and output must be same(scale or zero-point)"); + } + } output()->resize({batches, output_height, output_width, depth}); } diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp index 7160e49e9..cc80e5e90 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp +++ 
b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp @@ -122,6 +122,80 @@ TEST(AveragePool2DTest, Uint8_1) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1})); } +TEST(AveragePool2DTest, Invalid_Input_Shape_NEG) +{ + Shape input_shape{1, 3, 5}; + std::vector<float> input_data{ + -4, -3, -2, -1, 0, // + 1, 2, 3, 4, 5, // + 6, 7, 8, 9, 10, // + }; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(AveragePool2DTest, In_Out_Type_NEG) +{ + Shape input_shape{1, 3, 5, 1}; + std::vector<float> input_data{ + -4, -3, -2, -1, 0, // + 1, 2, 3, 4, 5, // + 6, 7, 8, 9, 10, // + }; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(AveragePool2DTest, Quant_Param_NEG) +{ + std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f); + std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f); + Tensor input_tensor{ + DataType::U8, {1, 2, 4, 1}, {{quant_param1.first}, {quant_param1.second}}, ""}; + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second); + + std::vector<uint8_t> quant_input = quantize<uint8_t>( 
+ { + 0, -6, 12, 4, // + -3, -2, 10, 7, // + }, + quant_param1.first, quant_param1.second); + input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t)); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-interpreter/src/kernels/CMakeLists.txt index a1fd1deaf..040ac5911 100644 --- a/compiler/luci-interpreter/src/kernels/CMakeLists.txt +++ b/compiler/luci-interpreter/src/kernels/CMakeLists.txt @@ -44,6 +44,8 @@ set(SOURCES Reshape.cpp Reverse.h Reverse.cpp + Rsqrt.h + Rsqrt.cpp Slice.h Slice.cpp Softmax.h @@ -54,8 +56,12 @@ set(SOURCES Split.cpp StridedSlice.h StridedSlice.cpp + Sqrt.h + Sqrt.cpp Squeeze.h Squeeze.cpp + Tanh.h + Tanh.cpp Transpose.h Transpose.cpp TransposeConv.h @@ -63,12 +69,13 @@ set(SOURCES Unpack.h Unpack.cpp) -list(APPEND SOURCES Utils.h Utils.cpp) +list(APPEND SOURCES Utils.h Utils.cpp ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) add_library(luci_interpreter_kernels STATIC ${SOURCES}) set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(luci_interpreter_kernels PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR}) target_include_directories(luci_interpreter_kernels SYSTEM PRIVATE + "${TensorFlowRuySource_DIR}" "${TensorFlowGEMMLowpSource_DIR}" "${TensorFlowEigenSource_DIR}" "${TensorFlowSource_DIR}") @@ -99,12 +106,15 @@ set(TEST_SOURCES Pad.test.cpp Reshape.test.cpp Reverse.test.cpp + Rsqrt.test.cpp Slice.test.cpp Softmax.test.cpp SpaceToDepth.test.cpp Split.test.cpp StridedSlice.test.cpp + Sqrt.test.cpp Squeeze.test.cpp + 
Tanh.test.cpp Transpose.test.cpp TransposeConv.test.cpp Unpack.test.cpp) diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp index 60e6134ab..a51fb4afc 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp @@ -47,21 +47,21 @@ void Conv2D::configure() // We only support (1) and (3) for now. if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32) { - assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); } else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8) { - assert(bias() == nullptr || bias()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); } else { throw std::runtime_error("Unsupported type."); } - assert(output()->element_type() == input()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type()); const Shape &input_shape = input()->shape(); const Shape &filter_shape = filter()->shape(); - assert(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4); + LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4); const int32_t batches = input_shape.dim(0); const int32_t input_height = input_shape.dim(1); @@ -69,10 +69,10 @@ void Conv2D::configure() const int32_t output_depth = filter_shape.dim(0); const int32_t filter_height = filter_shape.dim(1); const int32_t filter_width = filter_shape.dim(2); - assert(filter_shape.dim(3) == input_shape.dim(3)); + LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3)); - assert(bias() == nullptr || - (bias()->shape().num_dims() == 1 && bias()->shape().dim(0) == output_depth)); + LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 && + 
bias()->shape().dim(0) == output_depth)); const int32_t output_height = computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height, diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp index ef9ace903..0446d9760 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp @@ -180,6 +180,146 @@ TEST(Conv2DTest, Uint8) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } +TEST(Conv2DTest, Unsupported_Type_Configure_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector<int32_t> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<float> bias_data{1, 2}; + Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data); + Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(Conv2DTest, Invalid_Bias_Type_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector<float> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 
7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<uint8_t> bias_data{1, 2}; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(Conv2DTest, Invalid_Bias_Data_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{3}; + std::vector<float> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<float> bias_data{1, 2, 3}; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + 
params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(Conv2DTest, Invalid_Input_Shape_NEG) +{ + Shape input_shape{1, 4, 6, 1}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector<float> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<float> bias_data{1, 2}; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp index fce01a605..1a26debe0 100644 --- a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp +++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp @@ -39,12 +39,10 @@ void LeakyRelu::configure() assert(input()->element_type() == output()->element_type()); if (input()->element_type() == DataType::U8) { - _q_alpha = 
static_cast<uint8_t>(std::max<float>( - std::numeric_limits<uint8_t>::min(), - std::min<float>(std::numeric_limits<uint8_t>::max(), - std::round(input()->zero_point() + (params().alpha / input()->scale()))))); - double real_multiplier = input()->scale() * input()->scale() / output()->scale(); - quantizeMultiplierSmallerThanOneExp(real_multiplier, &_output_multiplier, &_output_shift); + double alpha_multiplier = input()->scale() * params().alpha / output()->scale(); + quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha); + double identity_multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity); } output()->resize(input()->shape()); } @@ -77,15 +75,15 @@ void LeakyRelu::evalQuantized() const { tflite::LeakyReluParams op_params{}; op_params.input_offset = input()->zero_point(); - op_params.alpha_offset = input()->zero_point(); op_params.output_offset = output()->zero_point(); - - op_params.output_multiplier = _output_multiplier; - op_params.output_shift = _output_shift; + op_params.output_multiplier_alpha = _output_multiplier_alpha; + op_params.output_shift_alpha = _output_shift_alpha; + op_params.output_multiplier_identity = _output_multiplier_identity; + op_params.output_shift_identity = _output_shift_identity; tflite::reference_ops::QuantizeLeakyRelu( - op_params, _q_alpha, getTensorShape(input()), getTensorData<uint8_t>(input()), - getTensorShape(output()), getTensorData<uint8_t>(output())); + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.h b/compiler/luci-interpreter/src/kernels/LeakyRelu.h index dcc2be93f..e66f404df 100644 --- a/compiler/luci-interpreter/src/kernels/LeakyRelu.h +++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.h @@ -41,9 +41,10 @@ private: void 
evalQuantized() const; private: - uint8_t _q_alpha = 0; - int32_t _output_multiplier = 0; - int _output_shift = 0; + int32_t _output_multiplier_alpha = 0; + int _output_shift_alpha = 0; + int32_t _output_multiplier_identity = 0; + int _output_shift_identity = 0; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-interpreter/src/kernels/Mul.cpp index a6e721a09..dd31aa099 100644 --- a/compiler/luci-interpreter/src/kernels/Mul.cpp +++ b/compiler/luci-interpreter/src/kernels/Mul.cpp @@ -19,7 +19,8 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h> #include <stdexcept> @@ -66,13 +67,13 @@ void Mul::evalFloat() const if (need_broadcast) { - tflite::reference_ops::BroadcastMul4DSlow( + tflite::optimized_ops::BroadcastMul4DSlow( params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); } else { - tflite::reference_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()), + tflite::optimized_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); } diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.cpp new file mode 100644 index 000000000..6dd92dc98 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Rsqrt.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Rsqrt.h" +#include "kernels/Utils.h" + +#include <stdexcept> +#include <cmath> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Rsqrt::configure() +{ + if (input()->element_type() != output()->element_type()) + { + throw std::runtime_error("Input/output tensor data type mismatch."); + } + output()->resize(input()->shape()); +} + +void Rsqrt::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Rsqrt::evalFloat() const +{ + auto in = getTensorData<float>(input()); + auto out = getTensorData<float>(output()); + auto size = getTensorShape(input()).FlatSize(); + for (auto i = in; i != in + size; ++i) + { + *out = 1.f / std::sqrt(*i); + ++out; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.h b/compiler/luci-interpreter/src/kernels/Rsqrt.h new file mode 100644 index 000000000..adc5bcfa2 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Rsqrt.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_RSQRT_H +#define LUCI_INTERPRETER_KERNELS_RSQRT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Rsqrt : public Kernel +{ +public: + Rsqrt(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RSQRT_H diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp new file mode 100644 index 000000000..69b55d2f2 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Rsqrt.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) +{ + Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""}; + input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Rsqrt kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(RsqrtTest, SimpleRsqrt) +{ + Check( + /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, + /*input_data=*/ + { + 5, 4, 8, 2, // + 6, 7.5, 9, 0.3, // + }, + /*output_data=*/ + { + 0.44721360, 0.5, 0.35355339, 0.70710678, // + 0.40824829, 0.36514837, 0.33333333, 1.8257419, // + }); +} + +TEST(RsqrtTest, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Rsqrt kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(RsqrtTest, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Rsqrt kernel(&input_tensor, &output_tensor); + kernel.configure(); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.cpp new file mode 100644 index 000000000..46e9fc9ad --- /dev/null +++ 
b/compiler/luci-interpreter/src/kernels/Sqrt.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Sqrt.h" +#include "kernels/Utils.h" + +#include <stdexcept> +#include <cmath> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Sqrt::configure() +{ + if (input()->element_type() != output()->element_type()) + { + throw std::runtime_error("Input/output tensor data type mismatch."); + } + output()->resize(input()->shape()); +} + +void Sqrt::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Sqrt::evalFloat() const +{ + auto in = getTensorData<float>(input()); + auto out = getTensorData<float>(output()); + auto size = getTensorShape(input()).FlatSize(); + for (auto i = in; i != in + size; ++i) + { + *out = std::sqrt(*i); + ++out; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.h b/compiler/luci-interpreter/src/kernels/Sqrt.h new file mode 100644 index 000000000..4034655ed --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Sqrt.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SQRT_H +#define LUCI_INTERPRETER_KERNELS_SQRT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Sqrt : public Kernel +{ +public: + Sqrt(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SQRT_H diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp new file mode 100644 index 000000000..cdd208280 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Sqrt.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) +{ + Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""}; + input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Sqrt kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(SqrtTest, SimpleSqrt) +{ + Check( + /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, + /*input_data=*/ + { + 0, 8, 2, 4, // + 3, 7, 10, 0.3, // + }, + /*output_data=*/ + { + 0.0, 2.8284271, 1.4142136, 2, // + 1.7320508, 2.6457513, 3.1622777, 0.54772256, // + }); +} + +TEST(SqrtTest, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Sqrt kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(SqrtTest, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Sqrt kernel(&input_tensor, &output_tensor); + kernel.configure(); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-interpreter/src/kernels/Tanh.cpp new file
mode 100644 index 000000000..b649d5d2f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Tanh.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Tanh.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Tanh::configure() +{ + assert(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::U8) + { + populateLookupTable(); + } + output()->resize(input()->shape()); +} + +void Tanh::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Tanh::evalFloat() const +{ + tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void Tanh::evalQuantized() const +{ + const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output())); + uint8_t *output_data = getTensorData<uint8_t>(output()); + const uint8_t *input_data = getTensorData<uint8_t>(input()); + for (int i = 0; i < size; ++i) + { + output_data[i] = 
getTableValue(input_data[i]); + } +} + +void Tanh::populateLookupTable() +{ + const auto input_scale = static_cast<double>(input()->scale()); + const auto input_zero_point = static_cast<int32_t>(input()->zero_point()); + const auto output_scale = static_cast<double>(output()->scale()); + const auto output_zero_point = static_cast<int32_t>(output()->zero_point()); + const float inverse_scale = 1 / output_scale; + int32_t maxval = std::numeric_limits<uint8_t>::max(); + int32_t minval = std::numeric_limits<uint8_t>::min(); + for (int32_t val = minval; val <= maxval; ++val) + { + const float dequantized = input_scale * (val - input_zero_point); + const float transformed = std::tanh(dequantized); + const float rescaled = std::round(transformed * inverse_scale); + const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); + setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)), + static_cast<uint8_t>(val)); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Tanh.h b/compiler/luci-interpreter/src/kernels/Tanh.h new file mode 100644 index 000000000..8017c9638 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Tanh.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_TANH_H +#define LUCI_INTERPRETER_KERNELS_TANH_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Tanh : public Kernel +{ +public: + Tanh(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void populateLookupTable(); + void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; }; + uint8_t getTableValue(uint8_t idx) const { return _table[idx]; }; + +private: + uint8_t _table[256]{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_TANH_H diff --git a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp new file mode 100644 index 000000000..392b8672d --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Tanh.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(TanhTest, Float) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tanh kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + std::vector<float> ref_output_data{ + 0, -0.9999877, 0.9640275, 0.999329, // + 0.99505475, -0.9640275, 1, 0.7615941, // + }; + EXPECT_THAT(extractTensorData<float>(output_tensor), + ElementsAreArray(ArrayFloatNear(ref_output_data))); +} + +TEST(TanhTest, Uint8) +{ + float kMin = -1; + float kMax = 127.f / 128.f; + float kTanhTolerance = 2 * (1. / 256); + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax); + std::vector<float> input_data{ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }; + Tensor input_tensor{ + DataType::U8, {2, 6, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""}; + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + std::vector<uint8_t> quantize_input = + quantize<uint8_t>(input_data, input_quant_param.first, input_quant_param.second); + input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t)); + + Tanh kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + std::vector<float> ref_output_data{ + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 
0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + }; + std::vector<int32_t> ref_output_shape{2, 6, 4, 1}; + EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), + output_tensor.zero_point()), + ElementsAreArray(ArrayFloatNear(ref_output_data, kTanhTolerance))); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp index 46380e2fa..898bae3da 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp @@ -30,8 +30,8 @@ namespace kernels { TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, - Tensor *output, const TransposeConvParams &params) - : KernelWithParams<TransposeConvParams>({output_shape, filter, input}, {output}, params) + const Tensor *bias, Tensor *output, const TransposeConvParams &params) + : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params) { } @@ -106,8 +106,9 @@ void TransposeConv::evalFloat() const op_params.output_multiplier = _output_multiplier; tflite::reference_ops::TransposeConv( op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()), - getTensorData<float>(filter()), getTensorShape(output()), getTensorData<float>(output()), - tflite::RuntimeShape(), (float *)nullptr); + getTensorData<float>(filter()),
getTensorShape(bias()), getTensorData<float>(bias()), + getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), + (float *)nullptr); } void TransposeConv::evalQuantized() const @@ -145,8 +146,9 @@ void TransposeConv::evalQuantized() const tflite::reference_ops::TransposeConv( op_params, getTensorShape(input()), getTensorData<uint8>(input()), getTensorShape(filter()), - getTensorData<uint8>(filter()), getTensorShape(output()), getTensorData<uint8>(output()), - tflite::RuntimeShape(), (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get())); + getTensorData<uint8>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()), + getTensorShape(output()), getTensorData<uint8>(output()), tflite::RuntimeShape(), + (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get())); } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-interpreter/src/kernels/TransposeConv.h index d73e939b7..3a0eae761 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.h +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.h @@ -29,11 +29,12 @@ class TransposeConv : public KernelWithParams<TransposeConvParams> { public: TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, - Tensor *output, const TransposeConvParams &params); + const Tensor *bias, Tensor *output, const TransposeConvParams &params); const Tensor *output_shape() const { return _inputs[0]; } const Tensor *filter() const { return _inputs[1]; } const Tensor *input() const { return _inputs[2]; } + const Tensor *bias() const { return _inputs[3]; } Tensor *output() const { return _outputs[0]; } void configure() override; diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp index b8c0ac497..0fbe9328b 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp +++
b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp @@ -26,15 +26,15 @@ namespace using namespace testing; -template <typename T> +template <typename T, typename B> void Check(std::initializer_list<int32_t> output_shape_shape, std::initializer_list<int32_t> weight_shape, std::initializer_list<int32_t> input_data_shape, - std::initializer_list<int32_t> output_shape, + std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data, - std::initializer_list<T> input_data_data, std::initializer_list<T> output_data, - luci::Padding padding, int32_t stride_height, int32_t stride_width, - DataType element_type) + std::initializer_list<T> input_data_data, std::initializer_list<B> bias_data, + std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height, + int32_t stride_width, DataType element_type) { Tensor output_shape_tensor{element_type, output_shape_shape, {}, ""}; output_shape_tensor.writeData(output_shape_data.begin(), output_shape_data.size() * sizeof(T)); @@ -50,21 +50,32 @@ void Check(std::initializer_list<int32_t> output_shape_shape, params.stride_height = stride_height; params.stride_width = stride_width; - TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &output_tensor, - params); - kernel.configure(); - kernel.execute(); - + if (bias_data.size() != 0) + { + Tensor bias_tensor = makeInputTensor<getElementType<B>()>(bias_shape, bias_data); + TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor, + &output_tensor, params); + kernel.configure(); + kernel.execute(); + } + else + { + TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr, + &output_tensor, params); + kernel.configure(); + kernel.execute(); + } EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); } TEST(TransposeConvTest, 
FloatSimple) { - Check<float>( + Check<float, float>( /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1}, - /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1}, + /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1}, /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9}, /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + /*bias_data=*/{}, /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365}, /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1, getElementType<float>()); @@ -74,12 +85,13 @@ TEST(TransposeConvTest, FloatSimple) TEST(TransposeConvTest, FloatTwoFiltersTest) { - Check<float>( + Check<float, float>( /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2}, - /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1}, + /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1}, /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}, /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, + /*bias_data=*/{}, /*output_data=*/{184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760}, /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1, @@ -88,6 +100,24 @@ TEST(TransposeConvTest, FloatTwoFiltersTest) SUCCEED(); } +TEST(TransposeConvTest, SimpleBiasTest) +{ + Check<float, float>( + /*outputShape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1}, + /*input_shape=*/{1, 2, 2, 1}, + /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 5, 5, 2}, + /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}, + /*input_data=*/{1, 2, 3, 4}, + /*bias_data=*/{3, 4}, + /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21, + 24, 
25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34, + 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76}, + /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2, + getElementType<float>()); + + SUCCEED(); +} + // TODO Uint8Simple // Implement GetDequantizedOutput Function. // Create Test for Uint8 Case diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h index 3c2cc8450..7927151c6 100644 --- a/compiler/luci-interpreter/src/kernels/Utils.h +++ b/compiler/luci-interpreter/src/kernels/Utils.h @@ -31,6 +31,11 @@ namespace luci_interpreter namespace kernels { +#define LUCI_INTERPRETER_CHECK(cond) \ + if (!(cond)) \ + throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + +"(" + \ + std::string(#cond) + ") was not true."); + inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size) { diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp index 12c7f4526..126a1cb5b 100644 --- a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp +++ b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp @@ -37,12 +37,15 @@ #include "kernels/Pad.h" #include "kernels/Reshape.h" #include "kernels/Reverse.h" +#include "kernels/Rsqrt.h" #include "kernels/Slice.h" #include "kernels/Softmax.h" #include "kernels/SpaceToDepth.h" #include "kernels/Split.h" #include "kernels/StridedSlice.h" +#include "kernels/Sqrt.h" #include "kernels/Squeeze.h" +#include "kernels/Tanh.h" #include "kernels/Unpack.h" #include "kernels/Transpose.h" #include "kernels/TransposeConv.h" @@ -430,6 +433,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleReverseV2 *node) return std::make_unique<kernels::Reverse>(input, axes, output); } +std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleRsqrt *node) +{ + 
assert(node->arity() == 1); + + const Tensor *input = getInputTensor(node->x()); + Tensor *output = getOutputTensor(node); + + return std::make_unique<kernels::Rsqrt>(input, output); +} + std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSlice *node) { assert(node->arity() == 3); @@ -483,6 +496,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSplit *node) return std::make_unique<kernels::Split>(axis, input, std::move(outputs)); } +std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqrt *node) +{ + assert(node->arity() == 1); + + const Tensor *input = getInputTensor(node->x()); + Tensor *output = getOutputTensor(node); + + return std::make_unique<kernels::Sqrt>(input, output); +} + std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node) { assert(node->arity() == 1); @@ -517,6 +540,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleStridedSlice *nod return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params); } +std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTanh *node) +{ + assert(node->arity() == 1); + + const Tensor *input = getInputTensor(node->x()); + Tensor *output = getOutputTensor(node); + + return std::make_unique<kernels::Tanh>(input, output); +} + std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node) { assert(node->arity() == 2); @@ -530,11 +563,12 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node) std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *node) { - assert(node->arity() == 3); + assert(node->arity() == 4); const Tensor *input_sizes = getInputTensor(node->inputSizes()); const Tensor *filter = getInputTensor(node->filter()); const Tensor *out_backprop = getInputTensor(node->outBackprop()); + const Tensor *bias = getOptionalInputTensor(node->bias()); Tensor *output = getOutputTensor(node); @@ -543,7 +577,7 @@ std::unique_ptr<Kernel> 
KernelBuilder::visit(const luci::CircleTransposeConv *no params.stride_height = node->stride()->h(); params.stride_width = node->stride()->w(); - return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, output, + return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output, params); } diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-interpreter/src/loader/KernelBuilder.h index d5c5a4b56..31cb9d8fc 100644 --- a/compiler/luci-interpreter/src/loader/KernelBuilder.h +++ b/compiler/luci-interpreter/src/loader/KernelBuilder.h @@ -63,12 +63,15 @@ public: std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override; std::unique_ptr<Kernel> visit(const luci::CircleReshape *node) override; std::unique_ptr<Kernel> visit(const luci::CircleReverseV2 *node) override; + std::unique_ptr<Kernel> visit(const luci::CircleRsqrt *node) override; std::unique_ptr<Kernel> visit(const luci::CircleSlice *node) override; std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override; std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override; std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override; std::unique_ptr<Kernel> visit(const luci::CircleStridedSlice *node) override; + std::unique_ptr<Kernel> visit(const luci::CircleSqrt *node) override; std::unique_ptr<Kernel> visit(const luci::CircleSqueeze *node) override; + std::unique_ptr<Kernel> visit(const luci::CircleTanh *node) override; std::unique_ptr<Kernel> visit(const luci::CircleTranspose *node) override; std::unique_ptr<Kernel> visit(const luci::CircleTransposeConv *node) override; std::unique_ptr<Kernel> visit(const luci::CircleUnpack *node) override; diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp index 33bc8ec9b..4e2bc3d0b 100644 --- a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp +++ 
b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp @@ -37,12 +37,15 @@ #include <kernels/Pad.h> #include <kernels/Reshape.h> #include <kernels/Reverse.h> +#include <kernels/Rsqrt.h> #include <kernels/Slice.h> #include <kernels/Softmax.h> #include <kernels/SpaceToDepth.h> #include <kernels/Split.h> +#include <kernels/Sqrt.h> #include <kernels/Squeeze.h> #include <kernels/StridedSlice.h> +#include <kernels/Tanh.h> #include <kernels/Transpose.h> #include <kernels/TransposeConv.h> #include <kernels/Unpack.h> @@ -529,6 +532,20 @@ TEST_F(KernelBuilderTest, ReverseV2) checkTensor(kernel->output(), op); } +TEST_F(KernelBuilderTest, Rsqrt) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleRsqrt>(); + op->x(input); + + auto kernel = buildKernel<kernels::Rsqrt>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + TEST_F(KernelBuilderTest, Slice) { auto *input = createInputNode(); @@ -605,6 +622,20 @@ TEST_F(KernelBuilderTest, Split) checkTensor(kernel->output(1), output2); } +TEST_F(KernelBuilderTest, Sqrt) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleSqrt>(); + op->x(input); + + auto kernel = buildKernel<kernels::Sqrt>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + TEST_F(KernelBuilderTest, Squeeze) { auto *input = createInputNode(); @@ -656,6 +687,20 @@ TEST_F(KernelBuilderTest, StridedSlice) EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask())); } +TEST_F(KernelBuilderTest, Tanh) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleTanh>(); + op->x(input); + + auto kernel = buildKernel<kernels::Tanh>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + TEST_F(KernelBuilderTest, Transpose) { auto *input = createInputNode(); @@ -678,11 +723,13 @@ 
TEST_F(KernelBuilderTest, TransposeConv) auto *output_shape = createInputNode(); auto *filter = createInputNode(); auto *input = createInputNode(); + auto *bias = createInputNode(); auto *op = createNode<luci::CircleTransposeConv>(); op->inputSizes(output_shape); op->filter(filter); op->outBackprop(input); + op->bias(bias); op->padding(luci::Padding::SAME); op->stride()->h(11); @@ -695,6 +742,7 @@ TEST_F(KernelBuilderTest, TransposeConv) checkTensor(kernel->filter(), filter); checkTensor(kernel->input(), input); checkTensor(kernel->output(), op); + checkTensor(kernel->bias(), bias); EXPECT_THAT(kernel->params().padding, Eq(op->padding())); EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); diff --git a/compiler/luci-value-test/tester/CMakeLists.txt b/compiler/luci-value-test/tester/CMakeLists.txt index f3b6dfcfe..f2a4ff4b6 100644 --- a/compiler/luci-value-test/tester/CMakeLists.txt +++ b/compiler/luci-value-test/tester/CMakeLists.txt @@ -1,8 +1,6 @@ set(SRCS_EVAL_TESTER src/EvalTester.cpp - src/CircleExpContract.h - src/CircleExpContract.cpp ) add_executable(luci_eval_tester ${SRCS_EVAL_TESTER}) diff --git a/compiler/luci-value-test/tester/src/CircleExpContract.h b/compiler/luci-value-test/tester/src/CircleExpContract.h deleted file mode 100644 index 4d08fb89b..000000000 --- a/compiler/luci-value-test/tester/src/CircleExpContract.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__ -#define __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__ - -#include <loco.h> -#include <luci/CircleExporter.h> -#include <luci/IR/Module.h> - -#include <memory> -#include <string> - -struct CircleExpContract : public luci::CircleExporter::Contract -{ -public: - CircleExpContract(luci::Module *module, const std::string &filename) - : _module(module), _filepath(filename) - { - // NOTHING TO DO - } - virtual ~CircleExpContract() = default; - -public: - loco::Graph *graph(void) const final { return nullptr; } - luci::Module *module(void) const final { return _module; }; - -public: - bool store(const char *ptr, const size_t size) const final; - -private: - luci::Module *_module; - const std::string _filepath; -}; - -#endif // __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__ diff --git a/compiler/luci-value-test/tester/src/EvalTester.cpp b/compiler/luci-value-test/tester/src/EvalTester.cpp index 09eef223a..b49602e5e 100644 --- a/compiler/luci-value-test/tester/src/EvalTester.cpp +++ b/compiler/luci-value-test/tester/src/EvalTester.cpp @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "CircleExpContract.h" - #include <luci/Importer.h> #include <luci_interpreter/Interpreter.h> +#include <luci/CircleExporter.h> +#include <luci/CircleFileExpContract.h> #include <cstdlib> #include <fstream> @@ -104,7 +104,9 @@ int entry(int argc, char **argv) // Export to a Circle file luci::CircleExporter exporter; - CircleExpContract contract(initial_module.get(), intermediate_filename); + + luci::CircleFileExpContract contract(initial_module.get(), intermediate_filename); + if (!exporter.invoke(&contract)) { std::cerr << "ERROR: Failed to export '" << intermediate_filename << "'" << std::endl; diff --git a/compiler/record-minmax/src/CircleExpContract.h b/compiler/luci/export/include/luci/CircleFileExpContract.h index ab00fa860..eeaf2d9bb 100644 --- a/compiler/record-minmax/src/CircleExpContract.h +++ b/compiler/luci/export/include/luci/CircleFileExpContract.h @@ -14,40 +14,52 @@ * limitations under the License. */ -#ifndef __RECORD_MINMAX_CIRCLEXPCONTRACT_H__ -#define __RECORD_MINMAX_CIRCLEXPCONTRACT_H__ +#ifndef __LUCI_CIRCLEFILEEXPCONTRACT_H__ +#define __LUCI_CIRCLEFILEEXPCONTRACT_H__ #include <loco.h> #include <luci/CircleExporter.h> #include <luci/IR/Module.h> +#include <oops/InternalExn.h> #include <string> +#include <fstream> +#include <iostream> -namespace record_minmax +namespace luci { -struct CircleExpContract : public luci::CircleExporter::Contract +struct CircleFileExpContract : public luci::CircleExporter::Contract { public: - CircleExpContract(luci::Module *module, const std::string &filename) + CircleFileExpContract(luci::Module *module, const std::string &filename) : _module(module), _filepath(filename) { // NOTHING TO DO } - virtual ~CircleExpContract() = default; + virtual ~CircleFileExpContract() = default; public: loco::Graph *graph(void) const final { return nullptr; } - luci::Module *module(void) const final { return _module; }; + luci::Module *module(void) const final { return _module; } public: - bool store(const char *ptr, 
const size_t size) const final; + bool store(const char *ptr, const size_t size) const final + { + if (!ptr) + INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason"); + + std::ofstream fs(_filepath, std::ofstream::binary); + fs.write(ptr, size); + + return fs.good(); + } private: luci::Module *_module; const std::string _filepath; }; -} // namespace record_minmax +} // namespace luci -#endif // __RECORD_MINMAX_CIRCLEXPCONTRACT_H__ +#endif // __LUCI_CIRCLEFILEEXPCONTRACT_H__ diff --git a/compiler/luci/export/src/CircleOperationExporter.cpp b/compiler/luci/export/src/CircleOperationExporter.cpp index bca122050..36d61f6c9 100644 --- a/compiler/luci/export/src/CircleOperationExporter.cpp +++ b/compiler/luci/export/src/CircleOperationExporter.cpp @@ -38,12 +38,578 @@ namespace using namespace luci; +struct ExportContext +{ + FlatBufferBuilder &builder; + SerializedModelData &md; + SerializedGraphData &gd; +}; + +/** + * @brief Exports CircleMaxPool2D or CircleAveragePool2D + * + * @note CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D + */ +template <class CirclePool2D> +void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op) +{ + LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D || + builtin_op == circle::BuiltinOperator_L2_POOL_2D || + builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D, + "Should be L2Pool, MaxPool or AvgPool"); + LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set"); + + uint32_t op_idx = ctx.md.registerBuiltinOpcode(builtin_op, node->op_version()); + std::vector<int32_t> inputs_vec{get_tensor_index(node->value())}; + std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + + circle::Padding padding = getOpPadding(node->padding()); + + auto options = CreatePool2DOptions(ctx.builder, padding, 
node->stride()->w(), node->stride()->h(), + node->filter()->w(), node->filter()->h(), + to_circle_actfunc(node->fusedActivationFunction())); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_Pool2DOptions, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +/** + * @brief export simple nodes + */ +void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop, + circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset) +{ + uint32_t op_idx = + ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version()); + std::vector<int32_t> inputs_vec; + std::vector<int32_t> outputs_vec{get_tensor_index(node)}; + for (uint32_t i = 0; i < node->arity(); ++i) + inputs_vec.push_back(get_tensor_index(node->arg(i))); + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, bot, options_offset); + ctx.gd._operators.push_back(op_offset); +} + +/** + * @brief export simple nodes having void options + */ +void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop) +{ + uint32_t op_idx = + ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version()); + std::vector<int32_t> inputs_vec; + std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; + for (uint32_t i = 0; i < node->arity(); ++i) + inputs_vec.push_back(get_tensor_index(node->arg(i))); + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleAddN *node) +{ + uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version()); + std::vector<int32_t> 
inputs_vec; + std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; + + for (uint32_t i = 0; i < node->arity(); ++i) + inputs_vec.push_back(get_tensor_index(node->inputs(i))); + + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto options = CreateAddNOptions(ctx.builder); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_AddNOptions, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleCast *node) +{ + uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version()); + std::vector<int32_t> inputs_vec{get_tensor_index(node->x())}; + std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + + flatbuffers::Offset<Operator> op_offset; + if (node->out_data_type() != loco::DataType::Unknown) + { + auto options = CreateCastOptions(ctx.builder, to_circle_tensortype(node->in_data_type()), + to_circle_tensortype(node->out_data_type())); + op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_CastOptions, options.Union()); + } + else + { + op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs); + } + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleConcatenation *node) +{ + uint32_t op_idx = + ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version()); + std::vector<int32_t> inputs_vec; + std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; + + for (uint32_t i = 0; i < node->numValues(); ++i) + inputs_vec.push_back(get_tensor_index(node->values(i))); + + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = 
ctx.builder.CreateVector(outputs_vec); + auto options = CreateConcatenationOptions(ctx.builder, node->axis(), + to_circle_actfunc(node->fusedActivationFunction())); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_ConcatenationOptions, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleCustom *node) +{ + auto custom_outputs = loco::succs(node); + + uint32_t op_idx = ctx.md.registerCustomOpcode(node->custom_code()); + std::vector<int32_t> inputs_vec; + std::vector<int32_t> outputs_vec; + + for (uint32_t index = 0; index < node->numInputs(); index++) + { + inputs_vec.push_back(get_tensor_index(node->inputs(index))); + } + for (uint32_t index = 0; index < custom_outputs.size(); index++) + { + // store in order of index + bool found = false; + for (auto out : custom_outputs) + { + auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out); + if (custom_out->index() == static_cast<int32_t>(index)) + { + outputs_vec.push_back(get_tensor_index(custom_out)); + found = true; + break; + } + } + if (!found) + { + INTERNAL_EXN("Invalid Custom output"); + } + } + + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options; + std::vector<uint8_t> custom_options_vec{node->custom_options().begin(), + node->custom_options().end()}; + circle_custom_options = ctx.builder.CreateVector(custom_options_vec); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE, + flatbuffers::Offset<void>(), circle_custom_options); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleIf *node) +{ + auto if_outs = loco::succs(node); + assert(if_outs.size() == node->output_count()); + + uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version()); 
+ std::vector<int32_t> inputs_vec; + std::vector<int32_t> outputs_vec; + + inputs_vec.push_back(get_tensor_index(node->cond())); + for (uint32_t idx = 0; idx < node->input_count(); ++idx) + inputs_vec.push_back(get_tensor_index(node->input(idx))); + + for (uint32_t idx = 0; idx < node->output_count(); ++idx) + { + // store in order of index + bool found = false; + for (auto out : if_outs) + { + auto if_out = loco::must_cast<luci::CircleIfOut *>(out); + if (if_out->index() == static_cast<int32_t>(idx)) + { + outputs_vec.push_back(get_tensor_index(if_out)); + found = true; + break; + } + } + if (!found) + { + INTERNAL_EXN("Invalid CircleIf output"); + } + } + + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto options = CreateIfOptions(ctx.builder, node->then_branch(), node->else_branch()); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_IfOptions, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV4 *node) +{ + auto nms_outs = loco::succs(node); + assert(nms_outs.size() == 2); + + uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4, + node->op_version()); + std::vector<int32_t> inputs_vec{ + get_tensor_index(node->boxes()), get_tensor_index(node->scores()), + get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()), + get_tensor_index(node->score_threshold()), + }; + std::vector<int32_t> outputs_vec; + + for (uint32_t idx = 0; idx < nms_outs.size(); ++idx) + { + // store in order of index + bool found = false; + for (auto out : nms_outs) + { + auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out); + if (nms_out->index() == static_cast<int32_t>(idx)) + { + outputs_vec.push_back(get_tensor_index(nms_out)); + found = true; + break; + } + } + if (!found) + { + INTERNAL_EXN("Invalid 
NonMaxSuppressionV4 output"); + } + } + + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto options = CreateNonMaxSuppressionV4Options(ctx.builder); + auto op_offset = + CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV5 *node) +{ + auto nms_outs = loco::succs(node); + assert(nms_outs.size() == 3); + + uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V5, + node->op_version()); + std::vector<int32_t> inputs_vec{ + get_tensor_index(node->boxes()), get_tensor_index(node->scores()), + get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()), + get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()), + }; + std::vector<int32_t> outputs_vec; + + for (uint32_t idx = 0; idx < nms_outs.size(); ++idx) + { + // store in order of index + bool found = false; + for (auto out : nms_outs) + { + auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out); + if (nms_out->index() == static_cast<int32_t>(idx)) + { + outputs_vec.push_back(get_tensor_index(nms_out)); + found = true; + break; + } + } + if (!found) + { + INTERNAL_EXN("Invalid NonMaxSuppressionV5 output"); + } + } + + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto options = CreateNonMaxSuppressionV5Options(ctx.builder); + auto op_offset = + CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleReverseV2 *node) +{ + uint32_t op_idx = + ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version()); + 
std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())}; + std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto options = CreateReverseV2Options(ctx.builder); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_ReverseSequenceOptions, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleSplit *node) +{ + auto split_outs = loco::succs(node); + assert(int32_t(split_outs.size()) == node->num_split()); + + uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version()); + // NOTE BuiltinOperator_SPLIT input is placed at second position + std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()), + get_tensor_index(node->input())}; + std::vector<int32_t> outputs_vec; + + for (int32_t index = 0; index < node->num_split(); index++) + { + // store in order of index + bool found = false; + for (auto out : split_outs) + { + auto split_out = loco::must_cast<luci::CircleSplitOut *>(out); + if (split_out->index() == index) + { + outputs_vec.push_back(get_tensor_index(split_out)); + found = true; + break; + } + } + if (!found) + { + INTERNAL_EXN("Invalid Split output"); + } + } + + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto options = CreateSplitOptions(ctx.builder, node->num_split()); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_SplitOptions, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleSplitV *node) +{ + auto split_outs = loco::succs(node); + assert(int32_t(split_outs.size()) == node->num_split()); + + uint32_t op_idx = + 
ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version()); + std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), + get_tensor_index(node->size_splits()), + get_tensor_index(node->split_dim())}; + std::vector<int32_t> outputs_vec; + + for (int32_t index = 0; index < node->num_split(); index++) + { + // store in order of index + bool found = false; + for (auto out : split_outs) + { + auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out); + if (split_out->index() == index) + { + outputs_vec.push_back(get_tensor_index(split_out)); + found = true; + break; + } + } + if (!found) + { + INTERNAL_EXN("Invalid SplitV output"); + } + } + + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto options = CreateSplitVOptions(ctx.builder, node->num_split()); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_SplitVOptions, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleTopKV2 *node) +{ + auto topkv2_outs = loco::succs(node); + int outs_count = int32_t(topkv2_outs.size()); + assert(outs_count == 2); + + uint32_t op_idx = + ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version()); + std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())}; + std::vector<int32_t> outputs_vec; + + for (int32_t index = 0; index < outs_count; index++) + { + // store in order of index + bool found = false; + for (auto out : topkv2_outs) + { + auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out); + if (topkv2_out->index() == index) + { + outputs_vec.push_back(get_tensor_index(topkv2_out)); + found = true; + break; + } + } + if (!found) + { + INTERNAL_EXN("Invalid TopKV2 output"); + } + } + + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto options = 
CreateTopKV2Options(ctx.builder); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_TopKV2Options, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleUnique *node) +{ + auto unique_outs = loco::succs(node); + assert(int32_t(unique_outs.size()) == 2); + uint32_t op_idx = + ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version()); + + std::vector<int32_t> inputs_vec{get_tensor_index(node->input())}; + std::vector<int32_t> outputs_vec; + + for (int32_t index = 0; index < 2; index++) + { + // store in order of index + bool found = false; + for (auto out : unique_outs) + { + auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out); + if (unique_out->index() == index) + { + outputs_vec.push_back(get_tensor_index(unique_out)); + found = true; + break; + } + } + if (!found) + { + INTERNAL_EXN("Invalid Unique output"); + } + } + + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto options = CreateUniqueOptions(ctx.builder, to_circle_tensortype(node->idx_out_type())); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_UniqueOptions, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleUnpack *node) +{ + LOGGER(l); + auto settings = luci::UserSettings::settings(); + + auto unpack_outs = loco::succs(node); + // NOTE real models may not use all of the outputs + if (static_cast<int32_t>(unpack_outs.size()) != node->num()) + { + if (settings->get(luci::UserSettings::Key::DisableValidation)) + { + WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs"; + } + else + assert(false); + } + + uint32_t op_idx = + ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version()); + std::vector<int32_t> 
inputs_vec{get_tensor_index(node->value())}; + std::vector<int32_t> outputs_vec; + + for (int32_t index = 0; index < node->num(); index++) + { + // store in order of index + bool found = false; + for (auto out : unpack_outs) + { + auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out); + if (unpack_out->index() == index) + { + outputs_vec.push_back(get_tensor_index(unpack_out)); + found = true; + break; + } + } + // NOTE real models may not use all of the outputs + if (!found) + { + if (settings->get(luci::UserSettings::Key::DisableValidation)) + { + WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used"; + } + else + assert(false); + } + } + + auto inputs = ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto options = CreateUnpackOptions(ctx.builder, node->num(), node->axis()); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_UnpackOptions, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + +void export_node(ExportContext &ctx, luci::CircleWhile *node) +{ + auto while_outs = loco::succs(node); + assert(while_outs.size() == node->output_count()); + + uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version()); + std::vector<int32_t> inputs_vec; + std::vector<int32_t> outputs_vec; + + for (uint32_t idx = 0; idx < node->input_count(); ++idx) + inputs_vec.push_back(get_tensor_index(node->input(idx))); + + for (uint32_t idx = 0; idx < node->output_count(); ++idx) + { + // store in order of index + bool found = false; + for (auto out : while_outs) + { + auto while_out = loco::must_cast<luci::CircleWhileOut *>(out); + if (while_out->index() == static_cast<int32_t>(idx)) + { + outputs_vec.push_back(get_tensor_index(while_out)); + found = true; + break; + } + } + if (!found) + { + INTERNAL_EXN("Invalid CircleWhile output"); + } + } + + auto inputs = 
ctx.builder.CreateVector(inputs_vec); + auto outputs = ctx.builder.CreateVector(outputs_vec); + auto options = CreateWhileOptions(ctx.builder, node->cond_branch(), node->body_branch()); + auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, + circle::BuiltinOptions_WhileOptions, options.Union()); + ctx.gd._operators.push_back(op_offset); +} + class OperationExporter final : public luci::CircleNodeMutableVisitor<void>, public loco::CanonicalNodeMutableVisitor<void> { public: - OperationExporter(FlatBufferBuilder &fbb, SerializedModelData &m, SerializedGraphData &g) - : builder{fbb}, md{m}, gd{g} + OperationExporter(ExportContext &ctx) : _ctx{ctx} { // DO NOTHING } @@ -103,10 +669,12 @@ public: void visit(luci::CircleMul *) final; void visit(luci::CircleNeg *) final; void visit(luci::CircleNonMaxSuppressionV4 *) final; + void visit(luci::CircleNonMaxSuppressionV5 *) final; void visit(luci::CircleNotEqual *) final; void visit(luci::CircleOneHot *) final; void visit(luci::CirclePack *) final; void visit(luci::CirclePad *) final; + void visit(luci::CirclePadV2 *) final; void visit(luci::CirclePow *) final; void visit(luci::CirclePRelu *) final; void visit(luci::CircleRange *) final; @@ -168,6 +736,7 @@ public: void visit(luci::CircleCustomOut *) final {} void visit(luci::CircleIfOut *) final {} void visit(luci::CircleNonMaxSuppressionV4Out *) final {} + void visit(luci::CircleNonMaxSuppressionV5Out *) final {} void visit(luci::CircleSplitOut *) final {} void visit(luci::CircleSplitVOut *) final {} void visit(luci::CircleTopKV2Out *) final {} @@ -177,14 +746,6 @@ public: private: /** - * @brief Exports CircleMaxPool2D or CircleAveragePool2D - * - * @note CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D - */ - template <class CirclePool2D> - void export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op); - - /** * @brief export simple nodes */ void export_simple(loco::Node *node, circle::BuiltinOperator bop, 
circle::BuiltinOptions bot, @@ -196,179 +757,83 @@ private: void export_simple(loco::Node *node, circle::BuiltinOperator bop); private: - FlatBufferBuilder &builder; - SerializedModelData &md; - SerializedGraphData &gd; + ExportContext &_ctx; }; -template <class CirclePool2D> -void OperationExporter::export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op) -{ - LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D || - builtin_op == circle::BuiltinOperator_L2_POOL_2D || - builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D, - "Should be L2Pool, MaxPool or AvgPool"); - LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set"); - - uint32_t op_idx = md.registerBuiltinOpcode(builtin_op, node->op_version()); - std::vector<int32_t> inputs_vec{get_tensor_index(node->value())}; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - - circle::Padding padding = getOpPadding(node->padding()); - - auto options = CreatePool2DOptions(builder, padding, node->stride()->w(), node->stride()->h(), - node->filter()->w(), node->filter()->h(), - to_circle_actfunc(node->fusedActivationFunction())); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, - circle::BuiltinOptions_Pool2DOptions, options.Union()); - gd._operators.push_back(op_offset); -} - void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop, circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset) { - uint32_t op_idx = - md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec{get_tensor_index(node)}; - for (uint32_t i = 0; i < node->arity(); ++i) - inputs_vec.push_back(get_tensor_index(node->arg(i))); - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = 
builder.CreateVector(outputs_vec); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, bot, options_offset); - gd._operators.push_back(op_offset); + export_node(_ctx, node, bop, bot, options_offset); } void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop) { - uint32_t op_idx = - md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - for (uint32_t i = 0; i < node->arity(); ++i) - inputs_vec.push_back(get_tensor_index(node->arg(i))); - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs); - gd._operators.push_back(op_offset); + export_node(_ctx, node, bop); } void OperationExporter::visit(luci::CircleAbs *node) { export_simple(node, circle::BuiltinOperator_ABS, circle::BuiltinOptions_AbsOptions, - CreateAbsOptions(builder).Union()); + CreateAbsOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleAdd *node) { export_simple( node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions, - CreateAddOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); + CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); } -void OperationExporter::visit(luci::CircleAddN *node) -{ - uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - - for (uint32_t i = 0; i < node->arity(); ++i) - inputs_vec.push_back(get_tensor_index(node->inputs(i))); - - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto options = CreateAddNOptions(builder); - auto op_offset = 
CreateOperator(builder, op_idx, inputs, outputs, - circle::BuiltinOptions_AddNOptions, options.Union()); - gd._operators.push_back(op_offset); -} +void OperationExporter::visit(luci::CircleAddN *node) { export_node(_ctx, node); } void OperationExporter::visit(luci::CircleArgMax *node) { - export_simple(node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions, - CreateArgMaxOptions(builder, to_circle_tensortype(node->output_type())).Union()); + export_simple( + node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions, + CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union()); } void OperationExporter::visit(luci::CircleArgMin *node) { - export_simple(node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions, - CreateArgMinOptions(builder, to_circle_tensortype(node->output_type())).Union()); + export_simple( + node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions, + CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union()); } void OperationExporter::visit(luci::CircleAveragePool2D *node) { - export_pool_2d<luci::CircleAveragePool2D>(node, circle::BuiltinOperator_AVERAGE_POOL_2D); + export_pool_2d<luci::CircleAveragePool2D>(_ctx, node, circle::BuiltinOperator_AVERAGE_POOL_2D); } void OperationExporter::visit(luci::CircleBatchMatMul *node) { export_simple(node, circle::BuiltinOperator_BATCH_MATMUL, circle::BuiltinOptions_BatchMatMulOptions, - CreateBatchMatMulOptions(builder, node->adj_x(), node->adj_y()).Union()); + CreateBatchMatMulOptions(_ctx.builder, node->adj_x(), node->adj_y()).Union()); } -void OperationExporter::visit(luci::CircleCast *node) -{ - uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version()); - std::vector<int32_t> inputs_vec{get_tensor_index(node->x())}; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - auto inputs = 
builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - - flatbuffers::Offset<Operator> op_offset; - if (node->out_data_type() != loco::DataType::Unknown) - { - auto options = CreateCastOptions(builder, to_circle_tensortype(node->in_data_type()), - to_circle_tensortype(node->out_data_type())); - op_offset = CreateOperator(builder, op_idx, inputs, outputs, circle::BuiltinOptions_CastOptions, - options.Union()); - } - else - { - op_offset = CreateOperator(builder, op_idx, inputs, outputs); - } - gd._operators.push_back(op_offset); -} +void OperationExporter::visit(luci::CircleCast *node) { export_node(_ctx, node); } void OperationExporter::visit(luci::CircleCeil *node) { export_simple(node, circle::BuiltinOperator_CEIL); } -void OperationExporter::visit(luci::CircleConcatenation *node) -{ - uint32_t op_idx = - md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - - for (uint32_t i = 0; i < node->numValues(); ++i) - inputs_vec.push_back(get_tensor_index(node->values(i))); - - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto options = CreateConcatenationOptions(builder, node->axis(), - to_circle_actfunc(node->fusedActivationFunction())); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, - circle::BuiltinOptions_ConcatenationOptions, options.Union()); - gd._operators.push_back(op_offset); -} +void OperationExporter::visit(luci::CircleConcatenation *node) { export_node(_ctx, node); } void OperationExporter::visit(luci::CircleBatchToSpaceND *node) { export_simple(node, circle::BuiltinOperator_BATCH_TO_SPACE_ND, circle::BuiltinOptions_BatchToSpaceNDOptions, - CreateBatchToSpaceNDOptions(builder).Union()); + CreateBatchToSpaceNDOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleConv2D *node) 
{ export_simple(node, circle::BuiltinOperator_CONV_2D, circle::BuiltinOptions_Conv2DOptions, - CreateConv2DOptions(builder, getOpPadding(node->padding()), node->stride()->w(), - node->stride()->h(), + CreateConv2DOptions(_ctx.builder, getOpPadding(node->padding()), + node->stride()->w(), node->stride()->h(), to_circle_actfunc(node->fusedActivationFunction()), node->dilation()->w(), node->dilation()->h()) .Union()); @@ -377,64 +842,23 @@ void OperationExporter::visit(luci::CircleConv2D *node) void OperationExporter::visit(luci::CircleCos *node) { export_simple(node, circle::BuiltinOperator_COS, circle::BuiltinOptions_CosOptions, - CreateCosOptions(builder).Union()); + CreateCosOptions(_ctx.builder).Union()); } -void OperationExporter::visit(luci::CircleCustom *node) -{ - auto custom_outputs = loco::succs(node); - - uint32_t op_idx = md.registerCustomOpcode(node->custom_code()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec; - - for (uint32_t index = 0; index < node->numInputs(); index++) - { - inputs_vec.push_back(get_tensor_index(node->inputs(index))); - } - for (uint32_t index = 0; index < custom_outputs.size(); index++) - { - // store in order of index - bool found = false; - for (auto out : custom_outputs) - { - auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out); - if (custom_out->index() == static_cast<int32_t>(index)) - { - outputs_vec.push_back(get_tensor_index(custom_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid Custom output"); - } - } - - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options; - std::vector<uint8_t> custom_options_vec{node->custom_options().begin(), - node->custom_options().end()}; - circle_custom_options = builder.CreateVector(custom_options_vec); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE, - 
flatbuffers::Offset<void>(), circle_custom_options); - gd._operators.push_back(op_offset); -} +void OperationExporter::visit(luci::CircleCustom *node) { export_node(_ctx, node); } void OperationExporter::visit(luci::CircleDepthToSpace *node) { export_simple(node, circle::BuiltinOperator_DEPTH_TO_SPACE, circle::BuiltinOptions_DepthToSpaceOptions, - CreateDepthToSpaceOptions(builder, node->block_size()).Union()); + CreateDepthToSpaceOptions(_ctx.builder, node->block_size()).Union()); } void OperationExporter::visit(luci::CircleDepthwiseConv2D *node) { export_simple(node, circle::BuiltinOperator_DEPTHWISE_CONV_2D, circle::BuiltinOptions_DepthwiseConv2DOptions, - CreateDepthwiseConv2DOptions(builder, getOpPadding(node->padding()), + CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()), node->stride()->w(), node->stride()->h(), node->depthMultiplier(), to_circle_actfunc(node->fusedActivationFunction()), @@ -446,7 +870,7 @@ void OperationExporter::visit(luci::CircleDiv *node) { export_simple( node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions, - CreateDivOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); + CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); } void OperationExporter::visit(luci::CircleElu *node) @@ -457,25 +881,25 @@ void OperationExporter::visit(luci::CircleElu *node) void OperationExporter::visit(luci::CircleEqual *node) { export_simple(node, circle::BuiltinOperator_EQUAL, circle::BuiltinOptions_EqualOptions, - CreateEqualOptions(builder).Union()); + CreateEqualOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleExp *node) { export_simple(node, circle::BuiltinOperator_EXP, circle::BuiltinOptions_ExpOptions, - CreateExpOptions(builder).Union()); + CreateExpOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleExpandDims *node) { export_simple(node, circle::BuiltinOperator_EXPAND_DIMS, 
circle::BuiltinOptions_ExpandDimsOptions, - CreateExpandDimsOptions(builder).Union()); + CreateExpandDimsOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleFill *node) { export_simple(node, circle::BuiltinOperator_FILL, circle::BuiltinOptions_FillOptions, - CreateFillOptions(builder).Union()); + CreateFillOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleFloor *node) @@ -486,124 +910,86 @@ void OperationExporter::visit(luci::CircleFloor *node) void OperationExporter::visit(luci::CircleFloorDiv *node) { export_simple(node, circle::BuiltinOperator_FLOOR_DIV, circle::BuiltinOptions_FloorDivOptions, - CreateFloorDivOptions(builder).Union()); + CreateFloorDivOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleFloorMod *node) { export_simple(node, circle::BuiltinOperator_FLOOR_MOD, circle::BuiltinOptions_FloorModOptions, - CreateFloorModOptions(builder).Union()); + CreateFloorModOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleFullyConnected *node) { export_simple( node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions, - CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction())) + CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())) .Union()); } void OperationExporter::visit(luci::CircleGather *node) { export_simple(node, circle::BuiltinOperator_GATHER, circle::BuiltinOptions_GatherOptions, - CreateGatherOptions(builder, node->axis()).Union()); + CreateGatherOptions(_ctx.builder, node->axis()).Union()); } void OperationExporter::visit(luci::CircleGatherNd *node) { export_simple(node, circle::BuiltinOperator_GATHER_ND, circle::BuiltinOptions_GatherNdOptions, - CreateGatherNdOptions(builder).Union()); + CreateGatherNdOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleGreater *node) { export_simple(node, circle::BuiltinOperator_GREATER, 
circle::BuiltinOptions_GreaterOptions, - CreateGreaterOptions(builder).Union()); + CreateGreaterOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleGreaterEqual *node) { export_simple(node, circle::BuiltinOperator_GREATER_EQUAL, circle::BuiltinOptions_GreaterEqualOptions, - CreateGreaterEqualOptions(builder).Union()); + CreateGreaterEqualOptions(_ctx.builder).Union()); } -void OperationExporter::visit(luci::CircleIf *node) -{ - auto if_outs = loco::succs(node); - assert(if_outs.size() == node->output_count()); - - uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec; - - inputs_vec.push_back(get_tensor_index(node->cond())); - for (uint32_t idx = 0; idx < node->input_count(); ++idx) - inputs_vec.push_back(get_tensor_index(node->input(idx))); - - for (uint32_t idx = 0; idx < node->output_count(); ++idx) - { - // store in order of index - bool found = false; - for (auto out : if_outs) - { - auto if_out = loco::must_cast<luci::CircleIfOut *>(out); - if (if_out->index() == static_cast<int32_t>(idx)) - { - outputs_vec.push_back(get_tensor_index(if_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid CircleIf output"); - } - } - - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto options = CreateIfOptions(builder, node->then_branch(), node->else_branch()); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, - circle::BuiltinOptions_IfOptions, options.Union()); - gd._operators.push_back(op_offset); -} +void OperationExporter::visit(luci::CircleIf *node) { export_node(_ctx, node); } void OperationExporter::visit(luci::CircleL2Normalize *node) { export_simple( node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions, - CreateL2NormOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); + 
CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())) + .Union()); } void OperationExporter::visit(luci::CircleL2Pool2D *node) { - export_pool_2d<luci::CircleL2Pool2D>(node, circle::BuiltinOperator_L2_POOL_2D); + export_pool_2d<luci::CircleL2Pool2D>(_ctx, node, circle::BuiltinOperator_L2_POOL_2D); } void OperationExporter::visit(luci::CircleLeakyRelu *node) { export_simple(node, circle::BuiltinOperator_LEAKY_RELU, circle::BuiltinOptions_LeakyReluOptions, - CreateLeakyReluOptions(builder, node->alpha()).Union()); + CreateLeakyReluOptions(_ctx.builder, node->alpha()).Union()); } void OperationExporter::visit(luci::CircleLess *node) { export_simple(node, circle::BuiltinOperator_LESS, circle::BuiltinOptions_LessOptions, - CreateLessOptions(builder).Union()); + CreateLessOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleLessEqual *node) { export_simple(node, circle::BuiltinOperator_LESS_EQUAL, circle::BuiltinOptions_LessEqualOptions, - CreateLessEqualOptions(builder).Union()); + CreateLessEqualOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleLocalResponseNormalization *node) { export_simple(node, circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, circle::BuiltinOptions_LocalResponseNormalizationOptions, - CreateLocalResponseNormalizationOptions(builder, node->radius(), node->bias(), + CreateLocalResponseNormalizationOptions(_ctx.builder, node->radius(), node->bias(), node->alpha(), node->beta()) .Union()); } @@ -616,19 +1002,19 @@ void OperationExporter::visit(luci::CircleLog *node) void OperationExporter::visit(luci::CircleLogicalAnd *node) { export_simple(node, circle::BuiltinOperator_LOGICAL_AND, circle::BuiltinOptions_LogicalAndOptions, - CreateLogicalAndOptions(builder).Union()); + CreateLogicalAndOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleLogicalNot *node) { export_simple(node, circle::BuiltinOperator_LOGICAL_NOT, 
circle::BuiltinOptions_LogicalNotOptions, - CreateLogicalNotOptions(builder).Union()); + CreateLogicalNotOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleLogicalOr *node) { export_simple(node, circle::BuiltinOperator_LOGICAL_OR, circle::BuiltinOptions_LogicalOrOptions, - CreateLogicalOrOptions(builder).Union()); + CreateLogicalOrOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleLogistic *node) @@ -639,135 +1025,103 @@ void OperationExporter::visit(luci::CircleLogistic *node) void OperationExporter::visit(luci::CircleLogSoftmax *node) { export_simple(node, circle::BuiltinOperator_LOG_SOFTMAX, circle::BuiltinOptions_LogSoftmaxOptions, - CreateLogSoftmaxOptions(builder).Union()); + CreateLogSoftmaxOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleMatrixDiag *node) { export_simple(node, circle::BuiltinOperator_MATRIX_DIAG, circle::BuiltinOptions_MatrixDiagOptions, - CreateMatrixDiagOptions(builder).Union()); + CreateMatrixDiagOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleMatrixSetDiag *node) { export_simple(node, circle::BuiltinOperator_MATRIX_SET_DIAG, circle::BuiltinOptions_MatrixSetDiagOptions, - CreateMatrixSetDiagOptions(builder).Union()); + CreateMatrixSetDiagOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleMaximum *node) { export_simple(node, circle::BuiltinOperator_MAXIMUM, circle::BuiltinOptions_MaximumMinimumOptions, - CreateMaximumMinimumOptions(builder).Union()); + CreateMaximumMinimumOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleMaxPool2D *node) { - export_pool_2d<luci::CircleMaxPool2D>(node, circle::BuiltinOperator_MAX_POOL_2D); + export_pool_2d<luci::CircleMaxPool2D>(_ctx, node, circle::BuiltinOperator_MAX_POOL_2D); } void OperationExporter::visit(luci::CircleMean *node) { export_simple(node, circle::BuiltinOperator_MEAN, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(builder, 
node->keep_dims()).Union()); + CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); } void OperationExporter::visit(luci::CircleMinimum *node) { export_simple(node, circle::BuiltinOperator_MINIMUM, circle::BuiltinOptions_MaximumMinimumOptions, - CreateMaximumMinimumOptions(builder).Union()); + CreateMaximumMinimumOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleMirrorPad *node) { - export_simple(node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions, - CreateMirrorPadOptions(builder, to_circle_mirrorpadmode(node->mode())).Union()); + export_simple( + node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions, + CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union()); } void OperationExporter::visit(luci::CircleMul *node) { export_simple( node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions, - CreateMulOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); + CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); } void OperationExporter::visit(luci::CircleNeg *node) { export_simple(node, circle::BuiltinOperator_NEG, circle::BuiltinOptions_NegOptions, - CreateNegOptions(builder).Union()); + CreateNegOptions(_ctx.builder).Union()); } -void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node) -{ - auto nms_outs = loco::succs(node); - assert(nms_outs.size() == 2); +void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node) { export_node(_ctx, node); } - uint32_t op_idx = - md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4, node->op_version()); - std::vector<int32_t> inputs_vec{ - get_tensor_index(node->boxes()), get_tensor_index(node->scores()), - get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()), - get_tensor_index(node->score_threshold()), - }; - std::vector<int32_t> outputs_vec; - - for (uint32_t idx 
= 0; idx < nms_outs.size(); ++idx) - { - // store in order of index - bool found = false; - for (auto out : nms_outs) - { - auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out); - if (nms_out->index() == static_cast<int32_t>(idx)) - { - outputs_vec.push_back(get_tensor_index(nms_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid NonMaxSuppressionV4 output"); - } - } - - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto options = CreateNonMaxSuppressionV4Options(builder); - auto op_offset = - CreateOperator(builder, op_idx, inputs, outputs, - circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union()); - gd._operators.push_back(op_offset); -} +void OperationExporter::visit(luci::CircleNonMaxSuppressionV5 *node) { export_node(_ctx, node); } void OperationExporter::visit(luci::CircleNotEqual *node) { export_simple(node, circle::BuiltinOperator_NOT_EQUAL, circle::BuiltinOptions_NotEqualOptions, - CreateNotEqualOptions(builder).Union()); + CreateNotEqualOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleOneHot *node) { export_simple(node, circle::BuiltinOperator_ONE_HOT, circle::BuiltinOptions_OneHotOptions, - CreateOneHotOptions(builder, node->axis()).Union()); + CreateOneHotOptions(_ctx.builder, node->axis()).Union()); } void OperationExporter::visit(luci::CirclePack *node) { export_simple(node, circle::BuiltinOperator_PACK, circle::BuiltinOptions_PackOptions, - CreatePackOptions(builder, node->values_count(), node->axis()).Union()); + CreatePackOptions(_ctx.builder, node->values_count(), node->axis()).Union()); } void OperationExporter::visit(luci::CirclePad *node) { export_simple(node, circle::BuiltinOperator_PAD, circle::BuiltinOptions_PadOptions, - CreatePadOptions(builder).Union()); + CreatePadOptions(_ctx.builder).Union()); +} + +void OperationExporter::visit(luci::CirclePadV2 *node) +{ + export_simple(node, 
circle::BuiltinOperator_PADV2, circle::BuiltinOptions_PadV2Options, + CreatePadV2Options(_ctx.builder).Union()); } void OperationExporter::visit(luci::CirclePow *node) { export_simple(node, circle::BuiltinOperator_POW, circle::BuiltinOptions_PowOptions, - CreatePowOptions(builder).Union()); + CreatePowOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CirclePRelu *node) @@ -778,37 +1132,37 @@ void OperationExporter::visit(luci::CirclePRelu *node) void OperationExporter::visit(luci::CircleRange *node) { export_simple(node, circle::BuiltinOperator_RANGE, circle::BuiltinOptions_RangeOptions, - CreateRangeOptions(builder).Union()); + CreateRangeOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleRank *node) { export_simple(node, circle::BuiltinOperator_RANK, circle::BuiltinOptions_RankOptions, - CreateRankOptions(builder).Union()); + CreateRankOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleReduceAny *node) { export_simple(node, circle::BuiltinOperator_REDUCE_ANY, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(builder, node->keep_dims()).Union()); + CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); } void OperationExporter::visit(luci::CircleReduceMax *node) { export_simple(node, circle::BuiltinOperator_REDUCE_MAX, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(builder, node->keep_dims()).Union()); + CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); } void OperationExporter::visit(luci::CircleReduceMin *node) { export_simple(node, circle::BuiltinOperator_REDUCE_MIN, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(builder, node->keep_dims()).Union()); + CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); } void OperationExporter::visit(luci::CircleReduceProd *node) { export_simple(node, circle::BuiltinOperator_REDUCE_PROD, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(builder, 
node->keep_dims()).Union()); + CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); } void OperationExporter::visit(luci::CircleRelu *node) @@ -828,18 +1182,18 @@ void OperationExporter::visit(luci::CircleReluN1To1 *node) void OperationExporter::visit(luci::CircleReshape *node) { - auto new_shape = builder.CreateVector<int32_t>( + auto new_shape = _ctx.builder.CreateVector<int32_t>( node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); }); export_simple(node, circle::BuiltinOperator_RESHAPE, circle::BuiltinOptions_ReshapeOptions, - CreateReshapeOptions(builder, new_shape).Union()); + CreateReshapeOptions(_ctx.builder, new_shape).Union()); } void OperationExporter::visit(luci::CircleResizeBilinear *node) { export_simple( node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions, - CreateResizeBilinearOptions(builder, node->align_corners(), node->half_pixel_centers()) + CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers()) .Union()); } @@ -847,29 +1201,17 @@ void OperationExporter::visit(luci::CircleResizeNearestNeighbor *node) { export_simple(node, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, circle::BuiltinOptions_ResizeNearestNeighborOptions, - CreateResizeNearestNeighborOptions(builder, node->align_corners()).Union()); + CreateResizeNearestNeighborOptions(_ctx.builder, node->align_corners()).Union()); } void OperationExporter::visit(luci::CircleReverseSequence *node) { export_simple( node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions, - CreateReverseSequenceOptions(builder, node->seq_axis(), node->batch_axis()).Union()); + CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union()); } -void OperationExporter::visit(luci::CircleReverseV2 *node) -{ - uint32_t op_idx = - md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version()); - std::vector<int32_t> 
inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())}; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto options = CreateReverseV2Options(builder); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, - circle::BuiltinOptions_ReverseSequenceOptions, options.Union()); - gd._operators.push_back(op_offset); -} +void OperationExporter::visit(luci::CircleReverseV2 *node) { export_node(_ctx, node); } void OperationExporter::visit(luci::CircleRound *node) { @@ -884,31 +1226,31 @@ void OperationExporter::visit(luci::CircleRsqrt *node) void OperationExporter::visit(luci::CircleScatterNd *node) { export_simple(node, circle::BuiltinOperator_SCATTER_ND, circle::BuiltinOptions_ScatterNdOptions, - CreateScatterNdOptions(builder).Union()); + CreateScatterNdOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleSegmentSum *node) { export_simple(node, circle::BuiltinOperator_SEGMENT_SUM, circle::BuiltinOptions_SegmentSumOptions, - CreateSegmentSumOptions(builder).Union()); + CreateSegmentSumOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleSelect *node) { export_simple(node, circle::BuiltinOperator_SELECT, circle::BuiltinOptions_SelectOptions, - CreateSelectOptions(builder).Union()); + CreateSelectOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleSelectV2 *node) { export_simple(node, circle::BuiltinOperator_SELECT_V2, circle::BuiltinOptions_SelectV2Options, - CreateSelectV2Options(builder).Union()); + CreateSelectV2Options(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleShape *node) { export_simple(node, circle::BuiltinOperator_SHAPE, circle::BuiltinOptions_ShapeOptions, - CreateShapeOptions(builder, to_circle_tensortype(node->out_type())).Union()); + CreateShapeOptions(_ctx.builder, 
to_circle_tensortype(node->out_type())).Union()); } void OperationExporter::visit(luci::CircleSin *node) @@ -919,113 +1261,39 @@ void OperationExporter::visit(luci::CircleSin *node) void OperationExporter::visit(luci::CircleSlice *node) { export_simple(node, circle::BuiltinOperator_SLICE, circle::BuiltinOptions_SliceOptions, - CreateSliceOptions(builder).Union()); + CreateSliceOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleSoftmax *node) { export_simple(node, circle::BuiltinOperator_SOFTMAX, circle::BuiltinOptions_SoftmaxOptions, - CreateSoftmaxOptions(builder, node->beta()).Union()); + CreateSoftmaxOptions(_ctx.builder, node->beta()).Union()); } void OperationExporter::visit(luci::CircleSpaceToBatchND *node) { export_simple(node, circle::BuiltinOperator_SPACE_TO_BATCH_ND, circle::BuiltinOptions_SpaceToBatchNDOptions, - CreateSpaceToBatchNDOptions(builder).Union()); + CreateSpaceToBatchNDOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleSpaceToDepth *node) { export_simple(node, circle::BuiltinOperator_SPACE_TO_DEPTH, circle::BuiltinOptions_SpaceToDepthOptions, - CreateSpaceToDepthOptions(builder, node->block_size()).Union()); + CreateSpaceToDepthOptions(_ctx.builder, node->block_size()).Union()); } void OperationExporter::visit(luci::CircleSparseToDense *node) { export_simple(node, circle::BuiltinOperator_SPARSE_TO_DENSE, circle::BuiltinOptions_SparseToDenseOptions, - CreateSparseToDenseOptions(builder, node->validate_indices()).Union()); + CreateSparseToDenseOptions(_ctx.builder, node->validate_indices()).Union()); } -void OperationExporter::visit(luci::CircleSplit *node) -{ - auto split_outs = loco::succs(node); - assert(int32_t(split_outs.size()) == node->num_split()); - - uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version()); - // NOTE BuiltinOperator_SPLIT input is placed at second position - std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()), - 
get_tensor_index(node->input())}; - std::vector<int32_t> outputs_vec; - - for (int32_t index = 0; index < node->num_split(); index++) - { - // store in order of index - bool found = false; - for (auto out : split_outs) - { - auto split_out = loco::must_cast<luci::CircleSplitOut *>(out); - if (split_out->index() == index) - { - outputs_vec.push_back(get_tensor_index(split_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid Split output"); - } - } - - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto options = CreateSplitOptions(builder, node->num_split()); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, - circle::BuiltinOptions_SplitOptions, options.Union()); - gd._operators.push_back(op_offset); -} - -void OperationExporter::visit(luci::CircleSplitV *node) -{ - auto split_outs = loco::succs(node); - assert(int32_t(split_outs.size()) == node->num_split()); - - uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version()); - std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), - get_tensor_index(node->size_splits()), - get_tensor_index(node->split_dim())}; - std::vector<int32_t> outputs_vec; - - for (int32_t index = 0; index < node->num_split(); index++) - { - // store in order of index - bool found = false; - for (auto out : split_outs) - { - auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out); - if (split_out->index() == index) - { - outputs_vec.push_back(get_tensor_index(split_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid SplitV output"); - } - } +void OperationExporter::visit(luci::CircleSplit *node) { export_node(_ctx, node); } - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto options = CreateSplitVOptions(builder, node->num_split()); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, - 
circle::BuiltinOptions_SplitVOptions, options.Union()); - gd._operators.push_back(op_offset); -} +void OperationExporter::visit(luci::CircleSplitV *node) { export_node(_ctx, node); } void OperationExporter::visit(luci::CircleSqrt *node) { @@ -1035,28 +1303,28 @@ void OperationExporter::visit(luci::CircleSqrt *node) void OperationExporter::visit(luci::CircleSquare *node) { export_simple(node, circle::BuiltinOperator_SQUARE, circle::BuiltinOptions_SquareOptions, - CreateSquareOptions(builder).Union()); + CreateSquareOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleSquaredDifference *node) { export_simple(node, circle::BuiltinOperator_SQUARED_DIFFERENCE, circle::BuiltinOptions_SquaredDifferenceOptions, - CreateSquaredDifferenceOptions(builder).Union()); + CreateSquaredDifferenceOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleSqueeze *node) { - auto squeeze_dims = builder.CreateVector<int32_t>(node->squeeze_dims()); + auto squeeze_dims = _ctx.builder.CreateVector<int32_t>(node->squeeze_dims()); export_simple(node, circle::BuiltinOperator_SQUEEZE, circle::BuiltinOptions_SqueezeOptions, - CreateSqueezeOptions(builder, squeeze_dims).Union()); + CreateSqueezeOptions(_ctx.builder, squeeze_dims).Union()); } void OperationExporter::visit(luci::CircleStridedSlice *node) { export_simple(node, circle::BuiltinOperator_STRIDED_SLICE, circle::BuiltinOptions_StridedSliceOptions, - CreateStridedSliceOptions(builder, node->begin_mask(), node->end_mask(), + CreateStridedSliceOptions(_ctx.builder, node->begin_mask(), node->end_mask(), node->ellipsis_mask(), node->new_axis_mask(), node->shrink_axis_mask()) .Union()); @@ -1066,13 +1334,13 @@ void OperationExporter::visit(luci::CircleSub *node) { export_simple( node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions, - CreateSubOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); + CreateSubOptions(_ctx.builder, 
to_circle_actfunc(node->fusedActivationFunction())).Union()); } void OperationExporter::visit(luci::CircleSum *node) { export_simple(node, circle::BuiltinOperator_SUM, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(builder, node->keep_dims()).Union()); + CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); } void OperationExporter::visit(luci::CircleTanh *node) @@ -1083,226 +1351,65 @@ void OperationExporter::visit(luci::CircleTanh *node) void OperationExporter::visit(luci::CircleTile *node) { export_simple(node, circle::BuiltinOperator_TILE, circle::BuiltinOptions_TileOptions, - CreateTileOptions(builder).Union()); + CreateTileOptions(_ctx.builder).Union()); } -void OperationExporter::visit(luci::CircleTopKV2 *node) -{ - auto topkv2_outs = loco::succs(node); - int outs_count = int32_t(topkv2_outs.size()); - assert(outs_count == 2); - - uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version()); - std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())}; - std::vector<int32_t> outputs_vec; - - for (int32_t index = 0; index < outs_count; index++) - { - // store in order of index - bool found = false; - for (auto out : topkv2_outs) - { - auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out); - if (topkv2_out->index() == index) - { - outputs_vec.push_back(get_tensor_index(topkv2_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid TopKV2 output"); - } - } - - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto options = CreateTopKV2Options(builder); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, - circle::BuiltinOptions_TopKV2Options, options.Union()); - gd._operators.push_back(op_offset); -} +void OperationExporter::visit(luci::CircleTopKV2 *node) { export_node(_ctx, node); } void OperationExporter::visit(luci::CircleTranspose *node) { export_simple(node, 
circle::BuiltinOperator_TRANSPOSE, circle::BuiltinOptions_TransposeOptions, - CreateTransposeOptions(builder).Union()); + CreateTransposeOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleTransposeConv *node) { export_simple(node, circle::BuiltinOperator_TRANSPOSE_CONV, circle::BuiltinOptions_TransposeConvOptions, - CreateTransposeConvOptions(builder, getOpPadding(node->padding()), + CreateTransposeConvOptions(_ctx.builder, getOpPadding(node->padding()), node->stride()->w(), node->stride()->h()) .Union()); } -void OperationExporter::visit(luci::CircleUnique *node) -{ - auto unique_outs = loco::succs(node); - assert(int32_t(unique_outs.size()) == 2); - uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version()); +void OperationExporter::visit(luci::CircleUnique *node) { export_node(_ctx, node); } - std::vector<int32_t> inputs_vec{get_tensor_index(node->input())}; - std::vector<int32_t> outputs_vec; - - for (int32_t index = 0; index < 2; index++) - { - // store in order of index - bool found = false; - for (auto out : unique_outs) - { - auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out); - if (unique_out->index() == index) - { - outputs_vec.push_back(get_tensor_index(unique_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid Unique output"); - } - } - - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto options = CreateUniqueOptions(builder, to_circle_tensortype(node->idx_out_type())); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, - circle::BuiltinOptions_UniqueOptions, options.Union()); - gd._operators.push_back(op_offset); -} - -void OperationExporter::visit(luci::CircleUnpack *node) -{ - LOGGER(l); - auto settings = luci::UserSettings::settings(); - - auto unpack_outs = loco::succs(node); - // NOTE real models may not use all of the outputs - if (static_cast<int32_t>(unpack_outs.size()) 
!= node->num()) - { - if (settings->get(luci::UserSettings::Key::DisableValidation)) - { - WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs"; - } - else - assert(false); - } - - uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version()); - std::vector<int32_t> inputs_vec{get_tensor_index(node->value())}; - std::vector<int32_t> outputs_vec; - - for (int32_t index = 0; index < node->num(); index++) - { - // store in order of index - bool found = false; - for (auto out : unpack_outs) - { - auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out); - if (unpack_out->index() == index) - { - outputs_vec.push_back(get_tensor_index(unpack_out)); - found = true; - break; - } - } - // NOTE real models may not use all of the outputs - if (!found) - { - if (settings->get(luci::UserSettings::Key::DisableValidation)) - { - WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used"; - } - else - assert(false); - } - } - - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto options = CreateUnpackOptions(builder, node->num(), node->axis()); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, - circle::BuiltinOptions_UnpackOptions, options.Union()); - gd._operators.push_back(op_offset); -} +void OperationExporter::visit(luci::CircleUnpack *node) { export_node(_ctx, node); } void OperationExporter::visit(luci::CircleWhere *node) { export_simple(node, circle::BuiltinOperator_WHERE, circle::BuiltinOptions_WhereOptions, - CreateWhereOptions(builder).Union()); + CreateWhereOptions(_ctx.builder).Union()); } -void OperationExporter::visit(luci::CircleWhile *node) -{ - auto while_outs = loco::succs(node); - assert(while_outs.size() == node->output_count()); - - uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> 
outputs_vec; - - for (uint32_t idx = 0; idx < node->input_count(); ++idx) - inputs_vec.push_back(get_tensor_index(node->input(idx))); - - for (uint32_t idx = 0; idx < node->output_count(); ++idx) - { - // store in order of index - bool found = false; - for (auto out : while_outs) - { - auto while_out = loco::must_cast<luci::CircleWhileOut *>(out); - if (while_out->index() == static_cast<int32_t>(idx)) - { - outputs_vec.push_back(get_tensor_index(while_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid CircleWhile output"); - } - } - - auto inputs = builder.CreateVector(inputs_vec); - auto outputs = builder.CreateVector(outputs_vec); - auto options = CreateWhileOptions(builder, node->cond_branch(), node->body_branch()); - auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, - circle::BuiltinOptions_WhileOptions, options.Union()); - gd._operators.push_back(op_offset); -} +void OperationExporter::visit(luci::CircleWhile *node) { export_node(_ctx, node); } void OperationExporter::visit(luci::CircleZerosLike *node) { export_simple(node, circle::BuiltinOperator_ZEROS_LIKE, circle::BuiltinOptions_ZerosLikeOptions, - CreateZerosLikeOptions(builder).Union()); + CreateZerosLikeOptions(_ctx.builder).Union()); } void OperationExporter::visit(luci::CircleBCQFullyConnected *node) { export_simple(node, circle::BuiltinOperator_BCQ_FULLY_CONNECTED, circle::BuiltinOptions_BCQFullyConnectedOptions, - CreateBCQFullyConnectedOptions(builder, node->weights_hidden_size(), + CreateBCQFullyConnectedOptions(_ctx.builder, node->weights_hidden_size(), to_circle_actfunc(node->fusedActivationFunction())) .Union()); } void OperationExporter::visit(luci::CircleBCQGather *node) { - export_simple(node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions, - CreateBCQGatherOptions(builder, node->input_hidden_size(), node->axis()).Union()); + export_simple( + node, circle::BuiltinOperator_BCQ_GATHER, 
circle::BuiltinOptions_BCQGatherOptions, + CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union()); } void OperationExporter::visit(luci::CircleInstanceNorm *node) { export_simple(node, circle::BuiltinOperator_INSTANCE_NORM, circle::BuiltinOptions_InstanceNormOptions, - CreateInstanceNormOptions(builder, node->epsilon(), + CreateInstanceNormOptions(_ctx.builder, node->epsilon(), to_circle_actfunc(node->fusedActivationFunction())) .Union()); } @@ -1312,7 +1419,8 @@ void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, Seria { if (auto circle_node = dynamic_cast<luci::CircleNode *>(node)) { - OperationExporter exporter{builder, md, gd}; + ExportContext ctx{builder, md, gd}; + OperationExporter exporter{ctx}; circle_node->accept(&exporter); } else diff --git a/compiler/luci/import/include/luci/Import/Nodes.h b/compiler/luci/import/include/luci/Import/Nodes.h index 825c2147d..0b21d380f 100644 --- a/compiler/luci/import/include/luci/Import/Nodes.h +++ b/compiler/luci/import/include/luci/Import/Nodes.h @@ -74,10 +74,12 @@ #include "Nodes/CircleMul.h" #include "Nodes/CircleNeg.h" #include "Nodes/CircleNonMaxSuppressionV4.h" +#include "Nodes/CircleNonMaxSuppressionV5.h" #include "Nodes/CircleNotEqual.h" #include "Nodes/CircleOneHot.h" #include "Nodes/CirclePack.h" #include "Nodes/CirclePad.h" +#include "Nodes/CirclePadV2.h" #include "Nodes/CirclePow.h" #include "Nodes/CirclePRelu.h" #include "Nodes/CircleRange.h" diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h b/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h new file mode 100644 index 000000000..62be0758e --- /dev/null +++ b/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__ +#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__ + +#include "luci/Import/GraphBuilderBase.h" + +namespace luci +{ + +class CircleNonMaxSuppressionV5GraphBuilder : public GraphBuilderBase +{ +public: + bool validate(const ValidateArgs &args) const final; + + void build(const circle::OperatorT &op, GraphBuilderContext *context) const final; +}; + +} // namespace luci + +#endif // __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__ diff --git a/compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h b/compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h new file mode 100644 index 000000000..089f52c81 --- /dev/null +++ b/compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IMPORT_OP_CIRCLE_PADV2_H__ +#define __LUCI_IMPORT_OP_CIRCLE_PADV2_H__ + +#include "luci/Import/GraphBuilder.h" + +namespace luci +{ + +class CirclePadV2GraphBuilder : public GraphBuilder +{ +public: + bool validate(const ValidateArgs &args) const final; + +private: + CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, + loco::Graph *graph) const final; +}; + +} // namespace luci + +#endif // __LUCI_IMPORT_OP_CIRCLE_PADV2_H__ diff --git a/compiler/luci/import/src/GraphBuilderRegistry.cpp b/compiler/luci/import/src/GraphBuilderRegistry.cpp index cc328cc16..c6bcacb54 100644 --- a/compiler/luci/import/src/GraphBuilderRegistry.cpp +++ b/compiler/luci/import/src/GraphBuilderRegistry.cpp @@ -83,10 +83,12 @@ GraphBuilderRegistry::GraphBuilderRegistry() CIRCLE_NODE(MUL, CircleMulGraphBuilder); // 18 CIRCLE_NODE(NEG, CircleNegGraphBuilder); // 59 CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4GraphBuilder); // 120, + CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5GraphBuilder); // 121, CIRCLE_NODE(NOT_EQUAL, CircleNotEqualGraphBuilder); // 72 CIRCLE_NODE(ONE_HOT, CircleOneHotGraphBuilder); // 85 CIRCLE_NODE(PACK, CirclePackGraphBuilder); // 83 CIRCLE_NODE(PAD, CirclePadGraphBuilder); // 34 + CIRCLE_NODE(PADV2, CirclePadV2GraphBuilder); // 60 CIRCLE_NODE(POW, CirclePowGraphBuilder); // 78 CIRCLE_NODE(PRELU, CirclePReluGraphBuilder); // 54, CIRCLE_NODE(RANGE, CircleRangeGraphBuilder); // 96 @@ -155,11 +157,9 @@ GraphBuilderRegistry::GraphBuilderRegistry() // BuiltinOperator_DELEGATE = 51, // BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52, // BuiltinOperator_ARG_MAX = 56, - // BuiltinOperator_PADV2 = 60, // BuiltinOperator_FAKE_QUANT = 80, // BuiltinOperator_QUANTIZE = 114, // BuiltinOperator_HARD_SWISH = 117, - // BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121, // 
BuiltinOperator_DENSIFY = 124, } diff --git a/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp b/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp index 8c2039fff..7faab141c 100644 --- a/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp +++ b/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp @@ -18,49 +18,16 @@ #include <luci/IR/Nodes/CircleBatchToSpaceND.h> -#include <loco.h> +#include "ValidateHelpers.h" -#include <cassert> +#include <loco.h> namespace luci { bool CircleBatchToSpaceNDGraphBuilder::validate(const ValidateArgs &args) const { - const auto &inputs = args.op.inputs; - if (inputs.size() != 3) - return false; - - // input 1 and 2 should have INT32/INT64 type - const auto &tensors = args.reader.tensors(); - const auto &tensor_1 = tensors.at(inputs.at(1)); - switch (tensor_1->type) - { - case circle::TensorType_INT32: - case circle::TensorType_INT64: - break; - default: - return false; - } - const auto &tensor_2 = tensors.at(inputs.at(2)); - switch (tensor_2->type) - { - case circle::TensorType_INT32: - case circle::TensorType_INT64: - break; - default: - return false; - } - - // Only support input shape dimension 3 and 4 only - const auto &tensor_0 = tensors.at(inputs.at(0)); - const auto t_0_s = tensor_0->shape.size(); - if (t_0_s != 3 && t_0_s != 4) - return false; - - // TODO check input shape - - return true; + return validate_batch_space_nd(args); } CircleNode *CircleBatchToSpaceNDGraphBuilder::build_node(const circle::OperatorT &, diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp index 7131dc115..fad7a0757 100644 --- a/compiler/luci/import/src/Nodes/CircleConst.cpp +++ b/compiler/luci/import/src/Nodes/CircleConst.cpp @@ -118,6 +118,10 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind copy_data<loco::DataType::U8>(buffer, num_elements, const_node); break; + case loco::DataType::S8: + copy_data<loco::DataType::S8>(buffer, 
num_elements, const_node); + break; + case loco::DataType::S16: copy_data<loco::DataType::S16>(buffer, num_elements, const_node); break; diff --git a/compiler/luci/import/src/Nodes/CircleMaximum.cpp b/compiler/luci/import/src/Nodes/CircleMaximum.cpp index 4d1468f19..805d5bc89 100644 --- a/compiler/luci/import/src/Nodes/CircleMaximum.cpp +++ b/compiler/luci/import/src/Nodes/CircleMaximum.cpp @@ -18,6 +18,8 @@ #include <luci/IR/Nodes/CircleMaximum.h> +#include "ValidateHelpers.h" + #include <loco.h> namespace luci @@ -25,37 +27,7 @@ namespace luci bool CircleMaximumGraphBuilder::validate(const ValidateArgs &args) const { - const auto &inputs = args.op.inputs; - const auto &outputs = args.op.outputs; - - if (inputs.size() != 2) - return false; - - if (outputs.size() != 1) - return false; - - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - - switch (tensor->type) - { - case circle::TensorType_FLOAT16: - case circle::TensorType_FLOAT32: - case circle::TensorType_FLOAT64: - case circle::TensorType_INT32: - case circle::TensorType_INT64: - break; - default: - return false; - } - - if (tensors[inputs.at(1)]->type != tensor->type) - return false; - - if (tensors[outputs[0]]->type != tensor->type) - return false; - - return true; + return validate_minmax(args); } CircleNode *CircleMaximumGraphBuilder::build_node(const circle::OperatorT &, diff --git a/compiler/luci/import/src/Nodes/CircleMinimum.cpp b/compiler/luci/import/src/Nodes/CircleMinimum.cpp index 8b4daf197..381039e88 100644 --- a/compiler/luci/import/src/Nodes/CircleMinimum.cpp +++ b/compiler/luci/import/src/Nodes/CircleMinimum.cpp @@ -18,6 +18,8 @@ #include <luci/IR/Nodes/CircleMinimum.h> +#include "ValidateHelpers.h" + #include <loco.h> namespace luci @@ -25,37 +27,7 @@ namespace luci bool CircleMinimumGraphBuilder::validate(const ValidateArgs &args) const { - const auto &inputs = args.op.inputs; - const auto &outputs = args.op.outputs; - - if (inputs.size() != 2) - 
return false; - - if (outputs.size() != 1) - return false; - - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - - switch (tensor->type) - { - case circle::TensorType_FLOAT16: - case circle::TensorType_FLOAT32: - case circle::TensorType_FLOAT64: - case circle::TensorType_INT32: - case circle::TensorType_INT64: - break; - default: - return false; - } - - if (tensors[inputs.at(1)]->type != tensor->type) - return false; - - if (tensors[outputs[0]]->type != tensor->type) - return false; - - return true; + return validate_minmax(args); } CircleNode *CircleMinimumGraphBuilder::build_node(const circle::OperatorT &, diff --git a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp new file mode 100644 index 000000000..241dbf5ff --- /dev/null +++ b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Import/Nodes/CircleNonMaxSuppressionV5.h" + +#include <luci/IR/Nodes/CircleNonMaxSuppressionV5.h> +#include <luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h> + +#include <loco.h> +#include <oops/UserExn.h> + +namespace luci +{ + +bool CircleNonMaxSuppressionV5GraphBuilder::validate(const ValidateArgs &args) const +{ + const auto &inputs = args.op.inputs; + const auto &outputs = args.op.outputs; + + if (inputs.size() != 6) + return false; + if (outputs.size() != 3) + return false; + + const auto &tensors = args.reader.tensors(); + const auto &boxes_tensor = tensors.at(inputs[0]); + if (boxes_tensor->shape.size() != 2) + return false; + if (boxes_tensor->shape.at(1) != 4) + return false; + if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0)) + return false; + + if (tensors.at(inputs[2])->type != circle::TensorType_INT32) + return false; + if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32) + return false; + if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32) + return false; + if (tensors.at(inputs[5])->type != circle::TensorType_FLOAT32) + return false; + + return true; +} + +/** + * @brief NonMaxSuppressionV5 Node builder + * + * @note Current loco does not provide multiple outputs + * We will create multiple NonMasSuppressionV5Oout nodes to emulate this + */ + +void CircleNonMaxSuppressionV5GraphBuilder::build(const circle::OperatorT &op, + GraphBuilderContext *context) const +{ + assert(context != nullptr); + + auto graph = context->graph(); + + const std::vector<int32_t> &inputs = op.inputs; + const std::vector<int32_t> &outputs = op.outputs; + const auto &tensors = context->reader()->tensors(); + const auto &opcodes = context->reader()->opcodes(); + auto tensors_ptr = context->reader()->tensors_ptr(); + assert(tensors_ptr != nullptr); + + std::vector<CircleNode *> input_nodes; + for (const int32_t input_tensor_index : inputs) + { + input_nodes.push_back(context->nodefinder()->node(input_tensor_index)); + 
} + + // Create CircleNonMaxSuppressionV5 + auto node = graph->nodes()->create<CircleNonMaxSuppressionV5>(); + node->boxes(input_nodes[0]); + node->scores(input_nodes[1]); + node->max_output_size(input_nodes[2]); + node->iou_threshold(input_nodes[3]); + node->score_threshold(input_nodes[4]); + node->soft_nms_sigma(input_nodes[5]); + + assert(outputs.size() == 3); + { + // Let's use name of output 0 as NonMaxSuppressionV5 name + const circle::TensorT &output_tensor = *tensors[outputs[0]]; + node->name(tensor_name(output_tensor)); + node->op_version(opcodes[op.opcode_index].get()->version); + + // NOTE We don't set quantization for NonMaxSuppressionV5 itself but to virtual outputs + } + + // Create virtual outputs of NonMaxSuppressionV5 + for (size_t n = 0; n < outputs.size(); ++n) + { + const circle::TensorT &output_tensor = *tensors[outputs[n]]; + + auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV5Out>(); + copy_tensor_attributes(output_tensor, nodeout); + + // mark shape_status + if (tensors_ptr->Get(outputs[n])->shape() == nullptr) + nodeout->shape_status(ShapeStatus::NOSHAPE); + else + nodeout->shape_status(ShapeStatus::VALID); + + nodeout->input(node); + nodeout->index(n); + + context->nodefinder()->enroll(outputs[n], nodeout); + } +} + +} // namespace luci diff --git a/compiler/luci/import/src/Nodes/CirclePadV2.cpp b/compiler/luci/import/src/Nodes/CirclePadV2.cpp new file mode 100644 index 000000000..493876e68 --- /dev/null +++ b/compiler/luci/import/src/Nodes/CirclePadV2.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Import/Nodes/CirclePadV2.h" + +#include <luci/IR/Nodes/CirclePadV2.h> + +#include <loco.h> + +namespace luci +{ + +bool CirclePadV2GraphBuilder::validate(const ValidateArgs &args) const +{ + if (args.op.inputs.size() != 3) + return false; + + if (args.op.outputs.size() != 1) + return false; + + return true; +} + +CircleNode *CirclePadV2GraphBuilder::build_node(const circle::OperatorT &op, + const std::vector<CircleNode *> &inputs, + loco::Graph *graph) const +{ + auto *node = graph->nodes()->create<CirclePadV2>(); + node->input(inputs[0]); + node->paddings(inputs[1]); + node->constant_values(inputs[2]); + + const auto *options = op.builtin_options.AsPadV2Options(); + (void)options; // There are no options. 
+ + return node; +} + +} // namespace luci diff --git a/compiler/luci/import/src/Nodes/CircleReduceMax.cpp b/compiler/luci/import/src/Nodes/CircleReduceMax.cpp index 05492dbc6..e633abf7d 100644 --- a/compiler/luci/import/src/Nodes/CircleReduceMax.cpp +++ b/compiler/luci/import/src/Nodes/CircleReduceMax.cpp @@ -18,33 +18,14 @@ #include <luci/IR/Nodes/CircleReduceMax.h> +#include "ValidateHelpers.h" + namespace luci { bool CircleReduceMaxGraphBuilder::validate(const ValidateArgs &args) const { - const auto &inputs = args.op.inputs; - const auto &outputs = args.op.outputs; - - if (inputs.size() != 2) - return false; - - if (outputs.size() != 1) - return false; - - const auto &tensors = args.reader.tensors(); - const auto &tensor_axis = tensors.at(inputs.at(1)); - - switch (tensor_axis->type) - { - case circle::TensorType_INT32: - case circle::TensorType_INT64: - break; - default: - return false; - } - - return true; + return validate_reduce_minmax(args); } CircleNode *CircleReduceMaxGraphBuilder::build_node(const circle::OperatorT &op, diff --git a/compiler/luci/import/src/Nodes/CircleReduceMin.cpp b/compiler/luci/import/src/Nodes/CircleReduceMin.cpp index 117d5295a..bfc3001f8 100644 --- a/compiler/luci/import/src/Nodes/CircleReduceMin.cpp +++ b/compiler/luci/import/src/Nodes/CircleReduceMin.cpp @@ -18,33 +18,14 @@ #include <luci/IR/Nodes/CircleReduceMin.h> +#include "ValidateHelpers.h" + namespace luci { bool CircleReduceMinGraphBuilder::validate(const ValidateArgs &args) const { - const auto &inputs = args.op.inputs; - const auto &outputs = args.op.outputs; - - if (inputs.size() != 2) - return false; - - if (outputs.size() != 1) - return false; - - const auto &tensors = args.reader.tensors(); - const auto &tensor_axis = tensors.at(inputs.at(1)); - - switch (tensor_axis->type) - { - case circle::TensorType_INT32: - case circle::TensorType_INT64: - break; - default: - return false; - } - - return true; + return validate_reduce_minmax(args); } CircleNode 
*CircleReduceMinGraphBuilder::build_node(const circle::OperatorT &op, diff --git a/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp b/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp index c1d508e3e..fbf9f6b12 100644 --- a/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp +++ b/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp @@ -18,49 +18,16 @@ #include <luci/IR/Nodes/CircleSpaceToBatchND.h> -#include <loco.h> +#include "ValidateHelpers.h" -#include <cassert> +#include <loco.h> namespace luci { bool CircleSpaceToBatchNDGraphBuilder::validate(const ValidateArgs &args) const { - const auto &inputs = args.op.inputs; - if (inputs.size() != 3) - return false; - - // input 1 and 2 should have INT32/INT64 type - const auto &tensors = args.reader.tensors(); - const auto &tensor_1 = tensors.at(inputs.at(1)); - switch (tensor_1->type) - { - case circle::TensorType_INT32: - case circle::TensorType_INT64: - break; - default: - return false; - } - const auto &tensor_2 = tensors.at(inputs.at(2)); - switch (tensor_2->type) - { - case circle::TensorType_INT32: - case circle::TensorType_INT64: - break; - default: - return false; - } - - // Only support input shape dimension 3 and 4 only - const auto &tensor_0 = tensors.at(inputs.at(0)); - const auto t_0_s = tensor_0->shape.size(); - if (t_0_s != 3 && t_0_s != 4) - return false; - - // TODO check input shape - - return true; + return validate_batch_space_nd(args); } CircleNode *CircleSpaceToBatchNDGraphBuilder::build_node(const circle::OperatorT &, diff --git a/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp b/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp index 26d575e90..ac756b1f3 100644 --- a/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp +++ b/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp @@ -42,7 +42,8 @@ CircleNode *CircleSparseToDenseGraphBuilder::build_node(const circle::OperatorT node->default_value(inputs.at(3)); const auto *options = 
op.builtin_options.AsSparseToDenseOptions(); - node->validate_indices(options->validate_indices); + if (options) + node->validate_indices(options->validate_indices); return node; } diff --git a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp index ddb196657..c280faaf5 100644 --- a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp +++ b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp @@ -27,7 +27,7 @@ namespace luci bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const { - if (args.op.inputs.size() != 3) + if (args.op.inputs.size() != 3 && args.op.inputs.size() != 4) return false; const auto &inputs = args.op.inputs; @@ -60,6 +60,17 @@ CircleNode *CircleTransposeConvGraphBuilder::build_node(const circle::OperatorT node->inputSizes(inputs.at(0)); node->filter(inputs.at(1)); node->outBackprop(inputs.at(2)); + if (inputs.size() == 3) + node->bias(graph->nodes()->create<CircleOutputExclude>()); + else + node->bias(inputs.at(3)); + + if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias())) + { + // CircleOutputExclude doesn't need a type, but since all nodes must have a type, a dummy type + // is inserted. + bias->dtype(loco::DataType::FLOAT32); + } const auto *options = op.builtin_options.AsTransposeConvOptions(); node->padding(luci_padding(options->padding)); diff --git a/compiler/luci/import/src/ValidateHelpers.cpp b/compiler/luci/import/src/ValidateHelpers.cpp new file mode 100644 index 000000000..12a6548d6 --- /dev/null +++ b/compiler/luci/import/src/ValidateHelpers.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ValidateHelpers.h" + +namespace luci +{ + +bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args) +{ + const auto &inputs = args.op.inputs; + if (inputs.size() != 3) + return false; + + // input 1 and 2 should have INT32/INT64 type + const auto &tensors = args.reader.tensors(); + const auto &tensor_1 = tensors.at(inputs.at(1)); + switch (tensor_1->type) + { + case circle::TensorType_INT32: + case circle::TensorType_INT64: + break; + default: + return false; + } + const auto &tensor_2 = tensors.at(inputs.at(2)); + switch (tensor_2->type) + { + case circle::TensorType_INT32: + case circle::TensorType_INT64: + break; + default: + return false; + } + + // Only support input shape dimension 3 and 4 only + const auto &tensor_0 = tensors.at(inputs.at(0)); + const auto t_0_s = tensor_0->shape.size(); + if (t_0_s != 3 && t_0_s != 4) + return false; + + // TODO check input shape + + return true; +} + +bool validate_minmax(const GraphBuilderBase::ValidateArgs &args) +{ + const auto &inputs = args.op.inputs; + const auto &outputs = args.op.outputs; + + if (inputs.size() != 2) + return false; + + if (outputs.size() != 1) + return false; + + const auto &tensors = args.reader.tensors(); + const auto &tensor = tensors.at(inputs.at(0)); + + switch (tensor->type) + { + case circle::TensorType_FLOAT16: + case circle::TensorType_FLOAT32: + case circle::TensorType_FLOAT64: + case circle::TensorType_INT32: + case circle::TensorType_INT64: + break; + default: + return false; + } + + if (tensors[inputs.at(1)]->type != tensor->type) + 
return false; + + if (tensors[outputs[0]]->type != tensor->type) + return false; + + return true; +} + +bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args) +{ + const auto &inputs = args.op.inputs; + const auto &outputs = args.op.outputs; + + if (inputs.size() != 2) + return false; + + if (outputs.size() != 1) + return false; + + const auto &tensors = args.reader.tensors(); + const auto &tensor_axis = tensors.at(inputs.at(1)); + + switch (tensor_axis->type) + { + case circle::TensorType_INT32: + case circle::TensorType_INT64: + break; + default: + return false; + } + + return true; +} + +} // namespace luci diff --git a/compiler/luci/import/src/ValidateHelpers.h b/compiler/luci/import/src/ValidateHelpers.h new file mode 100644 index 000000000..4047b2f08 --- /dev/null +++ b/compiler/luci/import/src/ValidateHelpers.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_VALIDATE_HELPERS_H__ +#define __LUCI_VALIDATE_HELPERS_H__ + +#include "luci/Import/GraphBuilderBase.h" + +/** + * @Note Methods in this file provides helper functions to reduce duplicate codes + */ + +namespace luci +{ + +bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args); +bool validate_minmax(const GraphBuilderBase::ValidateArgs &args); +bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args); + +} // namespace luci + +#endif // __LUCI_VALIDATE_HELPERS_H__ diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h index e57f5bb3e..25b86d2e9 100644 --- a/compiler/luci/lang/include/luci/IR/CircleNodes.h +++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h @@ -71,6 +71,7 @@ #include "Nodes/CircleMul.h" #include "Nodes/CircleNeg.h" #include "Nodes/CircleNonMaxSuppressionV4.h" +#include "Nodes/CircleNonMaxSuppressionV5.h" #include "Nodes/CircleNotEqual.h" #include "Nodes/CircleOneHot.h" #include "Nodes/CirclePack.h" @@ -134,6 +135,7 @@ #include "Nodes/CircleCustomOut.h" #include "Nodes/CircleIfOut.h" #include "Nodes/CircleNonMaxSuppressionV4Out.h" +#include "Nodes/CircleNonMaxSuppressionV5Out.h" #include "Nodes/CircleUnpackOut.h" #include "Nodes/CircleUniqueOut.h" #include "Nodes/CircleSplitOut.h" diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst index 801051848..9f0a1b16e 100644 --- a/compiler/luci/lang/include/luci/IR/CircleNodes.lst +++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst @@ -64,6 +64,7 @@ CIRCLE_NODE(MIRROR_PAD, luci::CircleMirrorPad) CIRCLE_NODE(MUL, luci::CircleMul) CIRCLE_NODE(NEG, luci::CircleNeg) CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, luci::CircleNonMaxSuppressionV4) +CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, luci::CircleNonMaxSuppressionV5) CIRCLE_NODE(NOT_EQUAL, luci::CircleNotEqual) CIRCLE_NODE(ONE_HOT, luci::CircleOneHot) CIRCLE_NODE(PACK, luci::CirclePack) @@ -130,6 
+131,7 @@ CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, luci::CircleOutputExclude) CIRCLE_NODE(CIRCLECUSTOMOUT, luci::CircleCustomOut) CIRCLE_NODE(CIRCLEIFOUT, luci::CircleIfOut) CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out) +CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, luci::CircleNonMaxSuppressionV5Out) CIRCLE_NODE(CIRCLESPLITOUT, luci::CircleSplitOut) CIRCLE_NODE(CIRCLESPLITVOUT, luci::CircleSplitVOut) CIRCLE_NODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out) diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h new file mode 100644 index 000000000..52d682147 --- /dev/null +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__ +#define __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__ + +#include "luci/IR/CircleNodeDecl.h" +#include "luci/IR/CircleOpcode.h" + +#include "luci/IR/LuciNodeMixins.h" + +namespace luci +{ + +/** + * @brief NON_MAX_SUPPRESSION_V5 in Circle + */ +class CircleNonMaxSuppressionV5 final + : public FixedArityNode<6, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V5>> +{ +public: + loco::Node *boxes(void) const { return at(0)->node(); } + void boxes(loco::Node *node) { at(0)->node(node); } + + loco::Node *scores(void) const { return at(1)->node(); } + void scores(loco::Node *node) { at(1)->node(node); } + + loco::Node *max_output_size(void) const { return at(2)->node(); } + void max_output_size(loco::Node *node) { at(2)->node(node); } + + loco::Node *iou_threshold(void) const { return at(3)->node(); } + void iou_threshold(loco::Node *node) { at(3)->node(node); } + + loco::Node *score_threshold(void) const { return at(4)->node(); } + void score_threshold(loco::Node *node) { at(4)->node(node); } + + loco::Node *soft_nms_sigma(void) const { return at(5)->node(); } + void soft_nms_sigma(loco::Node *node) { at(5)->node(node); } +}; + +} // namespace luci + +#endif // __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__ diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h new file mode 100644 index 000000000..0c6989cc7 --- /dev/null +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__ +#define __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__ + +#include "luci/IR/CircleNodeDecl.h" +#include "luci/IR/CircleOpcode.h" + +#include "luci/IR/LuciNodeMixins.h" + +namespace luci +{ + +/** + * @brief Virtual NONMAXSUPPRESSIONV5OUT in Circle + */ +class CircleNonMaxSuppressionV5Out final + : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT>> +{ +public: + CircleNonMaxSuppressionV5Out() = default; + +public: + loco::Node *input(void) const { return at(0)->node(); } + void input(loco::Node *node) { at(0)->node(node); } + +public: + int32_t index(void) const { return _index; } + void index(int32_t index) { _index = index; } + +private: + int32_t _index{-1}; +}; + +} // namespace luci + +#endif // __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__ diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h index 9f5051317..7e80304b0 100644 --- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h @@ -49,7 +49,7 @@ public: void validate_indices(bool validate_indices) { _validate_indices = validate_indices; } private: - bool _validate_indices{true}; + bool _validate_indices{false}; }; } // namespace luci diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h index fc638d49f..e355102d6 100644 --- 
a/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h @@ -34,7 +34,8 @@ namespace luci * 'out' acutally means 'out' and 'in' of the this node. */ class CircleTransposeConv final - : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>> + : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>, + public LuciNodeMixin<LuciNodeTrait::Bias> { public: loco::Node *inputSizes(void) const { return at(0)->node(); } @@ -46,6 +47,21 @@ public: loco::Node *outBackprop(void) const { return at(2)->node(); } void outBackprop(Node *node) { at(2)->node(node); } + /** + * @note "bias" is optional. When this node has no conceptual bias, "bias()" + * expected to be `luci::CircleOutputExclude` type. + * + * <Comment on tflite TRANSPOSE_CONV> + * + * (Circle node has no dependency on tflite, but just for information on converting) + * Before TF v2.3.0, tflite TRANSPOSE_CONV didn't support fused bias as argument. + * From TF v2.3.0, tflite TRANSPOSE_CONV supports bias as optional 4th argument. 
+ * + * Ref: https://github.com/tensorflow/tensorflow/commit/43b8f6e710 + */ + loco::Node *bias(void) const override { return at(3)->node(); } + void bias(loco::Node *node) override { at(3)->node(node); } + public: const Padding &padding(void) const { return _padding; } void padding(const Padding &padding) { _padding = padding; } diff --git a/compiler/luci/lang/src/Nodes/CircleConst.cpp b/compiler/luci/lang/src/Nodes/CircleConst.cpp index 17ff853eb..0d02d32dc 100644 --- a/compiler/luci/lang/src/Nodes/CircleConst.cpp +++ b/compiler/luci/lang/src/Nodes/CircleConst.cpp @@ -73,6 +73,7 @@ template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &CircleConst: INSTANTIATE(loco::DataType::S64); INSTANTIATE(loco::DataType::S32); INSTANTIATE(loco::DataType::S16); +INSTANTIATE(loco::DataType::S8); INSTANTIATE(loco::DataType::FLOAT32); INSTANTIATE(loco::DataType::U8); INSTANTIATE(loco::DataType::BOOL); diff --git a/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp new file mode 100644 index 000000000..ceb74e3df --- /dev/null +++ b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/IR/Nodes/CircleNonMaxSuppressionV5.h" + +#include "luci/IR/CircleDialect.h" +#include "luci/IR/CircleNodeVisitor.h" + +#include <gtest/gtest.h> + +TEST(CircleNonMaxSuppressionV5Test, constructor) +{ + luci::CircleNonMaxSuppressionV5 nmsv5_node; + + ASSERT_EQ(luci::CircleDialect::get(), nmsv5_node.dialect()); + ASSERT_EQ(luci::CircleOpcode::NON_MAX_SUPPRESSION_V5, nmsv5_node.opcode()); + + ASSERT_EQ(nullptr, nmsv5_node.boxes()); + ASSERT_EQ(nullptr, nmsv5_node.scores()); + ASSERT_EQ(nullptr, nmsv5_node.max_output_size()); + ASSERT_EQ(nullptr, nmsv5_node.iou_threshold()); + ASSERT_EQ(nullptr, nmsv5_node.score_threshold()); + ASSERT_EQ(nullptr, nmsv5_node.soft_nms_sigma()); +} + +TEST(CircleNonMaxSuppressionV5Test, input_NEG) +{ + luci::CircleNonMaxSuppressionV5 nmsv5_node; + luci::CircleNonMaxSuppressionV5 node; + + nmsv5_node.boxes(&node); + nmsv5_node.scores(&node); + nmsv5_node.max_output_size(&node); + nmsv5_node.iou_threshold(&node); + nmsv5_node.score_threshold(&node); + nmsv5_node.soft_nms_sigma(&node); + ASSERT_NE(nullptr, nmsv5_node.boxes()); + ASSERT_NE(nullptr, nmsv5_node.scores()); + ASSERT_NE(nullptr, nmsv5_node.max_output_size()); + ASSERT_NE(nullptr, nmsv5_node.iou_threshold()); + ASSERT_NE(nullptr, nmsv5_node.score_threshold()); + ASSERT_NE(nullptr, nmsv5_node.soft_nms_sigma()); + + nmsv5_node.boxes(nullptr); + nmsv5_node.scores(nullptr); + nmsv5_node.max_output_size(nullptr); + nmsv5_node.iou_threshold(nullptr); + nmsv5_node.score_threshold(nullptr); + nmsv5_node.soft_nms_sigma(nullptr); + ASSERT_EQ(nullptr, nmsv5_node.boxes()); + ASSERT_EQ(nullptr, nmsv5_node.scores()); + ASSERT_EQ(nullptr, nmsv5_node.max_output_size()); + ASSERT_EQ(nullptr, nmsv5_node.iou_threshold()); + ASSERT_EQ(nullptr, nmsv5_node.score_threshold()); + ASSERT_EQ(nullptr, nmsv5_node.soft_nms_sigma()); +} + +TEST(CircleNonMaxSuppressionV5Test, arity_NEG) +{ + luci::CircleNonMaxSuppressionV5 nmsv5_node; + + ASSERT_NO_THROW(nmsv5_node.arg(5)); + 
ASSERT_THROW(nmsv5_node.arg(6), std::out_of_range); +} + +TEST(CircleNonMaxSuppressionV5Test, visit_mutable_NEG) +{ + struct TestVisitor final : public luci::CircleNodeMutableVisitor<void> + { + }; + + luci::CircleNonMaxSuppressionV5 nmsv5_node; + + TestVisitor tv; + ASSERT_THROW(nmsv5_node.accept(&tv), std::exception); +} + +TEST(CircleNonMaxSuppressionV5Test, visit_NEG) +{ + struct TestVisitor final : public luci::CircleNodeVisitor<void> + { + }; + + luci::CircleNonMaxSuppressionV5 nmsv5_node; + + TestVisitor tv; + ASSERT_THROW(nmsv5_node.accept(&tv), std::exception); +} diff --git a/compiler/circle2circle/src/CircleExpContract.cpp b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp index b56b7eedc..7b427ea03 100644 --- a/compiler/circle2circle/src/CircleExpContract.cpp +++ b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp @@ -14,20 +14,19 @@ * limitations under the License. */ -#include "CircleExpContract.h" +#include "luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h" -#include <oops/InternalExn.h> +#include "luci/IR/CircleDialect.h" -#include <fstream> -#include <iostream> +#include <gtest/gtest.h> -bool CircleExpContract::store(const char *ptr, const size_t size) const +TEST(CircleNonMaxSuppressionV5OutTest, constructor) { - if (!ptr) - INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason"); + luci::CircleNonMaxSuppressionV5Out vout_node; - std::ofstream fs(_filepath.c_str(), std::ofstream::binary); - fs.write(ptr, size); + ASSERT_EQ(luci::CircleDialect::get(), vout_node.dialect()); + ASSERT_EQ(luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT, vout_node.opcode()); - return fs.good(); + ASSERT_EQ(nullptr, vout_node.input()); + ASSERT_EQ(-1, vout_node.index()); } diff --git a/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp b/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp index de3cf6e9a..03f612ba7 100644 --- a/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp +++ 
b/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp @@ -33,7 +33,7 @@ TEST(CircleSparseToDenseTest, constructor) ASSERT_EQ(nullptr, stb_node.values()); ASSERT_EQ(nullptr, stb_node.default_value()); - ASSERT_EQ(true, stb_node.validate_indices()); + ASSERT_EQ(false, stb_node.validate_indices()); } TEST(CircleSparseToDenseTest, input_NEG) diff --git a/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp b/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp index 429169744..3e0db803f 100644 --- a/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp +++ b/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp @@ -69,8 +69,8 @@ TEST(CircleTransposeConvTest, arity_NEG) { luci::CircleTransposeConv trc_node; - ASSERT_NO_THROW(trc_node.arg(2)); - ASSERT_THROW(trc_node.arg(3), std::out_of_range); + ASSERT_NO_THROW(trc_node.arg(3)); + ASSERT_THROW(trc_node.arg(4), std::out_of_range); } TEST(CircleTransposeConvTest, visit_mutable_NEG) diff --git a/compiler/luci/logex/src/FormattedGraph.cpp b/compiler/luci/logex/src/FormattedGraph.cpp index f04a418ef..bb7c73d5f 100644 --- a/compiler/luci/logex/src/FormattedGraph.cpp +++ b/compiler/luci/logex/src/FormattedGraph.cpp @@ -245,10 +245,12 @@ private: IMPLEMENT(luci::CircleMul) IMPLEMENT(luci::CircleNeg) IMPLEMENT(luci::CircleNonMaxSuppressionV4) + IMPLEMENT(luci::CircleNonMaxSuppressionV5) IMPLEMENT(luci::CircleNotEqual) IMPLEMENT(luci::CircleOneHot) IMPLEMENT(luci::CirclePack) IMPLEMENT(luci::CirclePad) + IMPLEMENT(luci::CirclePadV2) IMPLEMENT(luci::CirclePow) IMPLEMENT(luci::CirclePRelu) IMPLEMENT(luci::CircleRange) @@ -306,6 +308,7 @@ private: IMPLEMENT(luci::CircleOutput) IMPLEMENT(luci::CircleIfOut) IMPLEMENT(luci::CircleNonMaxSuppressionV4Out) + IMPLEMENT(luci::CircleNonMaxSuppressionV5Out) IMPLEMENT(luci::CircleSplitOut) IMPLEMENT(luci::CircleSplitVOut) IMPLEMENT(luci::CircleTopKV2Out) @@ -380,192 +383,848 @@ bool use_ido(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeS return true; 
} -bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAddN *node, + locop::NodeSummary &s) { - if (node->dialect() != luci::CircleDialect::get()) - return false; + for (uint32_t i = 0; i < node->arity(); ++i) + s.args().append("inputs", tbl->lookup(node->inputs(i))); + s.state(locop::NodeSummary::State::Complete); + return true; +} -#define CIRCLE_NODE(OPCODE, CLASS) \ - if (dynamic_cast<const CLASS *>(node)) \ - { \ - s.opname(circle_opname(node->opnum())); \ - return summary(dynamic_cast<const CLASS *>(node), s); \ - } -#include <luci/IR/CircleNodes.lst> -#undef CIRCLE_NODE +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAveragePool2D *node, + locop::NodeSummary &s) +{ + assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - return false; + s.args().append("value", tbl->lookup(node->value())); + s.args().append("filter(h,w)", to_str(node->filter())); + s.args().append("stride(h,w)", to_str(node->stride())); + s.args().append("padding", to_str(node->padding())); + s.args().append("fused", to_str(node->fusedActivationFunction())); + s.state(locop::NodeSummary::State::Complete); + return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleAbs *node, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchMatMul *node, + locop::NodeSummary &s) { - return use_x(tbl(), node, s); + s.args().append("x", tbl->lookup(node->x())); + s.args().append("y", tbl->lookup(node->y())); + s.args().append("adj_x", to_str(node->adj_x())); + s.args().append("adj_y", to_str(node->adj_y())); + s.state(locop::NodeSummary::State::Complete); + return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleAdd *node, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchToSpaceND *node, + locop::NodeSummary &s) { - return 
use_xy_act(tbl(), node, s); + s.args().append("input", tbl->lookup(node->input())); + s.args().append("block_shape", tbl->lookup(node->block_shape())); + s.args().append("crops", tbl->lookup(node->crops())); + s.state(locop::NodeSummary::State::Complete); + return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCast *node, + locop::NodeSummary &s) { - for (uint32_t i = 0; i < node->arity(); ++i) - s.args().append("inputs", tbl()->lookup(node->inputs(i))); + s.args().append("x", tbl->lookup(node->x())); + s.args().append("in_data_type", to_str(node->in_data_type())); + s.args().append("out_data_type", to_str(node->out_data_type())); + s.state(locop::NodeSummary::State::Complete); + return true; +} +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConcatenation *node, + locop::NodeSummary &s) +{ + assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); + + for (uint32_t i = 0; i < node->numValues(); ++i) + s.args().append("values", tbl->lookup(node->values(i))); + s.args().append("axis", pepper::str(node->axis())); + s.args().append("fused", to_str(node->fusedActivationFunction())); s.state(locop::NodeSummary::State::Complete); return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMax *node, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConv2D *node, + locop::NodeSummary &s) { - return use_ido(tbl(), node, s); + assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); + assert(node->padding() != luci::Padding::UNDEFINED); + + s.args().append("input", tbl->lookup(node->input())); + s.args().append("filter", tbl->lookup(node->filter())); + s.args().append("bias", tbl->lookup(node->bias())); + s.args().append("stride(h,w)", to_str(node->stride())); + s.args().append("dilation(h,w)", to_str(node->dilation())); + 
s.args().append("padding", to_str(node->padding())); + s.args().append("fused", to_str(node->fusedActivationFunction())); + s.state(locop::NodeSummary::State::Complete); + return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMin *node, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCustom *node, + locop::NodeSummary &s) { - return use_ido(tbl(), node, s); + for (uint32_t i = 0; i < node->numInputs(); i++) + { + s.args().append("input" + std::to_string(i), tbl->lookup(node->inputs(i))); + } + s.args().append("custom_code", node->custom_code()); + s.state(locop::NodeSummary::State::Complete); + return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleAveragePool2D *node, - locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthToSpace *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("block_size", std::to_string(node->block_size())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthwiseConv2D *node, + locop::NodeSummary &s) { assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); + assert(node->padding() != luci::Padding::UNDEFINED); - s.args().append("value", tbl()->lookup(node->value())); - s.args().append("filter(h,w)", to_str(node->filter())); + s.args().append("input", tbl->lookup(node->input())); + s.args().append("filter", tbl->lookup(node->filter())); + s.args().append("bias", tbl->lookup(node->bias())); s.args().append("stride(h,w)", to_str(node->stride())); + s.args().append("dilation(h,w)", to_str(node->dilation())); s.args().append("padding", to_str(node->padding())); + s.args().append("depthMultiplier", std::to_string(node->depthMultiplier())); s.args().append("fused", to_str(node->fusedActivationFunction())); + 
s.state(locop::NodeSummary::State::Complete); + return true; +} +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleExpandDims *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("axis", tbl->lookup(node->axis())); s.state(locop::NodeSummary::State::Complete); + return true; +} +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFill *node, + locop::NodeSummary &s) +{ + s.args().append("dims", tbl->lookup(node->dims())); + s.args().append("value", tbl->lookup(node->value())); + s.state(locop::NodeSummary::State::Complete); return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchMatMul *node, - locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFullyConnected *node, + locop::NodeSummary &s) { - s.args().append("x", tbl()->lookup(node->x())); - s.args().append("y", tbl()->lookup(node->y())); - s.args().append("adj_x", to_str(node->adj_x())); - s.args().append("adj_y", to_str(node->adj_y())); + assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); + + s.args().append("input", tbl->lookup(node->input())); + s.args().append("weights", tbl->lookup(node->weights())); + s.args().append("bias", tbl->lookup(node->bias())); + s.args().append("fused", to_str(node->fusedActivationFunction())); s.state(locop::NodeSummary::State::Complete); return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchToSpaceND *node, - locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGather *node, + locop::NodeSummary &s) { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("block_shape", tbl()->lookup(node->block_shape())); - s.args().append("crops", tbl()->lookup(node->crops())); + s.args().append("params", tbl->lookup(node->params())); + s.args().append("indices", tbl->lookup(node->indices())); + s.args().append("axis", 
pepper::str(node->axis())); + s.state(locop::NodeSummary::State::Complete); + return true; +} +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGatherNd *node, + locop::NodeSummary &s) +{ + s.args().append("params", tbl->lookup(node->params())); + s.args().append("indices", tbl->lookup(node->indices())); s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleIf *node, locop::NodeSummary &s) +{ + s.args().append("cond", tbl->lookup(node->cond())); + for (uint32_t i = 0; i < node->input_count(); ++i) + s.args().append("input", tbl->lookup(node->input(i))); + if (node->then_graph() != nullptr) + s.args().append("then_graph", node->then_graph()->name()); + else + s.args().append("then_branch", pepper::str(node->then_branch())); + + if (node->else_graph() != nullptr) + s.args().append("else_graph", node->else_graph()->name()); + else + s.args().append("else_branch", pepper::str(node->else_branch())); + + s.state(locop::NodeSummary::State::Complete); return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleCast *node, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Normalize *node, + locop::NodeSummary &s) { - s.args().append("x", tbl()->lookup(node->x())); - s.args().append("in_data_type", to_str(node->in_data_type())); - s.args().append("out_data_type", to_str(node->out_data_type())); + s.args().append("x", tbl->lookup(node->x())); + s.args().append("fused_activation_function", to_str(node->fusedActivationFunction())); s.state(locop::NodeSummary::State::Complete); return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLeakyRelu *node, + locop::NodeSummary &s) { - return use_x(tbl(), node, s); + s.args().append("features", tbl->lookup(node->features())); + s.args().append("alpha", 
std::to_string(node->alpha())); + s.state(locop::NodeSummary::State::Complete); + return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleConcatenation *node, - locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLocalResponseNormalization *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("radius", pepper::str(node->radius())); + s.args().append("bias", pepper::str(node->bias())); + s.args().append("alpha", pepper::str(node->alpha())); + s.args().append("beta", pepper::str(node->beta())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLogSoftmax *node, + locop::NodeSummary &s) +{ + s.args().append("logits", tbl->lookup(node->logits())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixDiag *node, + locop::NodeSummary &s) +{ + s.args().append("diagonal", tbl->lookup(node->diagonal())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixSetDiag *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("diagonal", tbl->lookup(node->diagonal())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMaxPool2D *node, + locop::NodeSummary &s) { assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - for (uint32_t i = 0; i < node->numValues(); ++i) - s.args().append("values", tbl()->lookup(node->values(i))); - s.args().append("axis", pepper::str(node->axis())); + s.args().append("value", tbl->lookup(node->value())); + s.args().append("filter(h,w)", to_str(node->filter())); + s.args().append("stride(h,w)", to_str(node->stride())); + 
s.args().append("padding", to_str(node->padding())); s.args().append("fused", to_str(node->fusedActivationFunction())); s.state(locop::NodeSummary::State::Complete); return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleConst *, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMirrorPad *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("paddings", tbl->lookup(node->paddings())); + s.args().append("mode", to_str(node->mode())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV4 *node, + locop::NodeSummary &s) +{ + s.args().append("boxes", tbl->lookup(node->boxes())); + s.args().append("scores", tbl->lookup(node->scores())); + s.args().append("max_output_size", tbl->lookup(node->max_output_size())); + s.args().append("iou_threshold", tbl->lookup(node->iou_threshold())); + s.args().append("score_threshold", tbl->lookup(node->score_threshold())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV5 *node, + locop::NodeSummary &s) +{ + s.args().append("boxes", tbl->lookup(node->boxes())); + s.args().append("scores", tbl->lookup(node->scores())); + s.args().append("max_output_size", tbl->lookup(node->max_output_size())); + s.args().append("iou_threshold", tbl->lookup(node->iou_threshold())); + s.args().append("score_threshold", tbl->lookup(node->score_threshold())); + s.args().append("soft_nms_sigma", tbl->lookup(node->soft_nms_sigma())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOneHot *node, + locop::NodeSummary &s) +{ + s.args().append("indices", tbl->lookup(node->indices())); + s.args().append("depth", tbl->lookup(node->depth())); + 
s.args().append("on_value", tbl->lookup(node->on_value())); + s.args().append("off_value", tbl->lookup(node->off_value())); + s.args().append("axis", pepper::str(node->axis())); + + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePack *node, + locop::NodeSummary &s) +{ + for (uint32_t i = 0; i < node->values_count(); ++i) + s.args().append("values", tbl->lookup(node->values(i))); + s.args().append("values_count", pepper::str(node->values_count())); + s.args().append("axis", pepper::str(node->axis())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePad *node, locop::NodeSummary &s) { + s.args().append("input", tbl->lookup(node->input())); + s.args().append("paddings", tbl->lookup(node->paddings())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePadV2 *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("paddings", tbl->lookup(node->paddings())); + s.args().append("constant_values", tbl->lookup(node->constant_values())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePRelu *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("alpha", tbl->lookup(node->alpha())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleRange *node, + locop::NodeSummary &s) +{ + s.args().append("start", tbl->lookup(node->start())); + s.args().append("limit", tbl->lookup(node->limit())); + s.args().append("delta", tbl->lookup(node->delta())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable 
*tbl, const luci::CircleReshape *node, + locop::NodeSummary &s) +{ + s.args().append("tensor", tbl->lookup(node->tensor())); + s.args().append("shape", tbl->lookup(node->shape())); + // TODO Show newShape info s.state(locop::NodeSummary::State::PartiallyKnown); return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleConv2D *node, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeBilinear *node, + locop::NodeSummary &s) { - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - assert(node->padding() != luci::Padding::UNDEFINED); + s.args().append("input", tbl->lookup(node->input())); + s.args().append("size", tbl->lookup(node->size())); + s.args().append("align_corners", node->align_corners() ? "true" : "false"); + s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false"); + s.state(locop::NodeSummary::State::Complete); + return true; +} - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("filter", tbl()->lookup(node->filter())); - s.args().append("bias", tbl()->lookup(node->bias())); +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeNearestNeighbor *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("size", tbl->lookup(node->size())); + s.args().append("align_corners", node->align_corners() ? 
"true" : "false"); + s.state(locop::NodeSummary::State::Complete); + return true; +} - s.args().append("stride(h,w)", to_str(node->stride())); - s.args().append("dilation(h,w)", to_str(node->dilation())); +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseSequence *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("seq_lengths", tbl->lookup(node->seq_lengths())); + s.args().append("seq_axis", std::to_string(node->seq_axis())); + s.args().append("batch_axis", std::to_string(node->batch_axis())); + s.state(locop::NodeSummary::State::Complete); + return true; +} - s.args().append("padding", to_str(node->padding())); - s.args().append("fused", to_str(node->fusedActivationFunction())); +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseV2 *node, + locop::NodeSummary &s) +{ + s.args().append("tensor", tbl->lookup(node->tensor())); + s.args().append("axis", tbl->lookup(node->axis())); + s.state(locop::NodeSummary::State::Complete); + return true; +} +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleScatterNd *node, + locop::NodeSummary &s) +{ + s.args().append("indices", tbl->lookup(node->indices())); + s.args().append("updates", tbl->lookup(node->updates())); + s.args().append("shape", tbl->lookup(node->shape())); s.state(locop::NodeSummary::State::Complete); + return true; +} +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSegmentSum *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("segment_ids", tbl->lookup(node->segment_ids())); + s.state(locop::NodeSummary::State::Complete); return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleCos *node, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelect *node, + locop::NodeSummary &s) { - return use_x(tbl(), node, s); + s.args().append("condition", 
tbl->lookup(node->condition())); + s.args().append("t", tbl->lookup(node->t())); + s.args().append("e", tbl->lookup(node->e())); + s.state(locop::NodeSummary::State::Complete); + return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleCustom *node, locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelectV2 *node, + locop::NodeSummary &s) { - for (uint32_t i = 0; i < node->numInputs(); i++) + s.args().append("condition", tbl->lookup(node->condition())); + s.args().append("t", tbl->lookup(node->t())); + s.args().append("e", tbl->lookup(node->e())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleShape *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("out_type", to_str(node->out_type())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSlice *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("begin", tbl->lookup(node->begin())); + s.args().append("size", tbl->lookup(node->size())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSoftmax *node, + locop::NodeSummary &s) +{ + s.args().append("logits", tbl->lookup(node->logits())); + s.args().append("beta", pepper::str(node->beta())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToBatchND *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("block_shape", tbl->lookup(node->block_shape())); + s.args().append("paddings", tbl->lookup(node->paddings())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool 
summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToDepth *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("block_size", pepper::str(node->block_size())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSparseToDense *node, + locop::NodeSummary &s) +{ + s.args().append("indices", tbl->lookup(node->indices())); + s.args().append("output_shape", tbl->lookup(node->output_shape())); + s.args().append("values", tbl->lookup(node->values())); + s.args().append("default_value", tbl->lookup(node->default_value())); + s.args().append("Validate_indices", pepper::str(node->validate_indices())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplit *node, + locop::NodeSummary &s) +{ + s.args().append("split_dim", tbl->lookup(node->split_dim())); + s.args().append("input", tbl->lookup(node->input())); + s.args().append("num_split", pepper::str(node->num_split())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplitV *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("size_splits", tbl->lookup(node->size_splits())); + s.args().append("split_dim", tbl->lookup(node->split_dim())); + s.args().append("num_split", pepper::str(node->num_split())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSqueeze *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + + std::stringstream ss{"("}; + for (size_t i = 0; i < node->squeeze_dims().size(); ++i) { - s.args().append("input" + std::to_string(i), tbl()->lookup(node->inputs(i))); + if (i != 0) + ss << ", "; + ss << 
node->squeeze_dims()[i]; } - s.args().append("custom_code", node->custom_code()); + ss << ")"; + s.args().append("squeeze_dims", ss.str()); s.state(locop::NodeSummary::State::Complete); return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthToSpace *node, - locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleStridedSlice *node, + locop::NodeSummary &s) { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("block_size", std::to_string(node->block_size())); + s.args().append("input", tbl->lookup(node->input())); + s.args().append("begin", tbl->lookup(node->begin())); + s.args().append("end", tbl->lookup(node->end())); + s.args().append("strides", tbl->lookup(node->strides())); + s.args().append("begin_mask", pepper::str(node->begin_mask())); + s.args().append("end_mask", pepper::str(node->end_mask())); + s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask())); + s.args().append("new_axis_mask", pepper::str(node->new_axis_mask())); + s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTile *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("multiples", tbl->lookup(node->multiples())); + s.state(locop::NodeSummary::State::Complete); + return true; +} +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2 *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("k", tbl->lookup(node->k())); s.state(locop::NodeSummary::State::Complete); + return true; +} +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTranspose *node, + locop::NodeSummary &s) +{ + s.args().append("a", tbl->lookup(node->a())); + s.args().append("perm", tbl->lookup(node->perm())); + 
s.state(locop::NodeSummary::State::Complete); return true; } -bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthwiseConv2D *node, - locop::NodeSummary &s) const +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTransposeConv *node, + locop::NodeSummary &s) { - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); assert(node->padding() != luci::Padding::UNDEFINED); - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("filter", tbl()->lookup(node->filter())); - s.args().append("bias", tbl()->lookup(node->bias())); - + s.args().append("inputSizes", tbl->lookup(node->inputSizes())); + s.args().append("filter", tbl->lookup(node->filter())); + s.args().append("outBackprop", tbl->lookup(node->outBackprop())); s.args().append("stride(h,w)", to_str(node->stride())); - s.args().append("dilation(h,w)", to_str(node->dilation())); s.args().append("padding", to_str(node->padding())); - s.args().append("depthMultiplier", std::to_string(node->depthMultiplier())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnique *node, + locop::NodeSummary &s) +{ + s.args().append("input", tbl->lookup(node->input())); + s.args().append("idx_out_type", to_str(node->idx_out_type())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpack *node, + locop::NodeSummary &s) +{ + s.args().append("value", tbl->lookup(node->value())); + s.args().append("num", pepper::str(node->num())); + s.args().append("axis", pepper::str(node->axis())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhere *node, + locop::NodeSummary &s) +{ + s.args().append("condition", tbl->lookup(node->condition())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool 
summary_node(const locop::SymbolTable *tbl, const luci::CircleWhile *node, + locop::NodeSummary &s) +{ + for (uint32_t i = 0; i < node->input_count(); ++i) + s.args().append("input", tbl->lookup(node->input(i))); + + if (node->cond_graph() != nullptr) + s.args().append("cond_graph", node->cond_graph()->name()); + else + s.args().append("cond_branch", pepper::str(node->cond_branch())); + + if (node->body_graph() != nullptr) + s.args().append("body_graph", node->body_graph()->name()); + else + s.args().append("body_branch", pepper::str(node->body_branch())); + + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2Out *node, + locop::NodeSummary &s) +{ + s.args().append("topkv2", tbl->lookup(node->input())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUniqueOut *node, + locop::NodeSummary &s) +{ + s.args().append("unique", tbl->lookup(node->input())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpackOut *node, + locop::NodeSummary &s) +{ + s.args().append("unpack", tbl->lookup(node->input())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhileOut *node, + locop::NodeSummary &s) +{ + s.args().append("while", tbl->lookup(node->input())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOutput *node, + locop::NodeSummary &s) +{ + s.args().append("from", tbl->lookup(node->from())); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQFullyConnected *node, + locop::NodeSummary &s) +{ + assert(node->fusedActivationFunction() != 
luci::FusedActFunc::UNDEFINED); + s.args().append("input", tbl->lookup(node->input())); + s.args().append("weights_scales", tbl->lookup(node->weights_scales())); + s.args().append("weights_binary", tbl->lookup(node->weights_binary())); + s.args().append("bias", tbl->lookup(node->bias())); + s.args().append("weights_clusters", tbl->lookup(node->weights_clusters())); s.args().append("fused", to_str(node->fusedActivationFunction())); + s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size())); + s.state(locop::NodeSummary::State::Complete); + return true; +} +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQGather *node, + locop::NodeSummary &s) +{ + s.args().append("input_scales", tbl->lookup(node->input_scales())); + s.args().append("input_binary", tbl->lookup(node->input_binary())); + s.args().append("indices", tbl->lookup(node->indices())); + s.args().append("input_clusters", tbl->lookup(node->input_clusters())); + s.args().append("axis", pepper::str(node->axis())); + s.args().append("input_hidden_size", pepper::str(node->input_hidden_size())); s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool summary_node(const locop::SymbolTable *tbl, const luci::CircleInstanceNorm *node, + locop::NodeSummary &s) +{ + auto fused = node->fusedActivationFunction(); + assert(fused != luci::FusedActFunc::UNDEFINED); + + s.args().append("input", tbl->lookup(node->input())); + s.args().append("gamma", tbl->lookup(node->gamma())); + s.args().append("beta", tbl->lookup(node->beta())); + s.args().append("epsilon", pepper::str(node->epsilon())); + s.args().append("fused_activation_function", to_str(fused)); + s.state(locop::NodeSummary::State::Complete); + return true; +} + +bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const +{ + if (node->dialect() != luci::CircleDialect::get()) + return false; + +#define CIRCLE_NODE(OPCODE, CLASS) \ + if (dynamic_cast<const CLASS *>(node)) \ + { 
\ + s.opname(circle_opname(node->opnum())); \ + return summary(dynamic_cast<const CLASS *>(node), s); \ + } +#include <luci/IR/CircleNodes.lst> +#undef CIRCLE_NODE + + return false; +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleAbs *node, locop::NodeSummary &s) const +{ + return use_x(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleAdd *node, locop::NodeSummary &s) const +{ + return use_xy_act(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMax *node, locop::NodeSummary &s) const +{ + return use_ido(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMin *node, locop::NodeSummary &s) const +{ + return use_ido(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleAveragePool2D *node, + locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchMatMul *node, + locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); +} +bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchToSpaceND *node, + locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleCast *node, locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const +{ + return use_x(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleConcatenation *node, + locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleConst *, locop::NodeSummary &s) const +{ + s.state(locop::NodeSummary::State::PartiallyKnown); return true; } +bool 
CircleNodeSummaryBuilder::summary(const luci::CircleConv2D *node, locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleCos *node, locop::NodeSummary &s) const +{ + return use_x(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleCustom *node, locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthToSpace *node, + locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthwiseConv2D *node, + locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); +} + bool CircleNodeSummaryBuilder::summary(const luci::CircleDiv *node, locop::NodeSummary &s) const { return use_xy(tbl(), node, s); @@ -584,10 +1243,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleExp *node, locop::NodeS bool CircleNodeSummaryBuilder::summary(const luci::CircleExpandDims *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("axis", tbl()->lookup(node->axis())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleFloor *node, locop::NodeSummary &s) const @@ -609,44 +1265,24 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleFloorMod *node, bool CircleNodeSummaryBuilder::summary(const luci::CircleFill *node, locop::NodeSummary &s) const { - s.args().append("dims", tbl()->lookup(node->dims())); - s.args().append("value", tbl()->lookup(node->value())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleFullyConnected *node, locop::NodeSummary &s) const { - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); 
- - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("weights", tbl()->lookup(node->weights())); - s.args().append("bias", tbl()->lookup(node->bias())); - s.args().append("fused", to_str(node->fusedActivationFunction())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleGather *node, locop::NodeSummary &s) const { - s.args().append("params", tbl()->lookup(node->params())); - s.args().append("indices", tbl()->lookup(node->indices())); - s.args().append("axis", pepper::str(node->axis())); - - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleGatherNd *node, locop::NodeSummary &s) const { - s.args().append("params", tbl()->lookup(node->params())); - s.args().append("indices", tbl()->lookup(node->indices())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleGreater *node, locop::NodeSummary &s) const @@ -662,32 +1298,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleGreaterEqual *node, bool CircleNodeSummaryBuilder::summary(const luci::CircleIf *node, locop::NodeSummary &s) const { - s.args().append("cond", tbl()->lookup(node->cond())); - for (uint32_t i = 0; i < node->input_count(); ++i) - s.args().append("input", tbl()->lookup(node->input(i))); - - if (node->then_graph() != nullptr) - s.args().append("then_graph", node->then_graph()->name()); - else - s.args().append("then_branch", pepper::str(node->then_branch())); - - if (node->else_graph() != nullptr) - s.args().append("else_graph", node->else_graph()->name()); - else - s.args().append("else_branch", pepper::str(node->else_branch())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool 
CircleNodeSummaryBuilder::summary(const luci::CircleL2Normalize *node, locop::NodeSummary &s) const { - s.args().append("x", tbl()->lookup(node->x())); - s.args().append("fused_activation_function", to_str(node->fusedActivationFunction())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleLess *node, locop::NodeSummary &s) const @@ -704,22 +1321,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleLessEqual *node, bool CircleNodeSummaryBuilder::summary(const luci::CircleLeakyRelu *node, locop::NodeSummary &s) const { - s.args().append("features", tbl()->lookup(node->features())); - s.args().append("alpha", std::to_string(node->alpha())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleLocalResponseNormalization *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("radius", pepper::str(node->radius())); - s.args().append("bias", pepper::str(node->bias())); - s.args().append("alpha", pepper::str(node->alpha())); - s.args().append("beta", pepper::str(node->beta())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleLog *node, locop::NodeSummary &s) const @@ -754,26 +1362,19 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleLogistic *node, bool CircleNodeSummaryBuilder::summary(const luci::CircleLogSoftmax *node, locop::NodeSummary &s) const { - s.args().append("logits", tbl()->lookup(node->logits())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixDiag *node, locop::NodeSummary &s) const { - s.args().append("diagonal", 
tbl()->lookup(node->diagonal())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixSetDiag *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("diagonal", tbl()->lookup(node->diagonal())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleMaximum *node, locop::NodeSummary &s) const @@ -784,17 +1385,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleMaximum *node, locop::N bool CircleNodeSummaryBuilder::summary(const luci::CircleMaxPool2D *node, locop::NodeSummary &s) const { - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - - s.args().append("value", tbl()->lookup(node->value())); - s.args().append("filter(h,w)", to_str(node->filter())); - s.args().append("stride(h,w)", to_str(node->stride())); - s.args().append("padding", to_str(node->padding())); - s.args().append("fused", to_str(node->fusedActivationFunction())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleMean *node, locop::NodeSummary &s) const @@ -810,11 +1401,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleMinimum *node, locop::N bool CircleNodeSummaryBuilder::summary(const luci::CircleMirrorPad *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("paddings", tbl()->lookup(node->paddings())); - s.args().append("mode", to_str(node->mode())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleMul *node, locop::NodeSummary &s) const @@ -830,14 +1417,13 @@ bool 
CircleNodeSummaryBuilder::summary(const luci::CircleNeg *node, locop::NodeS bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4 *node, locop::NodeSummary &s) const { - s.args().append("boxes", pepper::str(node->boxes())); - s.args().append("scores", pepper::str(node->scores())); - s.args().append("max_output_size", pepper::str(node->max_output_size())); - s.args().append("iou_threshold", pepper::str(node->iou_threshold())); - s.args().append("score_threshold", pepper::str(node->score_threshold())); + return summary_node(tbl(), node, s); +} - s.state(locop::NodeSummary::State::Complete); - return true; +bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5 *node, + locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node, @@ -848,32 +1434,22 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node, bool CircleNodeSummaryBuilder::summary(const luci::CircleOneHot *node, locop::NodeSummary &s) const { - s.args().append("indices", tbl()->lookup(node->indices())); - s.args().append("depth", tbl()->lookup(node->depth())); - s.args().append("on_value", tbl()->lookup(node->on_value())); - s.args().append("off_value", tbl()->lookup(node->off_value())); - s.args().append("axis", pepper::str(node->axis())); - - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CirclePack *node, locop::NodeSummary &s) const { - for (uint32_t i = 0; i < node->values_count(); ++i) - s.args().append("values", tbl()->lookup(node->values(i))); - s.args().append("values_count", pepper::str(node->values_count())); - s.args().append("axis", pepper::str(node->axis())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CirclePad *node, 
locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("paddings", tbl()->lookup(node->paddings())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); +} + +bool CircleNodeSummaryBuilder::summary(const luci::CirclePadV2 *node, locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CirclePow *node, locop::NodeSummary &s) const @@ -883,20 +1459,12 @@ bool CircleNodeSummaryBuilder::summary(const luci::CirclePow *node, locop::NodeS bool CircleNodeSummaryBuilder::summary(const luci::CirclePRelu *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("alpha", tbl()->lookup(node->alpha())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleRange *node, locop::NodeSummary &s) const { - s.args().append("start", tbl()->lookup(node->start())); - s.args().append("limit", tbl()->lookup(node->limit())); - s.args().append("delta", tbl()->lookup(node->delta())); - - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleRank *node, locop::NodeSummary &s) const @@ -946,52 +1514,31 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleReluN1To1 *node, bool CircleNodeSummaryBuilder::summary(const luci::CircleReshape *node, locop::NodeSummary &s) const { - s.args().append("tensor", tbl()->lookup(node->tensor())); - s.args().append("shape", tbl()->lookup(node->shape())); - // TODO Show newShape info - s.state(locop::NodeSummary::State::PartiallyKnown); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeBilinear *node, locop::NodeSummary &s) const { - s.args().append("input", 
tbl()->lookup(node->input())); - s.args().append("size", tbl()->lookup(node->size())); - s.args().append("align_corners", node->align_corners() ? "true" : "false"); - s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false"); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeNearestNeighbor *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("size", tbl()->lookup(node->size())); - s.args().append("align_corners", node->align_corners() ? "true" : "false"); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseSequence *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("seq_lengths", tbl()->lookup(node->seq_lengths())); - s.args().append("seq_axis", std::to_string(node->seq_axis())); - s.args().append("batch_axis", std::to_string(node->batch_axis())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseV2 *node, locop::NodeSummary &s) const { - s.args().append("tensor", tbl()->lookup(node->tensor())); - s.args().append("axis", tbl()->lookup(node->axis())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleRound *node, locop::NodeSummary &s) const @@ -1007,47 +1554,29 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleRsqrt *node, locop::Nod bool CircleNodeSummaryBuilder::summary(const luci::CircleScatterNd *node, locop::NodeSummary &s) const { - s.args().append("indices", tbl()->lookup(node->indices())); - s.args().append("updates", 
tbl()->lookup(node->updates())); - s.args().append("shape", tbl()->lookup(node->shape())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSegmentSum *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("segment_ids", tbl()->lookup(node->segment_ids())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSelect *node, locop::NodeSummary &s) const { - s.args().append("condition", tbl()->lookup(node->condition())); - s.args().append("t", tbl()->lookup(node->t())); - s.args().append("e", tbl()->lookup(node->e())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSelectV2 *node, locop::NodeSummary &s) const { - s.args().append("condition", tbl()->lookup(node->condition())); - s.args().append("t", tbl()->lookup(node->t())); - s.args().append("e", tbl()->lookup(node->e())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleShape *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("out_type", to_str(node->out_type())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSin *node, locop::NodeSummary &s) const @@ -1057,82 +1586,40 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSin *node, locop::NodeS bool CircleNodeSummaryBuilder::summary(const luci::CircleSlice *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("begin", 
tbl()->lookup(node->begin())); - s.args().append("size", tbl()->lookup(node->size())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSoftmax *node, locop::NodeSummary &s) const { - s.args().append("logits", tbl()->lookup(node->logits())); - s.args().append("beta", pepper::str(node->beta())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToBatchND *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("block_shape", tbl()->lookup(node->block_shape())); - s.args().append("paddings", tbl()->lookup(node->paddings())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToDepth *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("block_size", pepper::str(node->block_size())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSparseToDense *node, locop::NodeSummary &s) const { - s.args().append("indices", tbl()->lookup(node->indices())); - s.args().append("output_shape", tbl()->lookup(node->output_shape())); - s.args().append("values", tbl()->lookup(node->values())); - s.args().append("default_value", tbl()->lookup(node->default_value())); - - s.args().append("Validate_indices", pepper::str(node->validate_indices())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSplit *node, locop::NodeSummary &s) const { - s.args().append("split_dim", tbl()->lookup(node->split_dim())); - 
s.args().append("input", tbl()->lookup(node->input())); - - s.args().append("num_split", pepper::str(node->num_split())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitV *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("size_splits", tbl()->lookup(node->size_splits())); - s.args().append("split_dim", tbl()->lookup(node->split_dim())); - - s.args().append("num_split", pepper::str(node->num_split())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSqrt *node, locop::NodeSummary &s) const @@ -1153,38 +1640,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSquaredDifference *node bool CircleNodeSummaryBuilder::summary(const luci::CircleSqueeze *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - - std::stringstream ss{"("}; - for (size_t i = 0; i < node->squeeze_dims().size(); ++i) - { - if (i != 0) - ss << ", "; - ss << node->squeeze_dims()[i]; - } - ss << ")"; - - s.args().append("squeeze_dims", ss.str()); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleStridedSlice *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("begin", tbl()->lookup(node->begin())); - s.args().append("end", tbl()->lookup(node->end())); - s.args().append("strides", tbl()->lookup(node->strides())); - - s.args().append("begin_mask", pepper::str(node->begin_mask())); - s.args().append("end_mask", pepper::str(node->end_mask())); - s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask())); - s.args().append("new_axis_mask", 
pepper::str(node->new_axis_mask())); - s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask())); - - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleSub *node, locop::NodeSummary &s) const @@ -1204,92 +1666,44 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleTanh *node, locop::Node bool CircleNodeSummaryBuilder::summary(const luci::CircleTile *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("multiples", tbl()->lookup(node->multiples())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2 *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("k", tbl()->lookup(node->k())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleTranspose *node, locop::NodeSummary &s) const { - s.args().append("a", tbl()->lookup(node->a())); - s.args().append("perm", tbl()->lookup(node->perm())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleTransposeConv *node, locop::NodeSummary &s) const { - assert(node->padding() != luci::Padding::UNDEFINED); - - s.args().append("inputSizes", tbl()->lookup(node->inputSizes())); - s.args().append("filter", tbl()->lookup(node->filter())); - s.args().append("outBackprop", tbl()->lookup(node->outBackprop())); - - s.args().append("stride(h,w)", to_str(node->stride())); - s.args().append("padding", to_str(node->padding())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool 
CircleNodeSummaryBuilder::summary(const luci::CircleUnique *node, locop::NodeSummary &s) const { - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("idx_out_type", to_str(node->idx_out_type())); - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpack *node, locop::NodeSummary &s) const { - s.args().append("value", tbl()->lookup(node->value())); - - s.args().append("num", pepper::str(node->num())); - s.args().append("axis", pepper::str(node->axis())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleWhere *node, locop::NodeSummary &s) const { - s.args().append("condition", tbl()->lookup(node->condition())); - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleWhile *node, locop::NodeSummary &s) const { - for (uint32_t i = 0; i < node->input_count(); ++i) - s.args().append("input", tbl()->lookup(node->input(i))); - - if (node->cond_graph() != nullptr) - s.args().append("cond_graph", node->cond_graph()->name()); - else - s.args().append("cond_branch", pepper::str(node->cond_branch())); - - if (node->body_graph() != nullptr) - s.args().append("body_graph", node->body_graph()->name()); - else - s.args().append("body_branch", pepper::str(node->body_branch())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleZerosLike *node, @@ -1313,29 +1727,19 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitVOut *node, bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2Out *node, locop::NodeSummary &s) const { - s.args().append("topkv2", tbl()->lookup(node->input())); - 
s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleUniqueOut *node, locop::NodeSummary &s) const { - s.args().append("unique", tbl()->lookup(node->input())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpackOut *node, locop::NodeSummary &s) const { - s.args().append("unpack", tbl()->lookup(node->input())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleIfOut *node, locop::NodeSummary &s) const @@ -1349,14 +1753,16 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4Out return use_input(tbl(), node, s); } -bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node, +bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5Out *node, locop::NodeSummary &s) const { - s.args().append("while", tbl()->lookup(node->input())); - - s.state(locop::NodeSummary::State::Complete); + return use_input(tbl(), node, s); +} - return true; +bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node, + locop::NodeSummary &s) const +{ + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleInput *, locop::NodeSummary &s) const @@ -1367,61 +1773,25 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleInput *, locop::NodeSum bool CircleNodeSummaryBuilder::summary(const luci::CircleOutput *node, locop::NodeSummary &s) const { - s.args().append("from", tbl()->lookup(node->from())); - - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQFullyConnected *node, locop::NodeSummary &s) const { - 
assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("weights_scales", tbl()->lookup(node->weights_scales())); - s.args().append("weights_binary", tbl()->lookup(node->weights_binary())); - s.args().append("bias", tbl()->lookup(node->bias())); - s.args().append("weights_clusters", tbl()->lookup(node->weights_clusters())); - - s.args().append("fused", to_str(node->fusedActivationFunction())); - s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size())); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQGather *node, locop::NodeSummary &s) const { - s.args().append("input_scales", tbl()->lookup(node->input_scales())); - s.args().append("input_binary", tbl()->lookup(node->input_binary())); - s.args().append("indices", tbl()->lookup(node->indices())); - s.args().append("input_clusters", tbl()->lookup(node->input_clusters())); - - s.args().append("axis", pepper::str(node->axis())); - s.args().append("input_hidden_size", pepper::str(node->input_hidden_size())); - - s.state(locop::NodeSummary::State::Complete); - return true; + return summary_node(tbl(), node, s); } bool CircleNodeSummaryBuilder::summary(const luci::CircleInstanceNorm *node, locop::NodeSummary &s) const { - auto fused = node->fusedActivationFunction(); - assert(fused != luci::FusedActFunc::UNDEFINED); - - s.args().append("input", tbl()->lookup(node->input())); - s.args().append("gamma", tbl()->lookup(node->gamma())); - s.args().append("beta", tbl()->lookup(node->beta())); - s.args().append("epsilon", pepper::str(node->epsilon())); - s.args().append("fused_activation_function", to_str(fused)); - - s.state(locop::NodeSummary::State::Complete); - - return true; + return summary_node(tbl(), node, s); } } // namespace diff --git 
a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index 312749f83..a832844f8 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -32,6 +32,7 @@ public: { enum Algorithm { + FuseBatchNormWithTConv, FuseBCQ, FuseInstanceNorm, ResolveCustomOpAdd, @@ -39,6 +40,7 @@ public: ResolveCustomOpMatMul, QuantizeDequantizeWeights, QuantizeWithMinMax, + Requantize, }; enum AlgorithmParameters diff --git a/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h new file mode 100644 index 000000000..d3e930a36 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__ +#define __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to fuse Batch Normalization into CircleTransposeConv + */ +struct FuseBatchNormWithTConvPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::FuseBatchNormWithTConvPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/RequantizePass.h b/compiler/luci/pass/include/luci/Pass/RequantizePass.h new file mode 100644 index 000000000..2442b24ea --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/RequantizePass.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_REQUANTIZE_PASS_H__ +#define __LUCI_REQUANTIZE_PASS_H__ + +#include <loco.h> + +#include <logo/Pass.h> + +#include <luci/Pass/QuantizationParameters.h> + +namespace luci +{ + +/** + * @brief Pass to quantize weights + */ +class RequantizePass : public logo::Pass +{ +public: + RequantizePass(loco::DataType input_dtype, loco::DataType output_dtype) + : _input_dtype{input_dtype}, _output_dtype{output_dtype} + { + // DO NOTHING + } + virtual const char *name(void) const { return "luci::RequantizePass"; } + +public: + bool run(loco::Graph *graph); + +private: + loco::DataType _input_dtype; + loco::DataType _output_dtype; +}; + +} // namespace luci + +#endif //__LUCI_REQUANTIZE_PASS_H__ diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp index 2edf7a9c6..2ee759b4e 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -16,11 +16,13 @@ #include "luci/CircleOptimizer.h" +#include "luci/Pass/FuseBatchNormWithTConv.h" #include "luci/Pass/FuseBCQPass.h" #include "luci/Pass/FuseInstanceNormPass.h" #include "luci/Pass/ResolveCustomOpAddPass.h" #include "luci/Pass/ResolveCustomOpBatchMatMulPass.h" #include "luci/Pass/ResolveCustomOpMatMulPass.h" +#include "luci/Pass/RequantizePass.h" #include "luci/Pass/QuantizeWithMinMaxPass.h" #include "luci/Pass/QuantizeDequantizeWeightsPass.h" // TODO add more passes @@ -34,6 +36,7 @@ #include "ProgressReporter.h" #include "CircleOptimizerUtils.h" +#include <luci/IR/CircleNodes.h> #include <logo/Phase.h> #include <memory> @@ -125,6 +128,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<FuseBCQPass>()); } + if (_options->query(Options::Algorithm::FuseBatchNormWithTConv)) + { + phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>()); + } // Shape inference is needed for added nodes doing above transformations 
phase.emplace_back(std::make_unique<luci::ShapeInferencePass>()); @@ -163,6 +170,14 @@ void CircleOptimizer::quantize(loco::Graph *g) const throw std::runtime_error("Unsupported granularity. List of supported granularity: " + to_string(fakeq_supported_granularity)); + // Clear existing quantparams before doing fake quantization + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + if (circle_node->quantparam() != nullptr) + circle_node->quantparam(nullptr); + } + luci::QuantizeDequantizeWeightsPass fake_quantizer( str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity)); fake_quantizer.run(g); @@ -196,6 +211,27 @@ void CircleOptimizer::quantize(loco::Graph *g) const quantizer.run(g); } + // Requantize + if (_options->query(Options::Algorithm::Requantize)) + { + static const std::vector<std::string> rq_supported_input_dtype{"int8"}; + static const std::vector<std::string> rq_supported_output_dtype{"uint8"}; + + auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype); + auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype); + + if (!in_array(to_lower_case(input_dtype), rq_supported_input_dtype)) + throw std::runtime_error("Unsupported input type. List of supported input types: " + + to_string(rq_supported_input_dtype)); + + if (!in_array(to_lower_case(output_dtype), rq_supported_output_dtype)) + throw std::runtime_error("Unsupported output type. 
List of supported output types: " + + to_string(rq_supported_output_dtype)); + + luci::RequantizePass requantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype)); + requantizer.run(g); + } + logo::Phase phase; // Do Shape/Type inference diff --git a/compiler/luci/pass/src/FuseBCQPass.cpp b/compiler/luci/pass/src/FuseBCQPass.cpp index 260de5b30..7aa2e3e80 100644 --- a/compiler/luci/pass/src/FuseBCQPass.cpp +++ b/compiler/luci/pass/src/FuseBCQPass.cpp @@ -38,9 +38,9 @@ const std::string node_name_prefix(luci::NodeName node_name) { std::string prefix = node_name; - if (prefix.find("ReadVariableOp/resource/") != std::string::npos) + if (prefix.find("/ReadVariableOp/resource") != std::string::npos) { - const auto start_index = prefix.find("ReadVariableOp/resource/"); + const auto start_index = prefix.find("/ReadVariableOp/resource"); const auto left_prefix = prefix.substr(0, start_index); const auto right_prefix = prefix.substr(start_index + 24); diff --git a/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp b/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp new file mode 100644 index 000000000..e39455b1a --- /dev/null +++ b/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/FuseBatchNormWithTConv.h" + +#include <luci/IR/CircleNodes.h> + +namespace +{ +/** + * NOTE TF's fusedBatchNorm is converted to mul and add of Circle. + * + * BEFORE + * + * [CircleTransposeConv] + * | + * [mul] + * | + * [add] + * AFTER + * + * [CircleTransposeConv] + */ +bool fused_batch_norm_with_tconv(luci::CircleTransposeConv *tconv) +{ + // check whether it has bias or not. This optimization works only if it doesn't. + auto bias = dynamic_cast<luci::CircleOutputExclude *>(tconv->bias()); + if (not bias) + return false; + + // get weight of tconv + auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter()); + if (not filter) + return false; + if (filter->dtype() != loco::DataType::FLOAT32) + return false; + + // get mul node + auto tconv_output = loco::succs(tconv); + assert(tconv_output.size() == 1); + auto mul = dynamic_cast<luci::CircleMul *>(*tconv_output.begin()); + if (not mul) + return false; + if (mul->dtype() != loco::DataType::FLOAT32) + return false; + + // get add node + auto mul_output = loco::succs(mul); + assert(mul_output.size() == 1); + auto add = dynamic_cast<luci::CircleAdd *>(*mul_output.begin()); + if (not add) + return false; + if (add->dtype() != loco::DataType::FLOAT32) + return false; + if (add->fusedActivationFunction() != luci::FusedActFunc::NONE && + add->fusedActivationFunction() != luci::FusedActFunc::RELU6) + return false; + + // get scale of batchnorm + auto scale = dynamic_cast<luci::CircleConst *>(mul->y()); + if (not scale) + return false; + + // scale dim(0) == tconv filter channel dim + if (filter->rank() != 4) + return false; + auto filter_channel_dim = filter->dim(3).value(); + if (scale->rank() != 1) + return false; + auto scale_dim = scale->dim(0).value(); + if (filter_channel_dim != scale_dim) + return false; + + // get shift of batchnorm + auto shift = dynamic_cast<luci::CircleConst *>(add->y()); + if (not shift) + return false; + + // shift dim(0) == tconv filter channel dim + if 
(shift->rank() != 1) + return false; + auto shift_dim = shift->dim(0).value(); + if (filter_channel_dim != shift_dim) + return false; + + // filter weight = filter weight * mul(scale) + add(shift) + uint32_t filter_batch_dim = filter->dim(0).value(); + uint32_t filter_height_dim = filter->dim(1).value(); + uint32_t filter_width_dim = filter->dim(2).value(); + for (uint32_t c = 0; c < filter_channel_dim; c++) + { + for (uint32_t n = 0; n < filter_batch_dim; n++) + { + for (uint32_t h = 0; h < filter_height_dim; h++) + { + for (uint32_t w = 0; w < filter_width_dim; w++) + { + uint32_t offset = n * filter_height_dim * filter_width_dim * filter_channel_dim + + h * filter_width_dim * filter_channel_dim + w * filter_channel_dim + c; + filter->at<loco::DataType::FLOAT32>(offset) *= scale->at<loco::DataType::FLOAT32>(c); + } + } + } + } + + // fuse shift with transposed conv + tconv->bias(shift); + + if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6) + { + // separate relu op from add op + auto relu = add->graph()->nodes()->create<luci::CircleRelu6>(); + relu->features(tconv); + + // remove mul node + replace(add).with(relu); + } + else + { + replace(add).with(tconv); + } + + return true; +} + +} // namespace + +namespace luci +{ + +bool FuseBatchNormWithTConvPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto tconv = dynamic_cast<luci::CircleTransposeConv *>(node); + if (not tconv) + continue; + + changed |= fused_batch_norm_with_tconv(tconv); + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp index b335a53b4..60c1cdd72 100644 --- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp +++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp @@ -472,7 +472,12 @@ struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool> if (granularity == 
QuantizationGranularity::ChannelWise) { auto quantparam = circle_node->quantparam(); - assert(quantparam != nullptr); + if (quantparam == nullptr) + { + assert(false && "quantparam is nullptr"); + return false; + } + auto min = quantparam->min; auto scaling_factor = quantparam->scale; int32_t channel_dim_index = 0; diff --git a/compiler/luci/pass/src/RequantizePass.cpp b/compiler/luci/pass/src/RequantizePass.cpp new file mode 100644 index 000000000..49fbf76ec --- /dev/null +++ b/compiler/luci/pass/src/RequantizePass.cpp @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/RequantizePass.h" +#include "QuantizationUtils.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> +#include <luci/Log.h> + +#include <oops/UserExn.h> + +#include <iostream> +#include <cmath> + +namespace luci +{ + +namespace +{ + +// Check if the node is the bias of Conv2D, DepthwiseConv2D, or FullyConnected layer +bool is_bias(CircleConst *node) +{ + if (node == nullptr) + return false; + + auto succs = loco::succs(node); + if (succs.size() != 1) // assume bias is used by only one node + return false; + + for (auto out : succs) + { + auto conv = dynamic_cast<CircleConv2D *>(out); + if (conv != nullptr && conv->bias() == node) + return true; + + auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out); + if (dw_conv != nullptr && dw_conv->bias() == node) + return true; + + auto fc = dynamic_cast<CircleFullyConnected *>(out); + if (fc != nullptr && fc->bias() == node) + return true; + + // TODO: add TransposeConv when bias is supported in CircleTransposeConv + } + return false; +} + +void requant_nonconst_int8_to_uint8(CircleNode *circle_node) +{ + assert(circle_node->dtype() == loco::DataType::S8); + + auto quantparam = circle_node->quantparam(); + assert(quantparam != nullptr); + for (size_t i = 0; i < quantparam->zerop.size(); ++i) + { + quantparam->zerop[i] += 128; + } + circle_node->dtype(loco::DataType::U8); +} + +// Requantize CircleConst from symmetric int8 to asymmetric uint8 +// Original values: -127 ~ 127 +// After requantization: 1 ~ 255 (zp <- zp + 128) +void requant_const_int8_to_uint8(CircleConst *node) +{ + assert(node->dtype() == loco::DataType::S8); + + uint32_t size = node->size<loco::DataType::S8>(); + std::vector<int32_t> requantized_values(size); + for (uint32_t i = 0; i < size; ++i) + { + int32_t data = node->at<loco::DataType::S8>(i); + requantized_values[i] = data + 128; + } + + node->dtype(loco::DataType::U8); // change the type of tensor + node->size<loco::DataType::U8>(size); + for 
(uint32_t i = 0; i < size; ++i) + { + assert(1 <= requantized_values[i] && requantized_values[i] <= 255); + node->at<loco::DataType::U8>(i) = requantized_values[i]; + } + + auto quantparam = node->quantparam(); + assert(quantparam != nullptr); + for (size_t i = 0; i < quantparam->zerop.size(); ++i) + { + quantparam->zerop[i] += 128; + } +} + +/** + * @brief RequantizeNonConst requantizes tensors for activations + */ +struct RequantizeNonConst final : public luci::CircleNodeMutableVisitor<bool> +{ + RequantizeNonConst(loco::DataType input, loco::DataType output) + : _input_type(input), _output_type(output) + { + } + + loco::DataType _input_type; + loco::DataType _output_type; + + // Requantize input tensors of each node + bool visit(luci::CircleNode *node) + { + LOGGER(l); + INFO(l) << "RequantizeNonConst visit node: " << node->name() << std::endl; + auto arity = node->arity(); + for (uint32_t i = 0; i < arity; i++) + { + auto input_node = node->arg(i); + auto circle_node = loco::must_cast<luci::CircleNode *>(input_node); + + // Check if this was quantized (only quantized tensors are requantized) + if (circle_node->quantparam() == nullptr) + continue; + + // Check if this is already requantized + if (circle_node->dtype() == _output_type) + continue; + + // Check if this is not const (only non-const is requantized in this function) + auto circle_const = dynamic_cast<CircleConst *>(circle_node); + if (circle_const != nullptr) + continue; + + if (_input_type == loco::DataType::S8 && _output_type == loco::DataType::U8) + requant_nonconst_int8_to_uint8(circle_node); + } + return false; + } +}; + +/** + * @brief RequantizeConst requantizes tensors for weights + */ +struct RequantizeConst final : public luci::CircleNodeMutableVisitor<bool> +{ + RequantizeConst(loco::DataType input, loco::DataType output) + : _input_type(input), _output_type(output) + { + } + + loco::DataType _input_type; + loco::DataType _output_type; + + // Requantize input tensors of each node + bool 
visit(luci::CircleNode *node) + { + LOGGER(l); + INFO(l) << "RequantizeConst visit node: " << node->name() << std::endl; + auto arity = node->arity(); + for (uint32_t i = 0; i < arity; i++) + { + auto input_node = node->arg(i); + auto circle_node = loco::must_cast<luci::CircleNode *>(input_node); + + // Check if this was quantized (only quantized tensors are requantized) + if (circle_node->quantparam() == nullptr) + continue; + + // Check if this is already requantized + if (circle_node->dtype() == _output_type) + continue; + + // Check if this is const (only const is requantized in this function) + auto circle_const = dynamic_cast<CircleConst *>(circle_node); + if (circle_const == nullptr) + continue; + + // Check if this is not bias + // bias is not requantized when int8 -> uint8 + if (is_bias(circle_const)) + continue; + + if (_input_type == loco::DataType::S8 && _output_type == loco::DataType::U8) + requant_const_int8_to_uint8(circle_const); + } + return false; + } +}; + +} // namespace + +bool RequantizePass::run(loco::Graph *g) +{ + LOGGER(l); + INFO(l) << "RequantizePass Start" << std::endl; + + // Requantize non-const (activations) + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + RequantizeNonConst rqnc(_input_dtype, _output_dtype); + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + circle_node->accept(&rqnc); + } + + // Requantize const (including weights, constants) + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + RequantizeConst rqc(_input_dtype, _output_dtype); + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + circle_node->accept(&rqc); + } + + // Update output dtype + auto graph_outputs = g->outputs(); + for (auto node : loco::output_nodes(g)) + { + auto circle_node = loco::must_cast<luci::CircleOutput *>(node); + if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype) + { + circle_node->dtype(_output_dtype); + auto graph_output = 
graph_outputs->at(circle_node->index()); + graph_output->dtype(_output_dtype); + } + } + + INFO(l) << "RequantizePass End" << std::endl; + return false; // one time run +} + +} // namespace luci diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp index 6355ec546..db25186b1 100644 --- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp @@ -47,49 +47,19 @@ std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape return os; } -// Call this for CircleAvgPool2D and CircleMaxPool2D only -template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node) +loco::TensorShape own_shape(const luci::CircleNode *node) { - LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known"); - - auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>(); - assert(ifm_shape.rank() == 4); - - uint32_t input_height = ifm_shape.dim(1).value(); - uint32_t input_width = ifm_shape.dim(2).value(); - uint32_t stride_height = node->stride()->h(); - uint32_t stride_width = node->stride()->w(); - uint32_t window_height = node->filter()->h(); - uint32_t window_width = node->filter()->w(); - uint32_t dilation_height = 1; // dilation for CircleAvgPool2D and CircleMaxPool2D is 1 - uint32_t dilation_width = 1; - uint32_t effective_window_height = dilation_height * (window_height - 1) + 1; - uint32_t effective_window_width = dilation_width * (window_width - 1) + 1; - - uint32_t output_height = 0; - uint32_t output_width = 0; - - if (node->padding() == luci::Padding::VALID) - { - output_height = (input_height + stride_height - effective_window_height) / stride_height; - output_width = (input_width + stride_width - effective_window_width) / stride_width; - } - else if (node->padding() == luci::Padding::SAME) - { - output_height = (input_height + stride_height - 1) / stride_height; - output_width 
= (input_width + stride_width - 1) / stride_width; - } - else - LUCI_ASSERT(false, "Wrong padding type"); - - loco::TensorShape ofm_shape; - ofm_shape.rank(4); - ofm_shape.dim(0) = ifm_shape.dim(0); - ofm_shape.dim(1) = output_height; - ofm_shape.dim(2) = output_width; - ofm_shape.dim(3) = ifm_shape.dim(3); + loco::TensorShape shape; + shape.rank(node->rank()); + for (uint32_t r = 0; r < node->rank(); ++r) + shape.dim(r) = loco::Dimension(node->dim(r).value()); + return shape; +} - return loco::NodeShape{ofm_shape}; +loco::NodeShape use_own(const luci::CircleNode *node) +{ + loco::TensorShape shape = own_shape(node); + return loco::NodeShape{shape}; } /** @@ -192,6 +162,304 @@ loco::TensorShape broadcast_shape(const loco::TensorShape &x, const loco::Tensor return output_shape; } +/** + * @brief vector_from_constant will return int64_t vector from CircleConst node + */ +template <loco::DataType T> std::vector<int64_t> vector_from_constant(luci::CircleConst *const_node) +{ + std::vector<int64_t> result; + + for (uint32_t idx = 0; idx < const_node->size<T>(); ++idx) + result.push_back(const_node->at<T>(idx)); + + return result; +} + +template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node) +{ + auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>(); + auto y_shape = loco::shape_get(node->y()).template as<loco::TensorShape>(); + + auto output_shape = broadcast_shape(x_shape, y_shape); + + return loco::NodeShape{output_shape}; +} + +template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node) +{ + auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>(); + return loco::NodeShape{x_shape}; +} + +template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node) +{ + auto shape = loco::shape_get(node->logits()).template as<loco::TensorShape>(); + return loco::NodeShape{shape}; +} + +template <class CIRCLENODE> +loco::NodeShape use_paddings(const CIRCLENODE *node, const luci::CircleConst 
*paddings) +{ + const loco::DataType S32 = loco::DataType::S32; + + auto input_shape = loco::shape_get(node->input()).template as<loco::TensorShape>(); + + // TODO support other data type + LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now"); + LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2") + + int32_t n = paddings->dim(0).value(); + int32_t v = paddings->dim(1).value(); + + LUCI_ASSERT(v == 2, "paddings should be [n, 2]"); + LUCI_ASSERT(n == int32_t(input_shape.rank()), + "paddings [n, 2] should have same value of input rank"); + + loco::TensorShape output_shape; + + output_shape.rank(input_shape.rank()); + for (int32_t ni = 0; ni < n; ++ni) + { + int32_t idx = ni * 2; + int value = input_shape.dim(ni).value(); + value += paddings->at<S32>(idx + 0); // left + value += paddings->at<S32>(idx + 1); // right + output_shape.dim(ni) = value; + } + + return loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_add_n(const luci::CircleAddN *node) +{ + auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>(); + + for (uint32_t idx = 1; idx < node->arity(); ++idx) + { + auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>(); + if (!(shape == shape_idx)) + { + INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx); + } + } + return loco::NodeShape{shape}; +} + +loco::NodeShape infer_arg_max(const luci::CircleArgMax *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>(); + + int64_t select_axis = 0; + { + LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr"); + + // Only support node's shape() is CircleConst with S32/S64 + // Support S32 for now. 
+ auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension()); + LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32, + "Only support int32 CircleConst for CircleArgMax"); + + if (const_shape_node->rank() > 1) + INTERNAL_EXN_V("Only support rank 0/1 CircleConst", + oops::to_uint32(const_shape_node->rank())); + + select_axis = const_shape_node->scalar<loco::DataType::S32>(); + } + assert(select_axis < input_shape.rank()); + assert(select_axis >= 0); // TODO support minus of this breaks + + // NOTE select_axis is removed + loco::TensorShape shape_output; + uint32_t rank = input_shape.rank(); + uint32_t shrink = static_cast<uint32_t>(select_axis); + assert(rank > 0); + shape_output.rank(rank - 1); + for (uint32_t r = 0, d = 0; r < rank; ++r) + { + if (r == shrink) + continue; + shape_output.dim(d++) = input_shape.dim(r); + } + return loco::NodeShape{shape_output}; +} + +loco::NodeShape infer_arg_min(const luci::CircleArgMin *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>(); + + int64_t select_axis = 0; + { + LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr"); + + // Only support node's shape() is CircleConst with S32/S64 + // Support S32 for now. 
+ auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension()); + LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32, + "Only support int32 CircleConst for CircleArgMin"); + + if (const_shape_node->rank() > 1) + INTERNAL_EXN_V("Only support rank 0/1 CircleConst", + oops::to_uint32(const_shape_node->rank())); + + select_axis = const_shape_node->scalar<loco::DataType::S32>(); + } + assert(select_axis < input_shape.rank()); + assert(select_axis >= 0); // TODO support minus of this breaks + + // NOTE select_axis is removed + loco::TensorShape shape_output; + uint32_t rank = input_shape.rank(); + uint32_t shrink = static_cast<uint32_t>(select_axis); + assert(rank > 0); + shape_output.rank(rank - 1); + for (uint32_t r = 0, d = 0; r < rank; ++r) + { + if (r == shrink) + continue; + shape_output.dim(d++) = input_shape.dim(r); + } + return loco::NodeShape{shape_output}; +} + +// Call this for CircleAvgPool2D and CircleMaxPool2D only +template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node) +{ + LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known"); + + auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>(); + assert(ifm_shape.rank() == 4); + + uint32_t input_height = ifm_shape.dim(1).value(); + uint32_t input_width = ifm_shape.dim(2).value(); + uint32_t stride_height = node->stride()->h(); + uint32_t stride_width = node->stride()->w(); + uint32_t window_height = node->filter()->h(); + uint32_t window_width = node->filter()->w(); + uint32_t dilation_height = 1; // dilation for CircleAvgPool2D and CircleMaxPool2D is 1 + uint32_t dilation_width = 1; + uint32_t effective_window_height = dilation_height * (window_height - 1) + 1; + uint32_t effective_window_width = dilation_width * (window_width - 1) + 1; + + uint32_t output_height = 0; + uint32_t output_width = 0; + + if (node->padding() == luci::Padding::VALID) + { + output_height = (input_height + stride_height - 
effective_window_height) / stride_height; + output_width = (input_width + stride_width - effective_window_width) / stride_width; + } + else if (node->padding() == luci::Padding::SAME) + { + output_height = (input_height + stride_height - 1) / stride_height; + output_width = (input_width + stride_width - 1) / stride_width; + } + else + LUCI_ASSERT(false, "Wrong padding type"); + + loco::TensorShape ofm_shape; + ofm_shape.rank(4); + ofm_shape.dim(0) = ifm_shape.dim(0); + ofm_shape.dim(1) = output_height; + ofm_shape.dim(2) = output_width; + ofm_shape.dim(3) = ifm_shape.dim(3); + + return loco::NodeShape{ofm_shape}; +} + +loco::NodeShape infer_batch_to_space_nd(const luci::CircleBatchToSpaceND *node) +{ + const loco::DataType S32 = loco::DataType::S32; + + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + // Support only input rank is 3 and 4 + assert(input_shape.rank() == 3 || input_shape.rank() == 4); + + // Only support block_shape() with S32 type CircleConst for now + auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape()); + LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32, "Only support int32 block_shape"); + + // Only support crops() with S32 type CircleConst for now + auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops()); + LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops"); + + auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>(); + auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>(); + assert(const_block_shape_shape.rank() == 1); + assert(const_crops_shape.rank() == 2); + + int32_t input_spatial_dim = input_shape.rank() - 2; + assert(const_block_shape_shape.dim(0) == input_spatial_dim); + assert(const_crops_shape.dim(0) == input_spatial_dim); + assert(const_crops_shape.dim(1) == 2); + + loco::TensorShape shape_output; + + shape_output.rank(input_shape.rank()); + + int32_t 
output_batch_size = input_shape.dim(0).value(); + for (int32_t dim = 0; dim < input_spatial_dim; ++dim) + { + int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim); + dim_size -= const_crops->at<S32>(dim * 2); + dim_size -= const_crops->at<S32>(dim * 2 + 1); + shape_output.dim(dim + 1) = dim_size; + + assert(output_batch_size % const_block_shape->at<S32>(dim) == 0); + output_batch_size = output_batch_size / const_block_shape->at<S32>(dim); + } + shape_output.dim(0) = output_batch_size; + shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1); + + return loco::NodeShape{shape_output}; +} + +struct OutputSize +{ + uint32_t height = 0; + uint32_t width = 0; +}; + +template <class Conv2DType> OutputSize infer_conv2d_type(const Conv2DType *node) +{ + auto ifm_shape = loco::shape_get(node->input()).template as<loco::TensorShape>(); + auto ker_shape = loco::shape_get(node->filter()).template as<loco::TensorShape>(); + assert(ifm_shape.rank() == 4); + assert(ker_shape.rank() == 4); + + uint32_t input_height = ifm_shape.dim(1).value(); + uint32_t input_width = ifm_shape.dim(2).value(); + uint32_t stride_height = node->stride()->h(); + uint32_t stride_width = node->stride()->w(); + uint32_t ker_height = ker_shape.dim(1).value(); + uint32_t ker_width = ker_shape.dim(2).value(); + uint32_t dilation_height = node->dilation()->h(); + uint32_t dilation_width = node->dilation()->w(); + uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1; + uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1; + + uint32_t output_height = 0; + uint32_t output_width = 0; + + if (node->padding() == luci::Padding::VALID) + { + output_height = (input_height + stride_height - effective_ker_height) / stride_height; + output_width = (input_width + stride_width - effective_ker_width) / stride_width; + } + else if (node->padding() == luci::Padding::SAME) + { + output_height = (input_height + stride_height - 1) / 
stride_height; + output_width = (input_width + stride_width - 1) / stride_width; + } + else + LUCI_ASSERT(false, "Wrong padding type"); + + OutputSize os{output_height, output_width}; + + return os; +} + // BatchMatMulV2 supports broadcasting in the batch dimensions(BatchMatMul doesn't) // TODO Distinguish BatchMatMul and BatchMatMulV2 loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape, @@ -238,13 +506,325 @@ loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape, return loco::NodeShape{output_shape}; } -loco::TensorShape own_shape(const luci::CircleNode *node) +loco::NodeShape infer_concatenation(const luci::CircleConcatenation *node) +{ + // TODO Support when CircleConcatenation has 0 input + assert(node->numValues() > 0); + + auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>(); + auto axis = node->axis(); + if (axis < 0) + axis += first_shape.rank(); + + assert(0 <= axis); + assert(first_shape.rank() > static_cast<uint32_t>(axis)); + + loco::TensorShape output_shape; + + output_shape.rank(first_shape.rank()); + for (uint32_t i = 0; i < output_shape.rank(); ++i) + output_shape.dim(i) = first_shape.dim(i); + + for (uint32_t i = 1; i < node->numValues(); ++i) + { + auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>(); + + for (uint32_t j = 0; j < output_shape.rank(); ++j) + { + if (j == static_cast<uint32_t>(axis)) + output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value(); + else + assert(output_shape.dim(j) == input_shape.dim(j)); + } + } + + return loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_conv2d(const luci::CircleConv2D *node) +{ + LOGGER(l); + + auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC + auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI + + INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker(" << ker_shape.rank() + << ")" << std::endl; 
+ + assert(ifm_shape.rank() == 4); + assert(ker_shape.rank() == 4); + assert(ifm_shape.dim(3) == ker_shape.dim(3)); + + auto os = infer_conv2d_type(node); + + loco::TensorShape ofm_shape; + ofm_shape.rank(4); + ofm_shape.dim(0) = ifm_shape.dim(0); + ofm_shape.dim(1) = os.height; + ofm_shape.dim(2) = os.width; + ofm_shape.dim(3) = ker_shape.dim(0); + + return loco::NodeShape{ofm_shape}; +} + +loco::NodeShape infer_depth_to_space(const luci::CircleDepthToSpace *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported"); + + // Only data format NHWC is supported + // TODO need to clarify what to do with layout in this operator + int32_t height = input_shape.dim(1).value(); + int32_t width = input_shape.dim(2).value(); + int32_t depth = input_shape.dim(3).value(); + + int block_size = node->block_size(); + + if (block_size < 2) + INTERNAL_EXN("Block size must be >= 2"); + + if (depth % (block_size * block_size)) + { + INTERNAL_EXN("The input tensor's depth must be divisible by block_size^2"); + } + + loco::TensorShape output_shape; + output_shape.rank(4); + + output_shape.dim(0) = input_shape.dim(0).value(); + output_shape.dim(1) = height * block_size; + output_shape.dim(2) = width * block_size; + output_shape.dim(3) = depth / (block_size * block_size); + + return loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_depthwise_conv2d(const luci::CircleDepthwiseConv2D *node) +{ + auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC + auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM + + assert(ifm_shape.rank() == 4); + assert(ker_shape.rank() == 4); + assert(ker_shape.dim(0).value() == 1); + + auto os = infer_conv2d_type(node); + + loco::TensorShape ofm_shape; + ofm_shape.rank(4); + ofm_shape.dim(0) = ifm_shape.dim(0); + ofm_shape.dim(1) = os.height; + ofm_shape.dim(2) = os.width; + 
ofm_shape.dim(3) = ker_shape.dim(3); + + return loco::NodeShape{ofm_shape}; +} + +loco::NodeShape infer_expand_dims(const luci::CircleExpandDims *node) +{ + const loco::DataType S32 = loco::DataType::S32; + auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + if (x_shape.rank() == 0) + { + // This maybe for unknown shape. We use shape from the node itself. + return use_own(node); + } + auto const_axis = loco::must_cast<luci::CircleConst *>(node->axis()); + LUCI_ASSERT(const_axis->dtype() == S32, "Only support int32 CircleConst for axis"); + if (const_axis->rank() != 0 && const_axis->rank() != 1) + { + INTERNAL_EXN_V("Non-scalar axis in OP", node->opnum()); + } + int32_t axis = const_axis->at<S32>(0); + LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) && + (axis >= -1 - static_cast<int32_t>(x_shape.rank())), + "Axis has to be between [-(D+1), D], where D is rank of input."); + size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis; + loco::TensorShape output_shape; + output_shape.rank(x_shape.rank() + 1); + size_t i = 0; + for (; i < positive_axis; i++) + output_shape.dim(i) = x_shape.dim(i); + output_shape.dim(i) = loco::Dimension(1); + for (; i < x_shape.rank(); i++) + output_shape.dim(i + 1) = x_shape.dim(i); + return loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_fill(const luci::CircleFill *node) { loco::TensorShape shape; - shape.rank(node->rank()); - for (uint32_t r = 0; r < node->rank(); ++r) - shape.dim(r) = loco::Dimension(node->dim(r).value()); - return shape; + { + LUCI_ASSERT(node->dims(), "dims input should not be nullptr"); + + auto dims_node = dynamic_cast<luci::CircleConst *>(node->dims()); + if (dims_node != nullptr) + { + // Only support node with S32 + LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32, "Only support int32 CircleConst"); + + if (dims_node->rank() != 1) + INTERNAL_EXN_V("Only support rank 1 CircleConst", oops::to_uint32(dims_node->rank())); + + 
shape.rank(dims_node->dim(0).value()); + + for (uint32_t axis = 0; axis < shape.rank(); ++axis) + { + shape.dim(axis) = dims_node->at<loco::DataType::S32>(axis); + } + } + else + { + shape = own_shape(node); + } + } + + return loco::NodeShape{shape}; +} + +loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>(); + + // Checking shape capability for fully connected layer + // Input: a tensor of at least rank 2 [D1, D2, ... Dn] + // Weight: [# of units, K] + // Output: [D1 * D2 * ... * Dn / K, # of units] + if (input_shape.rank() < 2 || weights_shape.rank() != 2) + { + // Return node own shape if shape inference is not possible + return use_own(node); + } + + uint32_t input_size = 1; + for (uint32_t i = 0; i < input_shape.rank(); i++) + { + input_size = input_size * input_shape.dim(i).value(); + } + const uint32_t batch_size = input_size / weights_shape.dim(1).value(); + loco::TensorShape out_shape; + out_shape.rank(2); + out_shape.dim(0) = batch_size; + out_shape.dim(1) = weights_shape.dim(0); + + return loco::NodeShape{out_shape}; +} + +loco::NodeShape infer_gather(const luci::CircleGather *node) +{ + loco::TensorShape output_shape; + + const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>(); + const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>(); + int32_t axis = node->axis(); + + // If CircleGather input has a dynamic shape, it can't inference this shape. So, it returns the + // shape that node already has. 
+ if (input_shape.rank() == 0 || positions_shape.rank() == 0) + return use_own(node); + + if (axis < 0) + axis += input_shape.rank(); + + output_shape.rank(input_shape.rank() - 1 + positions_shape.rank()); + int32_t outdim_index = 0; + for (int32_t i = 0; i < axis; ++i) + output_shape.dim(outdim_index++) = input_shape.dim(i); + for (uint32_t i = 0; i < positions_shape.rank(); ++i) + output_shape.dim(outdim_index++) = positions_shape.dim(i); + for (uint32_t i = axis + 1; i < input_shape.rank(); ++i) + output_shape.dim(outdim_index++) = input_shape.dim(i); + + return loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_gather_nd(const luci::CircleGatherNd *node) +{ + loco::TensorShape output_shape; + + const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>(); + const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>(); + + const auto params_rank = params_shape.rank(); + const auto indices_rank = indices_shape.rank(); + + // see https://www.tensorflow.org/api_docs/python/tf/gather_nd + // output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:] + // batch_dims isn't supported in tflite + + // TODO: replace exceptions with setting shape to unknown? 
+ + if (!indices_shape.dim(indices_rank - 1).known()) + INTERNAL_EXN("Last indices dimension is unknown"); + + auto indices_last_dim = indices_shape.dim(indices_rank - 1).value(); + + if (indices_last_dim > params_rank) + INTERNAL_EXN("Last indices dimension should be <= params rank"); + + const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1; + + output_shape.rank(output_rank); + + uint32_t output_index = 0; + for (uint32_t i = 0; i < indices_rank - 1; ++i) + { + auto &dim = indices_shape.dim(i); + if (!dim.known()) + INTERNAL_EXN("Unknown indices dimension is unsupported"); + output_shape.dim(output_index++).set(dim.value()); + } + + for (uint32_t i = indices_last_dim; i < params_rank; ++i) + { + auto &dim = params_shape.dim(i); + if (!dim.known()) + INTERNAL_EXN("Unknown params dimension is unsupported"); + output_shape.dim(output_index++).set(dim.value()); + } + + return loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_matrix_diag(const luci::CircleMatrixDiag *node) +{ + loco::TensorShape output_shape; + + auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>(); + auto rank = diagonal_shape.rank(); + + output_shape.rank(rank + 1); + + for (uint32_t i = 0; i < rank; i++) + { + output_shape.dim(i) = diagonal_shape.dim(i); + } + + output_shape.dim(rank) = diagonal_shape.dim(rank - 1); + + return loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_matrix_set_diag(const luci::CircleMatrixSetDiag *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>(); + + auto rank = diagonal_shape.rank(); + + LUCI_ASSERT(rank == input_shape.rank() - 1, "diagonal rank = input rank - 1"); + + for (uint32_t i = 0; i < rank - 1; i++) + { + LUCI_ASSERT(diagonal_shape.dim(i) == input_shape.dim(i), "diagonal dims = input dims"); + } + + auto dim = std::min(input_shape.dim(rank - 1).value(), 
input_shape.dim(rank).value()); + + LUCI_ASSERT(dim == diagonal_shape.dim(rank - 1), "Max diag len error"); + + return loco::NodeShape{input_shape}; } loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indices, bool keep_dims) @@ -302,885 +882,1311 @@ loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indic return output_shape; } -/** - * @brief vector_from_constant will return int64_t vector from CircleConst node - */ -template <loco::DataType T> std::vector<int64_t> vector_from_constant(luci::CircleConst *const_node) +loco::NodeShape infer_mirror_pad(const luci::CircleMirrorPad *node) { - std::vector<int64_t> result; - - for (uint32_t idx = 0; idx < const_node->size<T>(); ++idx) - result.push_back(const_node->at<T>(idx)); + // TODO support non-const case + auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings()); + return use_paddings(node, paddings); +} - return result; +loco::NodeShape infer_one_hot(const luci::CircleOneHot *node) +{ + const loco::DataType S32 = loco::DataType::S32; + auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>(); + // Only support OneHot node's depth() is CircleConst with type S32 + // TODO support depth with other types + auto depth = loco::must_cast<luci::CircleConst *>(node->depth()); + LUCI_ASSERT(depth->dtype() == S32, "Only support int32 CircleConst"); + if (depth->rank() != 0) + INTERNAL_EXN_V("Only support rank 0 CircleOneHot in Depth", oops::to_uint32(depth->rank())); + loco::TensorShape output_shape; + output_shape.rank(indices_shape.rank() + 1); + auto axis = node->axis(); + if (axis < 0) + axis += indices_shape.rank() + 1; + LUCI_ASSERT(0 <= axis, "Axis is out of range"); + LUCI_ASSERT(static_cast<uint32_t>(axis) <= indices_shape.rank(), "Axis is out of range"); + uint32_t j = 0; + for (uint32_t i = 0; i < output_shape.rank(); i++) + { + if (i == static_cast<uint32_t>(axis)) + { + output_shape.dim(i) = depth->at<S32>(0); + } + else + { + 
output_shape.dim(i) = indices_shape.dim(j++); + } + } + return loco::NodeShape{output_shape}; } -template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node) +loco::NodeShape infer_pack(const luci::CirclePack *node) { - auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>(); - auto y_shape = loco::shape_get(node->y()).template as<loco::TensorShape>(); + LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs"); - auto output_shape = broadcast_shape(x_shape, y_shape); + auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>(); + // Make sure all inputs have the same shape. + for (uint32_t i = 1; i < node->values_count(); ++i) + { + auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>(); + LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape}, + "All inputs must have the same shape"); + } + + // Checking shape capability for pack layer + // Input: tensors [D1, D2, ... Dn] + // Axis: K + // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... 
Dn] + auto axis = node->axis(); + if (axis < 0) + axis += first_shape.rank() + 1; + + LUCI_ASSERT(0 <= axis, "Axis is out of range"); + LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range"); + + loco::TensorShape output_shape; + output_shape.rank(first_shape.rank() + 1); + + uint32_t j = 0; + for (uint32_t i = 0; i < output_shape.rank(); ++i) + { + if (i == static_cast<uint32_t>(axis)) + { + output_shape.dim(i) = node->values_count(); + } + else + { + output_shape.dim(i) = first_shape.dim(j++); + } + } return loco::NodeShape{output_shape}; } -template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node) +loco::NodeShape infer_pad(const luci::CirclePad *node) { - auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>(); - return loco::NodeShape{x_shape}; + // TODO support non-const case + auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings()); + return use_paddings(node, paddings); } -template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node) +loco::NodeShape infer_pad_v2(const luci::CirclePadV2 *node) { - auto shape = loco::shape_get(node->logits()).template as<loco::TensorShape>(); - return loco::NodeShape{shape}; + // TODO support non-const case + auto paddings = dynamic_cast<luci::CircleConst *>(node->paddings()); + if (!paddings) + { + auto node_shape = own_shape(node); + return loco::NodeShape{node_shape}; + } + return use_paddings(node, paddings); } -loco::NodeShape use_own(const luci::CircleNode *node) +loco::NodeShape infer_p_relu(const luci::CirclePRelu *node) { - loco::TensorShape shape = own_shape(node); - return loco::NodeShape{shape}; + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>(); + + auto output_shape = broadcast_shape(input_shape, alpha_shape); + + return loco::NodeShape{output_shape}; } -/** - * @brief Class to infer the shape of CircleNode - * - 
* @note All CircleNode's inputs and outputs are always loco::Domain::Tensor - */ -class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape> +loco::NodeShape infer_range(const luci::CircleRange *node) { -public: - loco::NodeShape visit(const luci::CircleAbs *node) final { return use_x(node); } + loco::TensorShape output_shape; + output_shape.rank(1); - loco::NodeShape visit(const luci::CircleAdd *node) final { return broadcast_xy(node); } + auto start_node = dynamic_cast<luci::CircleConst *>(node->start()); + auto limit_node = dynamic_cast<luci::CircleConst *>(node->limit()); + auto delta_node = dynamic_cast<luci::CircleConst *>(node->delta()); - loco::NodeShape visit(const luci::CircleAddN *node) final + if (start_node == nullptr || limit_node == nullptr || delta_node == nullptr) { - auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>(); + return use_own(node); + } - for (uint32_t idx = 1; idx < node->arity(); ++idx) - { - auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>(); - if (!(shape == shape_idx)) - { - INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx); - } - } + double start = 0, limit = 0, delta = 0; + +#define GET_RANGE_PARAM(DT) \ + start = start_node->scalar<DT>(); \ + limit = limit_node->scalar<DT>(); \ + delta = delta_node->scalar<DT>(); - return loco::NodeShape{shape}; + switch (start_node->dtype()) + { + case loco::DataType::FLOAT32: + GET_RANGE_PARAM(loco::DataType::FLOAT32) + break; + case loco::DataType::S32: + GET_RANGE_PARAM(loco::DataType::S32) + break; + default: + INTERNAL_EXN("Range data type not supported"); } - loco::NodeShape visit(const luci::CircleArgMax *node) final +#undef GET_RANGE_PARAM + + if (delta == 0) + INTERNAL_EXN("Delta can not be zero"); + + output_shape.dim(0) = ceil((limit - start) / delta); + + return loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_reshape(const luci::CircleReshape *node) +{ + LOGGER(l); + + const loco::DataType 
S32 = loco::DataType::S32; + + loco::TensorShape shape_by_input; { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>(); + LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr"); - int64_t select_axis = 0; + // Only support node's shape() is CircleConst with S32 + // TODO support other node with other types + auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape()); + if (const_shape_node != nullptr) { - LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr"); - - // Only support node's shape() is CircleConst with S32/S64 - // Support S32 for now. - auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension()); - LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32, - "Only support int32 CircleConst for CircleArgMax"); + LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst"); - if (const_shape_node->rank() > 1) - INTERNAL_EXN_V("Only support rank 0/1 CircleConst", - oops::to_uint32(const_shape_node->rank())); + shape_by_input.rank(const_shape_node->size<S32>()); - select_axis = const_shape_node->scalar<loco::DataType::S32>(); + for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis) + { + shape_by_input.dim(axis) = const_shape_node->at<S32>(axis); + } } - assert(select_axis < input_shape.rank()); - assert(select_axis >= 0); // TODO support minus of this breaks - - // NOTE select_axis is removed - loco::TensorShape shape_output; - uint32_t rank = input_shape.rank(); - uint32_t shrink = static_cast<uint32_t>(select_axis); - assert(rank > 0); - shape_output.rank(rank - 1); - for (uint32_t r = 0, d = 0; r < rank; ++r) + else { - if (r == shrink) - continue; - shape_output.dim(d++) = input_shape.dim(r); + // We use shape from the node itself + shape_by_input = own_shape(node); } - return loco::NodeShape{shape_output}; } - loco::NodeShape visit(const 
luci::CircleArgMin *node) final + loco::TensorShape shape_by_attr; { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>(); + shape_by_attr.rank(node->newShape()->rank()); - int64_t select_axis = 0; + for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis) { - LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr"); + shape_by_attr.dim(axis) = node->newShape()->dim(axis); + } + } - // Only support node's shape() is CircleConst with S32/S64 - // Support S32 for now. - auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension()); - LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32, - "Only support int32 CircleConst for CircleArgMin"); + if (!(shape_by_input == shape_by_attr)) + { + INFO(l) << "CircleReshape: Two new shape information mismatched : " << std::endl; + INFO(l) << " shape_by_input : " << shape_by_input << std::endl; + INFO(l) << " shape_by_attr : " << shape_by_attr << std::endl; + } - if (const_shape_node->rank() > 1) - INTERNAL_EXN_V("Only support rank 0/1 CircleConst", - oops::to_uint32(const_shape_node->rank())); + loco::TensorShape output_shape = shape_by_input; - select_axis = const_shape_node->scalar<loco::DataType::S32>(); + // One of the dimensions can have special value -1, meaning its actual value should be inferred. 
+ const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>(); + const uint32_t input_element_count = loco::element_count(&input_shape); + uint32_t output_element_count = 1; + uint32_t unknown_dim_index = UINT32_MAX; + for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index) + { + const uint32_t dim_value = output_shape.dim(dim_index).value(); + if (static_cast<int>(dim_value) == -1) + { + LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension"); + unknown_dim_index = dim_index; } - assert(select_axis < input_shape.rank()); - assert(select_axis >= 0); // TODO support minus of this breaks - - // NOTE select_axis is removed - loco::TensorShape shape_output; - uint32_t rank = input_shape.rank(); - uint32_t shrink = static_cast<uint32_t>(select_axis); - assert(rank > 0); - shape_output.rank(rank - 1); - for (uint32_t r = 0, d = 0; r < rank; ++r) + else { - if (r == shrink) - continue; - shape_output.dim(d++) = input_shape.dim(r); + output_element_count *= dim_value; } - return loco::NodeShape{shape_output}; } - - loco::NodeShape visit(const luci::CircleAveragePool2D *node) final + if (unknown_dim_index != UINT32_MAX) { - return infer_pool_2d_shape(node); + output_shape.dim(unknown_dim_index) = input_element_count / output_element_count; } - loco::NodeShape visit(const luci::CircleBatchMatMul *node) final - { - auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>(); - auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>(); + return loco::NodeShape{output_shape}; +} - return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y()); - } +loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final - { - const loco::DataType S32 = loco::DataType::S32; + if (input_shape.rank() != 4) + INTERNAL_EXN("Expected 
ResizeBilinear input to have rank 4"); - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - // Support only input rank is 3 and 4 - assert(input_shape.rank() == 3 || input_shape.rank() == 4); + auto *const_node = loco::must_cast<luci::CircleConst *>(node->size()); - // Only support block_shape() with S32 type CircleConst for now - auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape()); - LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32, - "Only support int32 block_shape"); + if (const_node->dtype() != loco::DataType::S32) + INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size"); - // Only support crops() with S32 type CircleConst for now - auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops()); - LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops"); + if (const_node->rank() != 1) + INTERNAL_EXN("Expected size tensor of rank 1"); - auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>(); - auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>(); - assert(const_block_shape_shape.rank() == 1); - assert(const_crops_shape.rank() == 2); + if (const_node->dim(0).value() != 2) + INTERNAL_EXN("Expected size tensor with shape [2]"); - int32_t input_spatial_dim = input_shape.rank() - 2; - assert(const_block_shape_shape.dim(0) == input_spatial_dim); - assert(const_crops_shape.dim(0) == input_spatial_dim); - assert(const_crops_shape.dim(1) == 2); + loco::TensorShape output_shape; + output_shape.rank(4); + output_shape.dim(0) = input_shape.dim(0); + output_shape.dim(1) = const_node->at<loco::DataType::S32>(0); + output_shape.dim(2) = const_node->at<loco::DataType::S32>(1); + output_shape.dim(3) = input_shape.dim(3); - loco::TensorShape shape_output; + return loco::NodeShape{output_shape}; +} - shape_output.rank(input_shape.rank()); +loco::NodeShape infer_resize_nearest_neighbor(const 
luci::CircleResizeNearestNeighbor *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - int32_t output_batch_size = input_shape.dim(0).value(); - for (int32_t dim = 0; dim < input_spatial_dim; ++dim) - { - int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim); - dim_size -= const_crops->at<S32>(dim * 2); - dim_size -= const_crops->at<S32>(dim * 2 + 1); - shape_output.dim(dim + 1) = dim_size; + if (input_shape.rank() != 4) + INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4"); - assert(output_batch_size % const_block_shape->at<S32>(dim) == 0); - output_batch_size = output_batch_size / const_block_shape->at<S32>(dim); - } - shape_output.dim(0) = output_batch_size; - shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1); + auto *const_node = loco::must_cast<luci::CircleConst *>(node->size()); - return loco::NodeShape{shape_output}; - } + if (const_node->dtype() != loco::DataType::S32) + INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size"); - loco::NodeShape visit(const luci::CircleCast *node) final { return use_x(node); } + if (const_node->rank() != 1) + INTERNAL_EXN("Expected size tensor of rank 1"); - loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); } + if (const_node->dim(0).value() != 2) + INTERNAL_EXN("Expected size tensor with shape [2]"); - loco::NodeShape visit(const luci::CircleConcatenation *node) final - { - // TODO Support when CircleConcatenation has 0 input - assert(node->numValues() > 0); + loco::TensorShape output_shape; + output_shape.rank(4); + output_shape.dim(0) = input_shape.dim(0); + output_shape.dim(1) = const_node->at<loco::DataType::S32>(0); + output_shape.dim(2) = const_node->at<loco::DataType::S32>(1); + output_shape.dim(3) = input_shape.dim(3); - auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>(); - auto axis = node->axis(); - if (axis < 0) - axis += 
first_shape.rank(); + return loco::NodeShape{output_shape}; +} - assert(0 <= axis); - assert(first_shape.rank() > static_cast<uint32_t>(axis)); +loco::NodeShape infer_scatter_nd(const luci::CircleScatterNd *node) +{ + loco::TensorShape output_shape; - loco::TensorShape output_shape; + auto shape_node = loco::must_cast<luci::CircleConst *>(node->shape()); - output_shape.rank(first_shape.rank()); - for (uint32_t i = 0; i < output_shape.rank(); ++i) - output_shape.dim(i) = first_shape.dim(i); + const loco::DataType S32 = loco::DataType::S32; + const loco::DataType S64 = loco::DataType::S64; - for (uint32_t i = 1; i < node->numValues(); ++i) - { - auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>(); + std::vector<int64_t> vect_shape; - for (uint32_t j = 0; j < output_shape.rank(); ++j) - { - if (j == static_cast<uint32_t>(axis)) - output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value(); - else - assert(output_shape.dim(j) == input_shape.dim(j)); - } - } + if (shape_node->dtype() == S32) + vect_shape = vector_from_constant<S32>(shape_node); + else if (shape_node->dtype() == S64) + vect_shape = vector_from_constant<S64>(shape_node); + else + LUCI_ASSERT(false, "Only support int32/int64 for shape()"); - return loco::NodeShape{output_shape}; - } + output_shape.rank(vect_shape.size()); + for (uint32_t i = 0; i < vect_shape.size(); ++i) + output_shape.dim(i) = vect_shape[i]; - loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); } + return loco::NodeShape{output_shape}; +} - loco::NodeShape visit(const luci::CircleConv2D *node) final - { - LOGGER(l); +loco::NodeShape infer_segment_sum(const luci::CircleSegmentSum *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>(); - auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC - auto ker_shape = 
loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI + LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor"); + LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(), + "segment_ids size must be equal to the size of data's first dimension"); - INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker(" - << ker_shape.rank() << ")" << std::endl; + auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->segment_ids()); - assert(ifm_shape.rank() == 4); - assert(ker_shape.rank() == 4); - assert(ifm_shape.dim(3) == ker_shape.dim(3)); + std::vector<int64_t> vect_ids; - uint32_t input_height = ifm_shape.dim(1).value(); - uint32_t input_width = ifm_shape.dim(2).value(); - uint32_t stride_height = node->stride()->h(); - uint32_t stride_width = node->stride()->w(); - uint32_t ker_height = ker_shape.dim(1).value(); - uint32_t ker_width = ker_shape.dim(2).value(); - uint32_t dilation_height = node->dilation()->h(); - uint32_t dilation_width = node->dilation()->w(); - uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1; - uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1; + if (ids_shape_value->dtype() == loco::DataType::S32) + vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value); - uint32_t output_height = 0; - uint32_t output_width = 0; + LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()), + "segment_ids values should be sorted") - if (node->padding() == luci::Padding::VALID) - { - output_height = (input_height + stride_height - effective_ker_height) / stride_height; - output_width = (input_width + stride_width - effective_ker_width) / stride_width; - } - else if (node->padding() == luci::Padding::SAME) + loco::TensorShape output_shape; + + output_shape.rank(input_shape.rank()); + + for (uint32_t i = 1; i < input_shape.rank(); ++i) + output_shape.dim(i) = input_shape.dim(i); + + output_shape.dim(0) = vect_ids.back() + 1; + + return 
loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_select(const luci::CircleSelect *node) +{ + auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>(); + assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>()); + + // condition shape validation + auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>(); + if (c_shape.rank() != t_shape.rank()) + { + if (c_shape.rank() != 0 && c_shape.rank() != 1) + INTERNAL_EXN_V("CircleSelect condition rank is not 0 nor 1: ", c_shape.rank()); + + if (c_shape.rank() == 1) { - output_height = (input_height + stride_height - 1) / stride_height; - output_width = (input_width + stride_width - 1) / stride_width; + if (c_shape.dim(0).value() != t_shape.dim(0).value()) + INTERNAL_EXN("CircleSelect condition dim(0) should match with t.dim(0)"); } - else - LUCI_ASSERT(false, "Wrong padding type"); + } - loco::TensorShape ofm_shape; - ofm_shape.rank(4); - ofm_shape.dim(0) = ifm_shape.dim(0); - ofm_shape.dim(1) = output_height; - ofm_shape.dim(2) = output_width; - ofm_shape.dim(3) = ker_shape.dim(0); + return loco::NodeShape{t_shape}; +} - return loco::NodeShape{ofm_shape}; - } +loco::NodeShape infer_select_v2(const luci::CircleSelectV2 *node) +{ + auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>(); + auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>(); + auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>(); - loco::NodeShape visit(const luci::CircleCos *node) final { return use_x(node); } + // validate ability to broadcast shapes to each other + auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape); + return loco::NodeShape{b_shape}; +} - loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); } +loco::NodeShape infer_shape(const luci::CircleShape *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - loco::NodeShape visit(const luci::CircleDepthToSpace *node) 
final - { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported"); + loco::TensorShape output_shape; - // Only data format NHWC is supported - // TODO need to clarify what to do with layout in this operator - int32_t height = input_shape.dim(1).value(); - int32_t width = input_shape.dim(2).value(); - int32_t depth = input_shape.dim(3).value(); + output_shape.rank(1); + output_shape.dim(0) = input_shape.rank(); - int block_size = node->block_size(); + return loco::NodeShape{output_shape}; +} - if (block_size < 2) - INTERNAL_EXN("Block size must be >= 2"); +loco::NodeShape infer_slice(const luci::CircleSlice *node) +{ + const loco::DataType S32 = loco::DataType::S32; + const loco::DataType S64 = loco::DataType::S64; + + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - if (depth % (block_size * block_size)) + auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin()); + auto const_size = loco::must_cast<luci::CircleConst *>(node->size()); + + loco::TensorShape output_shape; + std::vector<int64_t> vect_begin; // to hold both S32/S64, we use int64_t + std::vector<int64_t> vect_size; + + if (const_begin->dtype() == S32) + vect_begin = vector_from_constant<S32>(const_begin); + else if (const_begin->dtype() == S64) + vect_begin = vector_from_constant<S64>(const_begin); + else + LUCI_ASSERT(false, "Only support int32/int64 for begin()"); + + if (const_size->dtype() == S32) + vect_size = vector_from_constant<S32>(const_size); + else if (const_size->dtype() == S64) + vect_size = vector_from_constant<S64>(const_size); + else + LUCI_ASSERT(false, "Only support int32/int64 for size()"); + + assert(input_shape.rank() == vect_begin.size()); + assert(input_shape.rank() == vect_size.size()); + + output_shape.rank(vect_begin.size()); + for (uint32_t idx = 0; idx < vect_begin.size(); ++idx) + { + auto size = vect_size.at(idx); + if (size == -1) { - 
INTERNAL_EXN("The input tensor's depth must be divisible by block_size^2"); + size = input_shape.dim(idx).value() - vect_begin.at(idx); } + output_shape.dim(idx) = size; + } - loco::TensorShape output_shape; - output_shape.rank(4); + return loco::NodeShape{output_shape}; +} - output_shape.dim(0) = input_shape.dim(0).value(); - output_shape.dim(1) = height * block_size; - output_shape.dim(2) = width * block_size; - output_shape.dim(3) = depth / (block_size * block_size); +loco::NodeShape infer_space_to_batch_nd(const luci::CircleSpaceToBatchND *node) +{ + const loco::DataType S32 = loco::DataType::S32; - return loco::NodeShape{output_shape}; - } + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + // Support only input rank is 3 and 4 + assert(input_shape.rank() == 3 || input_shape.rank() == 4); - loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final - { - auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC - auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM + // Only support block_shape() with S32 type CircleConst for now + auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape()); + LUCI_ASSERT(const_block_shape->dtype() == S32, "Only support int32 block_shape"); - assert(ifm_shape.rank() == 4); - assert(ker_shape.rank() == 4); - assert(ker_shape.dim(0).value() == 1); + // Only support paddings() with S32 type CircleConst for now + auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings()); + LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings"); - uint32_t input_height = ifm_shape.dim(1).value(); - uint32_t input_width = ifm_shape.dim(2).value(); - uint32_t stride_height = node->stride()->h(); - uint32_t stride_width = node->stride()->w(); - uint32_t ker_height = ker_shape.dim(1).value(); - uint32_t ker_width = ker_shape.dim(2).value(); - uint32_t dilation_height = node->dilation()->h(); - 
uint32_t dilation_width = node->dilation()->w(); - uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1; - uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1; + auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>(); + auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>(); + assert(const_block_shape_shape.rank() == 1); + assert(const_paddings_shape.rank() == 2); - uint32_t output_height = 0; - uint32_t output_width = 0; + int32_t input_spatial_dim = input_shape.rank() - 2; + assert(const_block_shape_shape.dim(0) == input_spatial_dim); + assert(const_paddings_shape.dim(0) == input_spatial_dim); + assert(const_paddings_shape.dim(1) == 2); - if (node->padding() == luci::Padding::VALID) - { - output_height = (input_height + stride_height - effective_ker_height) / stride_height; - output_width = (input_width + stride_width - effective_ker_width) / stride_width; - } - else if (node->padding() == luci::Padding::SAME) + // Check all values of block_shape >= 1 + uint32_t ele_count = const_block_shape->size<S32>(); + for (uint32_t e = 0; e < ele_count; ++e) + { + auto val = const_block_shape->at<S32>(e); + if (val < 1) { - output_height = (input_height + stride_height - 1) / stride_height; - output_width = (input_width + stride_width - 1) / stride_width; + INTERNAL_EXN_V("All values of block_shape >= 1: ", e); } - else - LUCI_ASSERT(false, "Wrong padding type"); + } - loco::TensorShape ofm_shape; - ofm_shape.rank(4); - ofm_shape.dim(0) = ifm_shape.dim(0); - ofm_shape.dim(1) = output_height; - ofm_shape.dim(2) = output_width; - ofm_shape.dim(3) = ker_shape.dim(3); + loco::TensorShape shape_output; - return loco::NodeShape{ofm_shape}; + shape_output.rank(input_shape.rank()); + + int32_t output_batch_size = input_shape.dim(0).value(); + for (int32_t dim = 0; dim < input_spatial_dim; ++dim) + { + int dim_size = input_shape.dim(dim + 1).value(); + dim_size += 
const_paddings->at<S32>(dim * 2); + dim_size += const_paddings->at<S32>(dim * 2 + 1); + shape_output.dim(dim + 1) = dim_size / const_block_shape->at<S32>(dim); + + assert(dim_size % const_block_shape->at<S32>(dim) == 0); + output_batch_size = output_batch_size * const_block_shape->at<S32>(dim); } + shape_output.dim(0) = output_batch_size; + shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1); - loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); } + return loco::NodeShape{shape_output}; +} - loco::NodeShape visit(const luci::CircleElu *node) final - { - auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>(); +loco::NodeShape infer_space_to_depth(const luci::CircleSpaceToDepth *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported"); - return loco::NodeShape{input_shape}; + // Only data format NHWC is supported + int32_t height = input_shape.dim(1).value(); + int32_t width = input_shape.dim(2).value(); + int32_t depth = input_shape.dim(3).value(); + + int block_size = node->block_size(); + + if (block_size < 2) + INTERNAL_EXN("Block size must be >= 2"); + + if ((height % block_size) || (width % block_size)) + { + INTERNAL_EXN("The input tensor's height and width must be divisible by block_size"); } - loco::NodeShape visit(const luci::CircleEqual *node) final { return broadcast_xy(node); } + loco::TensorShape output_shape; + output_shape.rank(4); - loco::NodeShape visit(const luci::CircleExp *node) final { return use_x(node); } + output_shape.dim(0) = input_shape.dim(0).value(); + output_shape.dim(1) = height / block_size; + output_shape.dim(2) = width / block_size; + output_shape.dim(3) = block_size * block_size * depth; - loco::NodeShape visit(const luci::CircleExpandDims *node) final + return loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_sparse_to_dense(const 
luci::CircleSparseToDense *node) +{ + loco::TensorShape shape; { - const loco::DataType S32 = loco::DataType::S32; - auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - if (x_shape.rank() == 0) + LUCI_ASSERT(node->output_shape(), "dims input should not be nullptr"); + + auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape()); + if (output_shape_node != nullptr) { - // This maybe for unknown shape. We use shape from the node itself. - return use_own(node); + // Only support node with S32 + LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32, + "Only support int32 CircleConst"); + + if (output_shape_node->rank() != 1) + INTERNAL_EXN_V("Only support rank 1 CircleConst", + oops::to_uint32(output_shape_node->rank())); + + shape.rank(output_shape_node->size<loco::DataType::S32>()); + + for (uint32_t axis = 0; axis < shape.rank(); ++axis) + { + shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis); + } } - auto const_axis = loco::must_cast<luci::CircleConst *>(node->axis()); - LUCI_ASSERT(const_axis->dtype() == S32, "Only support int32 CircleConst for axis"); - if (const_axis->rank() != 0 && const_axis->rank() != 1) + else { - INTERNAL_EXN_V("Non-scalar axis in OP", node->opnum()); + shape = own_shape(node); } - int32_t axis = const_axis->at<S32>(0); - LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) && - (axis >= -1 - static_cast<int32_t>(x_shape.rank())), - "Axis has to be between [-(D+1), D], where D is rank of input."); - size_t positive_axis = axis < 0 ? 
x_shape.rank() + axis + 1 : axis; - loco::TensorShape output_shape; - output_shape.rank(x_shape.rank() + 1); - size_t i = 0; - for (; i < positive_axis; i++) - output_shape.dim(i) = x_shape.dim(i); - output_shape.dim(i) = loco::Dimension(1); - for (; i < x_shape.rank(); i++) - output_shape.dim(i + 1) = x_shape.dim(i); - return loco::NodeShape{output_shape}; } - loco::NodeShape visit(const luci::CircleFill *node) final + return loco::NodeShape{shape}; +} + +loco::NodeShape infer_strided_slice(const luci::CircleStridedSlice *node) +{ + auto begin_node = dynamic_cast<luci::CircleConst *>(node->begin()); + auto end_node = dynamic_cast<luci::CircleConst *>(node->end()); + auto strides_node = dynamic_cast<luci::CircleConst *>(node->strides()); + + if (begin_node == nullptr || end_node == nullptr || strides_node == nullptr) { - loco::TensorShape shape; - { - LUCI_ASSERT(node->dims(), "dims input should not be nullptr"); + return use_own(node); + } - auto dims_node = dynamic_cast<luci::CircleConst *>(node->dims()); - if (dims_node != nullptr) - { - // Only support node with S32 - LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32, "Only support int32 CircleConst"); + loco::TensorShape shape = infer_output_shape(node); + return loco::NodeShape{shape}; +} - if (dims_node->rank() != 1) - INTERNAL_EXN_V("Only support rank 1 CircleConst", oops::to_uint32(dims_node->rank())); +loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - shape.rank(dims_node->dim(0).value()); + // TODO input shape may be unknown before runtime + std::vector<bool> do_squeeze(input_shape.rank(), false); + uint32_t num_squeezed = 0; - for (uint32_t axis = 0; axis < shape.rank(); ++axis) - { - shape.dim(axis) = dims_node->at<loco::DataType::S32>(axis); - } + if (!node->squeeze_dims().empty()) + { + // SqueezeDims not empty, squeeze only dims specified + for (int32_t raw_dim : node->squeeze_dims()) + { + int32_t dim 
= raw_dim < 0 ? raw_dim + input_shape.rank() : raw_dim; + + if (dim < 0 || static_cast<uint32_t>(dim) >= input_shape.rank() || + input_shape.dim(dim).value() != 1) + { + INTERNAL_EXN("invalid dimention specified to Squeeze"); } - else + + if (!do_squeeze[dim]) + ++num_squeezed; + do_squeeze[dim] = true; + } + } + else + { + // SqueezeDims empty, squeeze any dims with size == 1 + for (uint32_t dim = 0; dim < input_shape.rank(); ++dim) + { + if (input_shape.dim(dim) == 1) { - shape = own_shape(node); + do_squeeze[dim] = true; + ++num_squeezed; } } + } - return loco::NodeShape{shape}; + loco::TensorShape output_shape; + output_shape.rank(input_shape.rank() - num_squeezed); + + for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim) + { + if (!do_squeeze[in_dim]) + { + output_shape.dim(out_dim++) = input_shape.dim(in_dim); + } } - loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); } + return loco::NodeShape{output_shape}; +} - loco::NodeShape visit(const luci::CircleFloorDiv *node) final { return broadcast_xy(node); } +loco::NodeShape infer_tile(const luci::CircleTile *node) +{ + const loco::DataType S32 = loco::DataType::S32; - loco::NodeShape visit(const luci::CircleFloorMod *node) final { return broadcast_xy(node); } + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples()); - loco::NodeShape visit(const luci::CircleFullyConnected *node) final - { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>(); + // TODO support non-const case + // TODO support S64 type + LUCI_ASSERT(multiples->dtype() == S32, "Only support int32 multiples"); + LUCI_ASSERT(multiples->rank() == 1, "multiples should be rank 1") - // Checking shape capability for fully connected layer - // Input: a tensor of at least rank 2 [D1, D2, ... 
Dn] - // Weight: [# of units, K] - // Output: [D1 * D2 * ... * Dn / K, # of units] - if (input_shape.rank() < 2 || weights_shape.rank() != 2) - { - // Return node own shape if shape inference is not possible - return use_own(node); - } + uint32_t n = multiples->dim(0).value(); - uint32_t input_size = 1; - for (uint32_t i = 0; i < input_shape.rank(); i++) - { - input_size = input_size * input_shape.dim(i).value(); - } - const uint32_t batch_size = input_size / weights_shape.dim(1).value(); - loco::TensorShape out_shape; - out_shape.rank(2); - out_shape.dim(0) = batch_size; - out_shape.dim(1) = weights_shape.dim(0); + LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same with input rank"); - return loco::NodeShape{out_shape}; - } + loco::TensorShape output_shape; - loco::NodeShape visit(const luci::CircleGather *node) final + output_shape.rank(input_shape.rank()); + for (uint32_t ni = 0; ni < n; ++ni) { - loco::TensorShape output_shape; + int32_t multiple = multiples->at<S32>(ni); + output_shape.dim(ni) = input_shape.dim(ni).value() * static_cast<uint32_t>(multiple); + } - const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>(); - const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>(); - int32_t axis = node->axis(); + return loco::NodeShape{output_shape}; +} - // If CircleGather input has a dynamic shape, it can't inference this shape. So, it returns the - // shape that node already has. 
- if (input_shape.rank() == 0 || positions_shape.rank() == 0) - return use_own(node); +loco::NodeShape infer_transpose(const luci::CircleTranspose *node) +{ + auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>(); - if (axis < 0) - axis += input_shape.rank(); + auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm()); - output_shape.rank(input_shape.rank() - 1 + positions_shape.rank()); - int32_t outdim_index = 0; - for (int32_t i = 0; i < axis; ++i) - output_shape.dim(outdim_index++) = input_shape.dim(i); - for (uint32_t i = 0; i < positions_shape.rank(); ++i) - output_shape.dim(outdim_index++) = positions_shape.dim(i); - for (uint32_t i = axis + 1; i < input_shape.rank(); ++i) - output_shape.dim(outdim_index++) = input_shape.dim(i); + loco::TensorShape output_shape; + output_shape.rank(input_shape.rank()); - return loco::NodeShape{output_shape}; - } + assert(perm_node->dtype() == loco::DataType::S32); + assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>()); - loco::NodeShape visit(const luci::CircleGatherNd *node) final + for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++) { - loco::TensorShape output_shape; + auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis); + output_shape.dim(out_axis) = input_shape.dim(in_axis); + } - const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>(); - const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>(); + return output_shape; +} - const auto params_rank = params_shape.rank(); - const auto indices_rank = indices_shape.rank(); +loco::NodeShape infer_transpose_conv(const luci::CircleTransposeConv *node) +{ + // TransposeConv's output shape is written in its 'inputSizes' argument + auto input_sizes_const = loco::must_cast<luci::CircleConst *>(node->inputSizes()); + // TODO support non-const type + LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype") + 
LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4, + "Only support rank 1 with 4 entries") - // see https://www.tensorflow.org/api_docs/python/tf/gather_nd - // output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:] - // batch_dims isn't supported in tflite + loco::TensorShape shape; - // TODO: replace exceptions with setting shape to unknown? + shape.rank(4); + for (uint32_t axis = 0; axis < 4; ++axis) + shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis); - if (!indices_shape.dim(indices_rank - 1).known()) - INTERNAL_EXN("Last indices dimension is unknown"); + return loco::NodeShape{shape}; +} - auto indices_last_dim = indices_shape.dim(indices_rank - 1).value(); +loco::NodeShape infer_unpack(const luci::CircleUnpack *node) +{ + // CircleUnpack provides list(array) of Tensors which has one less dimension of the input + // We'll set shape of CircleUnpack to shape of actual outputs + // TODO fix this if any problem rises + auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>(); - if (indices_last_dim > params_rank) - INTERNAL_EXN("Last indices dimension should be <= params rank"); + auto axis = node->axis(); + auto num = node->num(); + auto rank = static_cast<int32_t>(value_shape.rank()); - const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1; + if (rank == 0) + { + // Unknown shape + return use_own(node); + } - output_shape.rank(output_rank); + LUCI_ASSERT(-rank <= axis && axis < rank, "Axis is out of range"); - uint32_t output_index = 0; - for (uint32_t i = 0; i < indices_rank - 1; ++i) - { - auto &dim = indices_shape.dim(i); - if (!dim.known()) - INTERNAL_EXN("Unknown indices dimension is unsupported"); - output_shape.dim(output_index++).set(dim.value()); - } + if (axis < 0) + axis += rank; - for (uint32_t i = indices_last_dim; i < params_rank; ++i) - { - auto &dim = params_shape.dim(i); - if (!dim.known()) - INTERNAL_EXN("Unknown params dimension is 
unsupported"); - output_shape.dim(output_index++).set(dim.value()); - } + LUCI_ASSERT(num == static_cast<int32_t>(value_shape.dim(axis).value()), + "num, axis maybe incorrect"); - return loco::NodeShape{output_shape}; + loco::TensorShape output_shape; + output_shape.rank(rank - 1); + + for (int32_t i = 0, o = 0; i < rank; ++i) + { + if (i != axis) + output_shape.dim(o++) = value_shape.dim(i); } - loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); } + return loco::NodeShape{output_shape}; +} - loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); } +loco::NodeShape infer_unique(const luci::CircleUnique *node) +{ + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - loco::NodeShape visit(const luci::CircleIf *node) final + assert(input_shape.rank() == 1); + + loco::TensorShape shape_output; + shape_output = own_shape(node); + + return loco::NodeShape{shape_output}; +} + +// Circle Only +loco::NodeShape infer_bcq_fully_connected(const luci::CircleBCQFullyConnected *node) +{ + loco::TensorShape out_shape; + + auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters()); + + LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2"); + + int32_t qbits_sum = 0; + for (uint32_t i = 0; i < weights_clusters->dim(0).value(); ++i) { - // Shape of CircleIf is not used. 
Just use input 0 - assert(node->input_count() > 0); - const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>(); - return loco::NodeShape{input_shape}; + qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1); } - loco::NodeShape visit(const luci::CircleL2Normalize *node) final { return use_x(node); } + out_shape.rank(2); + out_shape.dim(0) = qbits_sum; + out_shape.dim(1) = input_shape.dim(1); - loco::NodeShape visit(const luci::CircleL2Pool2D *node) final + return loco::NodeShape{out_shape}; +} + +loco::NodeShape infer_bcq_gather(const luci::CircleBCQGather *node) +{ + loco::TensorShape input_shape; + loco::TensorShape output_shape; + + const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>(); + const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>(); + auto axis = node->axis(); + + auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters()); + auto qbits_sum = 0; + for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i) { - return infer_pool_2d_shape(node); + qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1); } - loco::NodeShape visit(const luci::CircleLeakyRelu *node) final + input_shape.rank(2); + input_shape.dim(0) = qbits_sum; + input_shape.dim(1) = input_binary_shape.dim(1).value() * 32; + + output_shape.rank(input_shape.rank() - 1 + indices_shape.rank()); + int32_t outdim_index = 0; + for (int32_t i = 0; i < axis; ++i) + output_shape.dim(outdim_index++) = input_shape.dim(i); + for (uint32_t i = 0; i < indices_shape.rank(); ++i) + output_shape.dim(outdim_index++) = indices_shape.dim(i); + for (uint32_t i = axis + 1; i < input_shape.rank(); ++i) + output_shape.dim(outdim_index++) = input_shape.dim(i); + + return loco::NodeShape{output_shape}; +} + +// Virtual +loco::NodeShape infer_input(const luci::CircleInput *node) +{ + loco::TensorShape shape; + + shape.rank(node->rank()); + for (uint32_t axis = 0; axis < 
node->rank(); axis++) + shape.dim(axis) = node->dim(axis); + + return loco::NodeShape{shape}; +} + +loco::NodeShape infer_output(const luci::CircleOutput *node) +{ + auto graph_outputs = node->graph()->outputs(); + auto graph_output = graph_outputs->at(node->index()); + auto output_shape = graph_output->shape(); + + return loco::NodeShape{*output_shape}; +} + +loco::NodeShape infer_if_out(const luci::CircleIfOut *node) +{ + /** + * @note IF operator type and shape are that of the "then" and "else" + * Graph Outputs. + */ + auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input()); + if (circle_if == nullptr) { - const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>(); - return loco::NodeShape{input_shape}; + INTERNAL_EXN("CircleIf IR is not configured correctly"); } - loco::NodeShape visit(const luci::CircleLess *node) final { return broadcast_xy(node); } + auto index = node->index(); + auto then_graph = circle_if->then_graph(); + auto else_graph = circle_if->else_graph(); + assert(then_graph != nullptr); + assert(else_graph != nullptr); - loco::NodeShape visit(const luci::CircleLessEqual *node) final { return broadcast_xy(node); } + // shape and type are assumed to be same + // these are checked at post_import_graph() in Import + auto then_outputs = loco::output_nodes(then_graph); + auto else_outputs = loco::output_nodes(else_graph); + assert(then_outputs.size() == else_outputs.size()); + assert(index < static_cast<int32_t>(then_outputs.size())); - loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final + auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index)); + auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index)); + + auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items + auto else_graph_outputs = else_graph->outputs(); + assert(then_graph_outputs->size() == else_graph_outputs->size()); + + auto then_graph_output = 
then_graph_outputs->at(then_out->index()); + auto else_graph_output = else_graph_outputs->at(else_out->index()); + (void)else_graph_output; // make compiler happy for unused variable warnings + assert(*then_graph_output->shape() == *else_graph_output->shape()); + + return loco::NodeShape{*then_graph_output->shape()}; +} + +loco::NodeShape infer_non_max_suppression_v4_out(const luci::CircleNonMaxSuppressionV4Out *node) +{ + const loco::DataType S32 = loco::DataType::S32; + + auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input()); + if (nmsv4 == nullptr) + INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly"); + + auto index = node->index(); + if (index == 1) + return loco::TensorShape({0}); + + assert(index == 0); + + auto unknown = loco::TensorShape{loco::Dimension()}; + auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size()); + if (max_output_size == nullptr) + return unknown; // we need CircleConst for max output size + + LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size"); + + if (max_output_size->size<S32>() < 1) + return unknown; + + auto max_output_size_value = uint32_t(max_output_size->at<S32>(0)); + return loco::TensorShape{max_output_size_value}; +} + +loco::NodeShape infer_non_max_suppression_v5_out(const luci::CircleNonMaxSuppressionV5Out *node) +{ + const loco::DataType S32 = loco::DataType::S32; + + auto nmsv5 = dynamic_cast<const luci::CircleNonMaxSuppressionV5 *>(node->input()); + if (nmsv5 == nullptr) + INTERNAL_EXN("CircleNonMaxSuppressionV5 IR is not configured correctly"); + + auto index = node->index(); + if (index == 2) + return loco::TensorShape({0}); + + assert(index == 0 || index == 1); + + auto unknown = loco::TensorShape{loco::Dimension()}; + auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv5->max_output_size()); + if (max_output_size == nullptr) + return unknown; // we need CircleConst for max output size + + 
LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size"); + + if (max_output_size->size<S32>() < 1) + return unknown; + + auto max_output_size_value = uint32_t(max_output_size->at<S32>(0)); + return loco::TensorShape{max_output_size_value}; +} + +loco::NodeShape infer_split_out(const luci::CircleSplitOut *node) +{ + const loco::DataType S32 = loco::DataType::S32; + + auto split = dynamic_cast<const luci::CircleSplit *>(node->input()); + if (split == nullptr) + INTERNAL_EXN("CircleSplit IR is not configured correctly"); + + loco::NodeShape unknown; + + auto split_shape = loco::shape_get(split).as<loco::TensorShape>(); + + auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim()); + if (split_dim == nullptr) + return unknown; // we need CircleConst for split_dim + LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim"); + + assert(split_dim->size<S32>() == 1); + auto split_dim_axis = split_dim->at<S32>(0); + if (split_dim_axis < 0) + split_dim_axis += split_shape.rank(); + + auto split_dim_value = split_shape.dim(split_dim_axis).value(); + assert(split_dim_value % split->num_split() == 0); + const int split_depth = split_dim_value / split->num_split(); + + loco::TensorShape output_shape = split_shape; + + // All shapes are equally same + output_shape.dim(split_dim_axis) = loco::Dimension(split_depth); + + return loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_split_v_out(const luci::CircleSplitVOut *node) +{ + const loco::DataType S32 = loco::DataType::S32; + + auto split = dynamic_cast<const luci::CircleSplitV *>(node->input()); + if (split == nullptr) + INTERNAL_EXN("CircleSplit IR is not configured correctly"); + + loco::NodeShape unknown; + + auto split_shape = loco::shape_get(split).as<loco::TensorShape>(); + + auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits()); + if (size_splits == nullptr) + return unknown; // we need CircleConst for size_splits + 
LUCI_ASSERT(size_splits->dtype() == S32, "Only support int32 for size_splits"); + + auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim()); + if (split_dim == nullptr) + return unknown; // we need CircleConst for split_dim + LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim"); + + // fetch axis + assert(split_dim->size<S32>() == 1); + auto split_dim_axis = split_dim->at<S32>(0); + if (split_dim_axis < 0) + split_dim_axis += split_shape.rank(); + + // interpret size_splits values + int32_t size_splits_count = static_cast<int32_t>(size_splits->size<S32>()); + assert(size_splits_count == split->num_split()); + + int64_t minus_one_count = 0, size_splits_sum = 0; + for (int32_t idx = 0; idx < size_splits_count; ++idx) { - const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - return loco::NodeShape{input_shape}; + auto size = size_splits->at<S32>(idx); + assert(size >= -1); + if (size == -1) + ++minus_one_count; + else + size_splits_sum += size; } + if (minus_one_count > 1) + INTERNAL_EXN("CircleSplitV size_splits has more than two -1 values"); - loco::NodeShape visit(const luci::CircleLog *node) final { return use_x(node); } + // calcuate this SplitVOut shape + auto input_size = split_shape.dim(split_dim_axis).value(); + assert(size_splits_sum <= input_size); - loco::NodeShape visit(const luci::CircleLogicalAnd *node) final { return use_x(node); } + auto index_this = node->index(); + assert(0 <= index_this && index_this < split->num_split()); + auto split_depth = size_splits->at<S32>(index_this); + if (split_depth == -1) + split_depth = input_size - size_splits_sum; - loco::NodeShape visit(const luci::CircleLogicalNot *node) final { return use_x(node); } + loco::TensorShape output_shape = split_shape; - loco::NodeShape visit(const luci::CircleLogicalOr *node) final { return use_x(node); } + output_shape.dim(split_dim_axis) = loco::Dimension(split_depth); - loco::NodeShape visit(const 
luci::CircleLogistic *node) final { return use_x(node); } + return loco::NodeShape{output_shape}; +} - loco::NodeShape visit(const luci::CircleMatrixSetDiag *node) final - { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>(); +loco::NodeShape infer_top_k_v2_out(const luci::CircleTopKV2Out *node) +{ + const loco::DataType S32 = loco::DataType::S32; - auto rank = diagonal_shape.rank(); + auto topkv2 = dynamic_cast<const luci::CircleTopKV2 *>(node->input()); + if (topkv2 == nullptr) + INTERNAL_EXN("CircleSplit IR is not configured correctly"); - LUCI_ASSERT(rank == input_shape.rank() - 1, "diagonal rank = input rank - 1"); + // shape of topkv2 is same as topkv2->input() + auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>(); - for (uint32_t i = 0; i < rank - 1; i++) - { - LUCI_ASSERT(diagonal_shape.dim(i) == input_shape.dim(i), "diagonal dims = input dims"); - } + auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k()); + LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32"); + assert(node_k->size<S32>() == 1); - auto dim = std::min(input_shape.dim(rank - 1).value(), input_shape.dim(rank).value()); + loco::TensorShape output_shape; - LUCI_ASSERT(dim == diagonal_shape.dim(rank - 1), "Max diag len error"); + output_shape.rank(input_shape.rank()); + for (uint32_t idx = 0; idx < input_shape.rank() - 1; ++idx) + { + output_shape.dim(idx) = input_shape.dim(idx); + } + output_shape.dim(input_shape.rank() - 1) = node_k->at<S32>(0); - return loco::NodeShape{input_shape}; + return loco::NodeShape{output_shape}; +} + +loco::NodeShape infer_unique_out(const luci::CircleUniqueOut *node) +{ + if (node->index() == 0) + { + auto unique_shape = own_shape(node); + return loco::NodeShape{unique_shape}; } + assert(node->index() == 1); + auto unique = loco::must_cast<luci::CircleUnique *>(node->input()); + auto unique_shape = 
loco::shape_get(unique->input()).as<loco::TensorShape>(); - loco::NodeShape visit(const luci::CircleLogSoftmax *node) final { return use_logits(node); } + assert(unique_shape.rank() == 1); - loco::NodeShape visit(const luci::CircleMatrixDiag *node) final + loco::TensorShape shape_output; + shape_output.rank(1); + shape_output.dim(0) = unique_shape.dim(0); + return loco::NodeShape{shape_output}; +} + +loco::NodeShape infer_unpack_out(const luci::CircleUnpackOut *node) +{ + auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input()); + if (unpack == nullptr) { - loco::TensorShape output_shape; + INTERNAL_EXN("CircleUnpack IR is not configured correctly"); + } - auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>(); - auto rank = diagonal_shape.rank(); + auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>(); - output_shape.rank(rank + 1); + return loco::NodeShape{unpack_shape}; +} - for (uint32_t i = 0; i < rank; i++) - { - output_shape.dim(i) = diagonal_shape.dim(i); - } +loco::NodeShape infer_while_out(const luci::CircleWhileOut *node) +{ + /** + * @note WHILE operator's shape is the same with the "cond" + * Graph input. 
+ */ + auto circle_while = dynamic_cast<const luci::CircleWhile *>(node->input()); + if (circle_while == nullptr) + { + INTERNAL_EXN("CircleWhile IR is not configured correctly"); + } - output_shape.dim(rank) = diagonal_shape.dim(rank - 1); + auto index = node->index(); + auto cond_graph = circle_while->cond_graph(); + assert(cond_graph != nullptr); - return loco::NodeShape{output_shape}; + // Assumption: the index of CircleWhileOut matches with the index of input nodes returned by + // loco::input_nodes + auto cond_inputs = loco::input_nodes(cond_graph); + auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index)); + + auto cond_graph_inputs = cond_graph->inputs(); + auto cond_graph_input = cond_graph_inputs->at(cond_in->index()); + + auto cond_graph_input_shape = *cond_graph_input->shape(); + auto this_shape = own_shape(node); + + if (!(this_shape == cond_graph_input_shape)) + { + LOGGER(l); + WARN(l) << "Warning: CircleWhileOut '" << node->name() << "' shape mispatch " << this_shape + << " vs " << cond_graph_input_shape; } - loco::NodeShape visit(const luci::CircleMaximum *node) final { return broadcast_xy(node); } + return loco::NodeShape{this_shape}; +} - loco::NodeShape visit(const luci::CircleMaxPool2D *node) final +/** + * @brief Class to infer the shape of CircleNode + * + * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor + */ +class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape> +{ +public: + loco::NodeShape visit(const luci::CircleAbs *node) final { return use_x(node); } + + loco::NodeShape visit(const luci::CircleAdd *node) final { return broadcast_xy(node); } + + loco::NodeShape visit(const luci::CircleAddN *node) final { return infer_add_n(node); } + + loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_max(node); } + + loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_min(node); } + + loco::NodeShape visit(const 
luci::CircleAveragePool2D *node) final { return infer_pool_2d_shape(node); } - loco::NodeShape visit(const luci::CircleMean *node) final + loco::NodeShape visit(const luci::CircleBatchMatMul *node) final { - auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims()); - return loco::NodeShape{output_shape}; - } + auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>(); + auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>(); - loco::NodeShape visit(const luci::CircleMinimum *node) final { return broadcast_xy(node); } + return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y()); + } - loco::NodeShape visit(const luci::CircleMirrorPad *node) final + loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final { - const loco::DataType S32 = loco::DataType::S32; + return infer_batch_to_space_nd(node); + } - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings()); + loco::NodeShape visit(const luci::CircleCast *node) final { return use_x(node); } - // TODO support non-const case - // TODO support other data type - LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now"); - LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2") + loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); } - int32_t n = paddings->dim(0).value(); - int32_t v = paddings->dim(1).value(); + loco::NodeShape visit(const luci::CircleConcatenation *node) final + { + return infer_concatenation(node); + } - LUCI_ASSERT(v == 2, "paddings should be [n, 2]"); - LUCI_ASSERT(n == int32_t(input_shape.rank()), - "paddings [n, 2] should have same value of input rank"); + loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); } - loco::TensorShape output_shape; + loco::NodeShape visit(const luci::CircleConv2D *node) final { return infer_conv2d(node); } - 
output_shape.rank(input_shape.rank()); - for (int32_t ni = 0; ni < n; ++ni) - { - int32_t idx = ni * 2; - int value = input_shape.dim(ni).value(); - value += paddings->at<S32>(idx + 0); // left - value += paddings->at<S32>(idx + 1); // right - output_shape.dim(ni) = value; - } + loco::NodeShape visit(const luci::CircleCos *node) final { return use_x(node); } - return loco::NodeShape{output_shape}; + loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); } + + loco::NodeShape visit(const luci::CircleDepthToSpace *node) final + { + return infer_depth_to_space(node); } - loco::NodeShape visit(const luci::CircleMul *node) final { return broadcast_xy(node); } + loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final + { + return infer_depthwise_conv2d(node); + } - loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); } + loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); } - loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final + loco::NodeShape visit(const luci::CircleElu *node) final { - const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>(); - return loco::NodeShape{boxes_shape}; + auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>(); + + return loco::NodeShape{input_shape}; } - loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); } + loco::NodeShape visit(const luci::CircleEqual *node) final { return broadcast_xy(node); } - loco::NodeShape visit(const luci::CircleOneHot *node) final + loco::NodeShape visit(const luci::CircleExp *node) final { return use_x(node); } + + loco::NodeShape visit(const luci::CircleExpandDims *node) final { - const loco::DataType S32 = loco::DataType::S32; - auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>(); - // Only support OneHot node's depth() is CircleConst with type S32 - // TODO support depth with 
other types - auto depth = loco::must_cast<luci::CircleConst *>(node->depth()); - LUCI_ASSERT(depth->dtype() == S32, "Only support int32 CircleConst"); - if (depth->rank() != 0) - INTERNAL_EXN_V("Only support rank 0 CircleOneHot in Depth", oops::to_uint32(depth->rank())); - loco::TensorShape output_shape; - output_shape.rank(indices_shape.rank() + 1); - auto axis = node->axis(); - if (axis < 0) - axis += indices_shape.rank() + 1; - LUCI_ASSERT(0 <= axis, "Axis is out of range"); - LUCI_ASSERT(static_cast<uint32_t>(axis) <= indices_shape.rank(), "Axis is out of range"); - uint32_t j = 0; - for (uint32_t i = 0; i < output_shape.rank(); i++) - { - if (i == static_cast<uint32_t>(axis)) - { - output_shape.dim(i) = depth->at<S32>(0); - } - else - { - output_shape.dim(i) = indices_shape.dim(j++); - } - } - return loco::NodeShape{output_shape}; + return infer_expand_dims(node); } - loco::NodeShape visit(const luci::CirclePack *node) final + loco::NodeShape visit(const luci::CircleFill *node) final { return infer_fill(node); } + + loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); } + + loco::NodeShape visit(const luci::CircleFloorDiv *node) final { return broadcast_xy(node); } + + loco::NodeShape visit(const luci::CircleFloorMod *node) final { return broadcast_xy(node); } + + loco::NodeShape visit(const luci::CircleFullyConnected *node) final { - LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs"); + return infer_fully_connected(node); + } - auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>(); - // Make sure all inputs have the same shape. 
- for (uint32_t i = 1; i < node->values_count(); ++i) - { - auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>(); - LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape}, - "All inputs must have the same shape"); - } + loco::NodeShape visit(const luci::CircleGather *node) final { return infer_gather(node); } - // Checking shape capability for pack layer - // Input: tensors [D1, D2, ... Dn] - // Axis: K - // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... Dn] - auto axis = node->axis(); - if (axis < 0) - axis += first_shape.rank() + 1; + loco::NodeShape visit(const luci::CircleGatherNd *node) final { return infer_gather_nd(node); } - LUCI_ASSERT(0 <= axis, "Axis is out of range"); - LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range"); + loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); } - loco::TensorShape output_shape; - output_shape.rank(first_shape.rank() + 1); + loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); } - uint32_t j = 0; - for (uint32_t i = 0; i < output_shape.rank(); ++i) - { - if (i == static_cast<uint32_t>(axis)) - { - output_shape.dim(i) = node->values_count(); - } - else - { - output_shape.dim(i) = first_shape.dim(j++); - } - } + loco::NodeShape visit(const luci::CircleIf *node) final + { + // Shape of CircleIf is not used. 
Just use input 0 + assert(node->input_count() > 0); + const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>(); + return loco::NodeShape{input_shape}; + } - return loco::NodeShape{output_shape}; + loco::NodeShape visit(const luci::CircleL2Normalize *node) final { return use_x(node); } + + loco::NodeShape visit(const luci::CircleL2Pool2D *node) final + { + return infer_pool_2d_shape(node); } - loco::NodeShape visit(const luci::CirclePad *node) final + loco::NodeShape visit(const luci::CircleLeakyRelu *node) final { - const loco::DataType S32 = loco::DataType::S32; + const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>(); + return loco::NodeShape{input_shape}; + } - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings()); + loco::NodeShape visit(const luci::CircleLess *node) final { return broadcast_xy(node); } - // TODO support non-const case - // TODO support other data type - LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now"); - LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2") + loco::NodeShape visit(const luci::CircleLessEqual *node) final { return broadcast_xy(node); } - int32_t n = paddings->dim(0).value(); - int32_t v = paddings->dim(1).value(); + loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final + { + const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); + return loco::NodeShape{input_shape}; + } - LUCI_ASSERT(v == 2, "paddings should be [n, 2]"); - LUCI_ASSERT(n == int32_t(input_shape.rank()), - "paddings [n, 2] should have same value of input rank"); + loco::NodeShape visit(const luci::CircleLog *node) final { return use_x(node); } - loco::TensorShape output_shape; + loco::NodeShape visit(const luci::CircleLogicalAnd *node) final { return use_x(node); } - output_shape.rank(input_shape.rank()); - for (int32_t ni = 0; ni < n; 
++ni) - { - int32_t idx = ni * 2; - int value = input_shape.dim(ni).value(); - value += paddings->at<S32>(idx + 0); // left - value += paddings->at<S32>(idx + 1); // right - output_shape.dim(ni) = value; - } + loco::NodeShape visit(const luci::CircleLogicalNot *node) final { return use_x(node); } - return loco::NodeShape{output_shape}; - } + loco::NodeShape visit(const luci::CircleLogicalOr *node) final { return use_x(node); } - loco::NodeShape visit(const luci::CirclePow *node) final { return broadcast_xy(node); } + loco::NodeShape visit(const luci::CircleLogistic *node) final { return use_x(node); } + + loco::NodeShape visit(const luci::CircleLogSoftmax *node) final { return use_logits(node); } - loco::NodeShape visit(const luci::CirclePRelu *node) final + loco::NodeShape visit(const luci::CircleMatrixDiag *node) final { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>(); + return infer_matrix_diag(node); + } + + loco::NodeShape visit(const luci::CircleMatrixSetDiag *node) final + { + return infer_matrix_set_diag(node); + } - auto output_shape = broadcast_shape(input_shape, alpha_shape); + loco::NodeShape visit(const luci::CircleMaximum *node) final { return broadcast_xy(node); } + + loco::NodeShape visit(const luci::CircleMaxPool2D *node) final + { + return infer_pool_2d_shape(node); + } + loco::NodeShape visit(const luci::CircleMean *node) final + { + auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims()); return loco::NodeShape{output_shape}; } - loco::NodeShape visit(const luci::CircleRange *node) final + loco::NodeShape visit(const luci::CircleMinimum *node) final { return broadcast_xy(node); } + + loco::NodeShape visit(const luci::CircleMirrorPad *node) final { return infer_mirror_pad(node); } + + loco::NodeShape visit(const luci::CircleMul *node) final { return broadcast_xy(node); } + + loco::NodeShape visit(const 
luci::CircleNeg *node) final { return use_x(node); } + + loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final { - loco::TensorShape output_shape; - output_shape.rank(1); + const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>(); + return loco::NodeShape{boxes_shape}; + } - auto start_node = dynamic_cast<luci::CircleConst *>(node->start()); - auto limit_node = dynamic_cast<luci::CircleConst *>(node->limit()); - auto delta_node = dynamic_cast<luci::CircleConst *>(node->delta()); + loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5 *node) final + { + const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>(); + return loco::NodeShape{boxes_shape}; + } - if (start_node == nullptr || limit_node == nullptr || delta_node == nullptr) - { - return use_own(node); - } + loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); } - double start = 0, limit = 0, delta = 0; + loco::NodeShape visit(const luci::CircleOneHot *node) final { return infer_one_hot(node); } -#define GET_RANGE_PARAM(DT) \ - start = start_node->scalar<DT>(); \ - limit = limit_node->scalar<DT>(); \ - delta = delta_node->scalar<DT>(); + loco::NodeShape visit(const luci::CirclePack *node) final { return infer_pack(node); } - switch (start_node->dtype()) - { - case loco::DataType::FLOAT32: - GET_RANGE_PARAM(loco::DataType::FLOAT32) - break; - case loco::DataType::S32: - GET_RANGE_PARAM(loco::DataType::S32) - break; - default: - INTERNAL_EXN("Range data type not supported"); - } + loco::NodeShape visit(const luci::CirclePad *node) final { return infer_pad(node); } -#undef GET_RANGE_PARAM + loco::NodeShape visit(const luci::CirclePadV2 *node) final { return infer_pad_v2(node); } - if (delta == 0) - INTERNAL_EXN("Delta can not be zero"); + loco::NodeShape visit(const luci::CirclePow *node) final { return broadcast_xy(node); } - output_shape.dim(0) = ceil((limit - start) / delta); + loco::NodeShape 
visit(const luci::CirclePRelu *node) final { return infer_p_relu(node); } - return loco::NodeShape{output_shape}; - } + loco::NodeShape visit(const luci::CircleRange *node) final { return infer_range(node); } loco::NodeShape visit(const luci::CircleRank *) final { @@ -1242,136 +2248,16 @@ public: * * TODO Change this policy when not appropriate */ - loco::NodeShape visit(const luci::CircleReshape *node) final - { - LOGGER(l); - - const loco::DataType S32 = loco::DataType::S32; - - loco::TensorShape shape_by_input; - { - LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr"); - - // Only support node's shape() is CircleConst with S32 - // TODO support other node with other types - auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape()); - if (const_shape_node != nullptr) - { - LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst"); - - shape_by_input.rank(const_shape_node->size<S32>()); - - for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis) - { - shape_by_input.dim(axis) = const_shape_node->at<S32>(axis); - } - } - else - { - // We use shape from the node itself - shape_by_input = own_shape(node); - } - } - - loco::TensorShape shape_by_attr; - { - shape_by_attr.rank(node->newShape()->rank()); - - for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis) - { - shape_by_attr.dim(axis) = node->newShape()->dim(axis); - } - } - - if (!(shape_by_input == shape_by_attr)) - { - INFO(l) << "CircleReshape: Two new shape information mismatched : " << std::endl; - INFO(l) << " shape_by_input : " << shape_by_input << std::endl; - INFO(l) << " shape_by_attr : " << shape_by_attr << std::endl; - } - - loco::TensorShape output_shape = shape_by_input; - - // One of the dimensions can have special value -1, meaning its actual value should be inferred. 
- const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>(); - const uint32_t input_element_count = loco::element_count(&input_shape); - uint32_t output_element_count = 1; - uint32_t unknown_dim_index = UINT32_MAX; - for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index) - { - const uint32_t dim_value = output_shape.dim(dim_index).value(); - if (static_cast<int>(dim_value) == -1) - { - LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension"); - unknown_dim_index = dim_index; - } - else - { - output_element_count *= dim_value; - } - } - if (unknown_dim_index != UINT32_MAX) - { - output_shape.dim(unknown_dim_index) = input_element_count / output_element_count; - } - - return loco::NodeShape{output_shape}; - } + loco::NodeShape visit(const luci::CircleReshape *node) final { return infer_reshape(node); } loco::NodeShape visit(const luci::CircleResizeBilinear *node) final { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - - if (input_shape.rank() != 4) - INTERNAL_EXN("Expected ResizeBilinear input to have rank 4"); - - auto *const_node = loco::must_cast<luci::CircleConst *>(node->size()); - - if (const_node->dtype() != loco::DataType::S32) - INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size"); - - if (const_node->rank() != 1) - INTERNAL_EXN("Expected size tensor of rank 1"); - - if (const_node->dim(0).value() != 2) - INTERNAL_EXN("Expected size tensor with shape [2]"); - - loco::TensorShape output_shape; - output_shape.rank(4); - output_shape.dim(0) = input_shape.dim(0); - output_shape.dim(1) = const_node->at<loco::DataType::S32>(0); - output_shape.dim(2) = const_node->at<loco::DataType::S32>(1); - output_shape.dim(3) = input_shape.dim(3); - - return loco::NodeShape{output_shape}; + return infer_resize_bilinear(node); } loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final { - auto input_shape = 
loco::shape_get(node->input()).as<loco::TensorShape>(); - - if (input_shape.rank() != 4) - INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4"); - - auto *const_node = loco::must_cast<luci::CircleConst *>(node->size()); - - if (const_node->dtype() != loco::DataType::S32) - INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size"); - - if (const_node->rank() != 1) - INTERNAL_EXN("Expected size tensor of rank 1"); - - if (const_node->dim(0).value() != 2) - INTERNAL_EXN("Expected size tensor with shape [2]"); - - loco::TensorShape output_shape; - output_shape.rank(4); - output_shape.dim(0) = input_shape.dim(0); - output_shape.dim(1) = const_node->at<loco::DataType::S32>(0); - output_shape.dim(2) = const_node->at<loco::DataType::S32>(1); - output_shape.dim(3) = input_shape.dim(3); - - return loco::NodeShape{output_shape}; + return infer_resize_nearest_neighbor(node); } loco::NodeShape visit(const luci::CircleReverseSequence *node) final @@ -1395,276 +2281,38 @@ public: loco::NodeShape visit(const luci::CircleRsqrt *node) final { return use_x(node); } - loco::NodeShape visit(const luci::CircleScatterNd *node) final - { - loco::TensorShape output_shape; - - auto shape_node = loco::must_cast<luci::CircleConst *>(node->shape()); - - const loco::DataType S32 = loco::DataType::S32; - const loco::DataType S64 = loco::DataType::S64; - - std::vector<int64_t> vect_shape; - - if (shape_node->dtype() == S32) - vect_shape = vector_from_constant<S32>(shape_node); - else if (shape_node->dtype() == S64) - vect_shape = vector_from_constant<S64>(shape_node); - else - LUCI_ASSERT(false, "Only support int32/int64 for shape()"); - - output_shape.rank(vect_shape.size()); - for (uint32_t i = 0; i < vect_shape.size(); ++i) - output_shape.dim(i) = vect_shape[i]; - - return loco::NodeShape{output_shape}; - } + loco::NodeShape visit(const luci::CircleScatterNd *node) final { return infer_scatter_nd(node); } loco::NodeShape visit(const luci::CircleSegmentSum 
*node) final { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>(); - - LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor"); - LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(), - "segment_ids size must be equal to the size of data's first dimension"); - - auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->segment_ids()); - - std::vector<int64_t> vect_ids; - - if (ids_shape_value->dtype() == loco::DataType::S32) - vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value); - - LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()), - "segment_ids values should be sorted") - - loco::TensorShape output_shape; - - output_shape.rank(input_shape.rank()); - - for (uint32_t i = 1; i < input_shape.rank(); ++i) - output_shape.dim(i) = input_shape.dim(i); - - output_shape.dim(0) = vect_ids.back() + 1; - - return loco::NodeShape{output_shape}; + return infer_segment_sum(node); } - loco::NodeShape visit(const luci::CircleSelect *node) final - { - auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>(); - assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>()); + loco::NodeShape visit(const luci::CircleSelect *node) final { return infer_select(node); } - // condition shape validation - auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>(); - if (c_shape.rank() != t_shape.rank()) - { - if (c_shape.rank() != 0 && c_shape.rank() != 1) - INTERNAL_EXN_V("CircleSelect condition rank is not 0 nor 1: ", c_shape.rank()); + loco::NodeShape visit(const luci::CircleSelectV2 *node) final { return infer_select_v2(node); } - if (c_shape.rank() == 1) - { - if (c_shape.dim(0).value() != t_shape.dim(0).value()) - INTERNAL_EXN("CircleSelect condition dim(0) should match with t.dim(0)"); - } - } - - return loco::NodeShape{t_shape}; - } - - loco::NodeShape visit(const 
luci::CircleSelectV2 *node) final - { - auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>(); - auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>(); - auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>(); - - // validate ability to broadcast shapes to each other - auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape); - return loco::NodeShape{b_shape}; - } - - loco::NodeShape visit(const luci::CircleShape *node) final - { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - - loco::TensorShape output_shape; - - output_shape.rank(1); - output_shape.dim(0) = input_shape.rank(); - - return loco::NodeShape{output_shape}; - } + loco::NodeShape visit(const luci::CircleShape *node) final { return infer_shape(node); } loco::NodeShape visit(const luci::CircleSin *node) final { return use_x(node); } - loco::NodeShape visit(const luci::CircleSlice *node) final - { - const loco::DataType S32 = loco::DataType::S32; - const loco::DataType S64 = loco::DataType::S64; - - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - - auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin()); - auto const_size = loco::must_cast<luci::CircleConst *>(node->size()); - - loco::TensorShape output_shape; - std::vector<int64_t> vect_begin; // to hold both S32/S64, we use int64_t - std::vector<int64_t> vect_size; - - if (const_begin->dtype() == S32) - vect_begin = vector_from_constant<S32>(const_begin); - else if (const_begin->dtype() == S64) - vect_begin = vector_from_constant<S64>(const_begin); - else - LUCI_ASSERT(false, "Only support int32/int64 for begin()"); - - if (const_size->dtype() == S32) - vect_size = vector_from_constant<S32>(const_size); - else if (const_size->dtype() == S64) - vect_size = vector_from_constant<S64>(const_size); - else - LUCI_ASSERT(false, "Only support int32/int64 for size()"); - - assert(input_shape.rank() == vect_begin.size()); - 
assert(input_shape.rank() == vect_size.size()); - - output_shape.rank(vect_begin.size()); - for (uint32_t idx = 0; idx < vect_begin.size(); ++idx) - { - auto size = vect_size.at(idx); - if (size == -1) - { - size = input_shape.dim(idx).value() - vect_begin.at(idx); - } - output_shape.dim(idx) = size; - } - - return loco::NodeShape{output_shape}; - } + loco::NodeShape visit(const luci::CircleSlice *node) final { return infer_slice(node); } loco::NodeShape visit(const luci::CircleSoftmax *node) final { return use_logits(node); } loco::NodeShape visit(const luci::CircleSpaceToBatchND *node) final { - const loco::DataType S32 = loco::DataType::S32; - - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - // Support only input rank is 3 and 4 - assert(input_shape.rank() == 3 || input_shape.rank() == 4); - - // Only support block_shape() with S32 type CircleConst for now - auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape()); - LUCI_ASSERT(const_block_shape->dtype() == S32, "Only support int32 block_shape"); - - // Only support paddings() with S32 type CircleConst for now - auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings()); - LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings"); - - auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>(); - auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>(); - assert(const_block_shape_shape.rank() == 1); - assert(const_paddings_shape.rank() == 2); - - int32_t input_spatial_dim = input_shape.rank() - 2; - assert(const_block_shape_shape.dim(0) == input_spatial_dim); - assert(const_paddings_shape.dim(0) == input_spatial_dim); - assert(const_paddings_shape.dim(1) == 2); - - // Check all values of block_shape >= 1 - uint32_t ele_count = const_block_shape->size<S32>(); - for (uint32_t e = 0; e < ele_count; ++e) - { - auto val = const_block_shape->at<S32>(e); - if (val < 1) - { 
- INTERNAL_EXN_V("All values of block_shape >= 1: ", e); - } - } - - loco::TensorShape shape_output; - - shape_output.rank(input_shape.rank()); - - int32_t output_batch_size = input_shape.dim(0).value(); - for (int32_t dim = 0; dim < input_spatial_dim; ++dim) - { - int dim_size = input_shape.dim(dim + 1).value(); - dim_size += const_paddings->at<S32>(dim * 2); - dim_size += const_paddings->at<S32>(dim * 2 + 1); - shape_output.dim(dim + 1) = dim_size / const_block_shape->at<S32>(dim); - - assert(dim_size % const_block_shape->at<S32>(dim) == 0); - output_batch_size = output_batch_size * const_block_shape->at<S32>(dim); - } - shape_output.dim(0) = output_batch_size; - shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1); - - return loco::NodeShape{shape_output}; + return infer_space_to_batch_nd(node); } loco::NodeShape visit(const luci::CircleSpaceToDepth *node) final { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported"); - - // Only data format NHWC is supported - int32_t height = input_shape.dim(1).value(); - int32_t width = input_shape.dim(2).value(); - int32_t depth = input_shape.dim(3).value(); - - int block_size = node->block_size(); - - if (block_size < 2) - INTERNAL_EXN("Block size must be >= 2"); - - if ((height % block_size) || (width % block_size)) - { - INTERNAL_EXN("The input tensor's height and width must be divisible by block_size"); - } - - loco::TensorShape output_shape; - output_shape.rank(4); - - output_shape.dim(0) = input_shape.dim(0).value(); - output_shape.dim(1) = height / block_size; - output_shape.dim(2) = width / block_size; - output_shape.dim(3) = block_size * block_size * depth; - - return loco::NodeShape{output_shape}; + return infer_space_to_depth(node); } loco::NodeShape visit(const luci::CircleSparseToDense *node) final { - loco::TensorShape shape; - { - LUCI_ASSERT(node->output_shape(), "dims input should not 
be nullptr"); - - auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape()); - if (output_shape_node != nullptr) - { - // Only support node with S32 - LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32, - "Only support int32 CircleConst"); - - if (output_shape_node->rank() != 1) - INTERNAL_EXN_V("Only support rank 1 CircleConst", - oops::to_uint32(output_shape_node->rank())); - - shape.rank(output_shape_node->dim(0).value()); - - for (uint32_t axis = 0; axis < shape.rank(); ++axis) - { - shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis); - } - } - else - { - shape = own_shape(node); - } - } - - return loco::NodeShape{shape}; + return infer_sparse_to_dense(node); } loco::NodeShape visit(const luci::CircleSplit *node) final @@ -1692,71 +2340,10 @@ public: loco::NodeShape visit(const luci::CircleStridedSlice *node) final { - auto begin_node = dynamic_cast<luci::CircleConst *>(node->begin()); - auto end_node = dynamic_cast<luci::CircleConst *>(node->end()); - auto strides_node = dynamic_cast<luci::CircleConst *>(node->strides()); - - if (begin_node == nullptr || end_node == nullptr || strides_node == nullptr) - { - return use_own(node); - } - - loco::TensorShape shape = infer_output_shape(node); - return loco::NodeShape{shape}; + return infer_strided_slice(node); } - loco::NodeShape visit(const luci::CircleSqueeze *node) final - { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - - // TODO input shape may be unknown before runtime - std::vector<bool> do_squeeze(input_shape.rank(), false); - uint32_t num_squeezed = 0; - - if (!node->squeeze_dims().empty()) - { - // SqueezeDims not empty, squeeze only dims specified - for (int32_t raw_dim : node->squeeze_dims()) - { - int32_t dim = raw_dim < 0 ? 
raw_dim + input_shape.rank() : raw_dim; - - if (dim < 0 || static_cast<uint32_t>(dim) >= input_shape.rank() || - input_shape.dim(dim).value() != 1) - { - INTERNAL_EXN("invalid dimention specified to Squeeze"); - } - - if (!do_squeeze[dim]) - ++num_squeezed; - do_squeeze[dim] = true; - } - } - else - { - // SqueezeDims empty, squeeze any dims with size == 1 - for (uint32_t dim = 0; dim < input_shape.rank(); ++dim) - { - if (input_shape.dim(dim) == 1) - { - do_squeeze[dim] = true; - ++num_squeezed; - } - } - } - - loco::TensorShape output_shape; - output_shape.rank(input_shape.rank() - num_squeezed); - - for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim) - { - if (!do_squeeze[in_dim]) - { - output_shape.dim(out_dim++) = input_shape.dim(in_dim); - } - } - - return loco::NodeShape{output_shape}; - } + loco::NodeShape visit(const luci::CircleSqueeze *node) final { return infer_squeeze(node); } loco::NodeShape visit(const luci::CircleSub *node) final { return broadcast_xy(node); } @@ -1768,33 +2355,7 @@ public: loco::NodeShape visit(const luci::CircleTanh *node) final { return use_x(node); } - loco::NodeShape visit(const luci::CircleTile *node) final - { - const loco::DataType S32 = loco::DataType::S32; - - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples()); - - // TODO support non-const case - // TODO support S64 type - LUCI_ASSERT(multiples->dtype() == S32, "Only support int32 multiples"); - LUCI_ASSERT(multiples->rank() == 1, "multiples should be rank 1") - - uint32_t n = multiples->dim(0).value(); - - LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same with input rank"); - - loco::TensorShape output_shape; - - output_shape.rank(input_shape.rank()); - for (uint32_t ni = 0; ni < n; ++ni) - { - int32_t multiple = multiples->at<S32>(ni); - output_shape.dim(ni) = input_shape.dim(ni).value() * 
static_cast<uint32_t>(multiple); - } - - return loco::NodeShape{output_shape}; - } + loco::NodeShape visit(const luci::CircleTile *node) final { return infer_tile(node); } loco::NodeShape visit(const luci::CircleTopKV2 *node) final { @@ -1803,93 +2364,16 @@ public: return loco::NodeShape{input_shape}; } - loco::NodeShape visit(const luci::CircleTranspose *node) final - { - auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>(); - - auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm()); - - loco::TensorShape output_shape; - output_shape.rank(input_shape.rank()); - - assert(perm_node->dtype() == loco::DataType::S32); - assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>()); - - for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++) - { - auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis); - output_shape.dim(out_axis) = input_shape.dim(in_axis); - } - - return output_shape; - } - - loco::NodeShape visit(const luci::CircleUnique *node) final - { - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - - assert(input_shape.rank() == 1); - - loco::TensorShape shape_output; - shape_output = own_shape(node); - - return loco::NodeShape{shape_output}; - } + loco::NodeShape visit(const luci::CircleTranspose *node) final { return infer_transpose(node); } loco::NodeShape visit(const luci::CircleTransposeConv *node) final { - // TransposeConv's output shape is written in its 'inputSizes' argument - auto input_sizes_const = loco::must_cast<luci::CircleConst *>(node->inputSizes()); - // TODO support non-const type - LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype") - LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4, - "Only support rank 1 with 4 entries") - - loco::TensorShape shape; - - shape.rank(4); - for (uint32_t axis = 0; axis < 4; ++axis) - shape.dim(axis) = 
input_sizes_const->at<loco::DataType::S32>(axis); - - return loco::NodeShape{shape}; + return infer_transpose_conv(node); } - loco::NodeShape visit(const luci::CircleUnpack *node) final - { - // CircleUnpack provides list(array) of Tensors which has one less dimension of the input - // We'll set shape of CircleUnpack to shape of actual outputs - // TODO fix this if any problem rises - auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>(); - - auto axis = node->axis(); - auto num = node->num(); - auto rank = static_cast<int32_t>(value_shape.rank()); - - if (rank == 0) - { - // Unknown shape - return use_own(node); - } - - LUCI_ASSERT(-rank <= axis && axis < rank, "Axis is out of range"); - - if (axis < 0) - axis += rank; - - LUCI_ASSERT(num == static_cast<int32_t>(value_shape.dim(axis).value()), - "num, axis maybe incorrect"); - - loco::TensorShape output_shape; - output_shape.rank(rank - 1); + loco::NodeShape visit(const luci::CircleUnpack *node) final { return infer_unpack(node); } - for (int32_t i = 0, o = 0; i < rank; ++i) - { - if (i != axis) - output_shape.dim(o++) = value_shape.dim(i); - } - - return loco::NodeShape{output_shape}; - } + loco::NodeShape visit(const luci::CircleUnique *node) final { return infer_unique(node); } loco::NodeShape visit(const luci::CircleWhere *node) final { return use_own(node); } @@ -1911,57 +2395,10 @@ public: // Circle Only loco::NodeShape visit(const luci::CircleBCQFullyConnected *node) final { - loco::TensorShape out_shape; - - auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); - auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters()); - - LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2"); - - int32_t qbits_sum = 0; - for (uint32_t i = 0; i < weights_clusters->dim(0).value(); ++i) - { - qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1); - } - - out_shape.rank(2); - out_shape.dim(0) = qbits_sum; - 
out_shape.dim(1) = input_shape.dim(1); - - return loco::NodeShape{out_shape}; + return infer_bcq_fully_connected(node); } - loco::NodeShape visit(const luci::CircleBCQGather *node) final - { - loco::TensorShape input_shape; - loco::TensorShape output_shape; - - const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>(); - const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>(); - auto axis = node->axis(); - - auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters()); - auto qbits_sum = 0; - for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i) - { - qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1); - } - - input_shape.rank(2); - input_shape.dim(0) = qbits_sum; - input_shape.dim(1) = input_binary_shape.dim(1).value() * 32; - - output_shape.rank(input_shape.rank() - 1 + indices_shape.rank()); - int32_t outdim_index = 0; - for (int32_t i = 0; i < axis; ++i) - output_shape.dim(outdim_index++) = input_shape.dim(i); - for (uint32_t i = 0; i < indices_shape.rank(); ++i) - output_shape.dim(outdim_index++) = indices_shape.dim(i); - for (uint32_t i = axis + 1; i < input_shape.rank(); ++i) - output_shape.dim(outdim_index++) = input_shape.dim(i); - - return loco::NodeShape{output_shape}; - } + loco::NodeShape visit(const luci::CircleBCQGather *node) final { return infer_bcq_gather(node); } loco::NodeShape visit(const luci::CircleInstanceNorm *node) final { @@ -1971,25 +2408,9 @@ public: } // Virtual - loco::NodeShape visit(const luci::CircleInput *node) final - { - loco::TensorShape shape; + loco::NodeShape visit(const luci::CircleInput *node) final { return infer_input(node); } - shape.rank(node->rank()); - for (uint32_t axis = 0; axis < node->rank(); axis++) - shape.dim(axis) = node->dim(axis); - - return loco::NodeShape{shape}; - } - - loco::NodeShape visit(const luci::CircleOutput *node) final - { - auto graph_outputs = node->graph()->outputs(); - auto 
graph_output = graph_outputs->at(node->index()); - auto output_shape = graph_output->shape(); - - return loco::NodeShape{*output_shape}; - } + loco::NodeShape visit(const luci::CircleOutput *node) final { return infer_output(node); } loco::NodeShape visit(const luci::CircleOutputDummy *node) final { return use_own(node); } @@ -1997,259 +2418,32 @@ public: loco::NodeShape visit(const luci::CircleCustomOut *node) final { return use_own(node); } - loco::NodeShape visit(const luci::CircleIfOut *node) final - { - /** - * @note IF operator type and shape are that of the "then" and "else" - * Graph Outputs. - */ - auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input()); - if (circle_if == nullptr) - { - INTERNAL_EXN("CircleIf IR is not configured correctly"); - } - - auto index = node->index(); - auto then_graph = circle_if->then_graph(); - auto else_graph = circle_if->else_graph(); - assert(then_graph != nullptr); - assert(else_graph != nullptr); - - // shape and type are assumed to be same - // these are checked at post_import_graph() in Import - auto then_outputs = loco::output_nodes(then_graph); - auto else_outputs = loco::output_nodes(else_graph); - assert(then_outputs.size() == else_outputs.size()); - assert(index < static_cast<int32_t>(then_outputs.size())); - - auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index)); - auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index)); - - auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items - auto else_graph_outputs = else_graph->outputs(); - assert(then_graph_outputs->size() == else_graph_outputs->size()); - - auto then_graph_output = then_graph_outputs->at(then_out->index()); - auto else_graph_output = else_graph_outputs->at(else_out->index()); - (void)else_graph_output; // make compiler happy for unused variable warnings - assert(*then_graph_output->shape() == *else_graph_output->shape()); - - return 
loco::NodeShape{*then_graph_output->shape()}; - } + loco::NodeShape visit(const luci::CircleIfOut *node) final { return infer_if_out(node); } loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final { - const loco::DataType S32 = loco::DataType::S32; - - auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input()); - if (nmsv4 == nullptr) - INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly"); - - auto index = node->index(); - if (index == 1) - return loco::TensorShape({0}); - - assert(index == 0); - - auto unknown = loco::TensorShape{loco::Dimension()}; - auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size()); - if (max_output_size == nullptr) - return unknown; // we need CircleConst for max output size - - LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size"); - - if (max_output_size->size<S32>() < 1) - return unknown; - - auto max_output_size_value = uint32_t(max_output_size->at<S32>(0)); - return loco::TensorShape{max_output_size_value}; + return infer_non_max_suppression_v4_out(node); } - loco::NodeShape visit(const luci::CircleSplitOut *node) final + loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final { - const loco::DataType S32 = loco::DataType::S32; - - auto split = dynamic_cast<const luci::CircleSplit *>(node->input()); - if (split == nullptr) - INTERNAL_EXN("CircleSplit IR is not configured correctly"); - - loco::NodeShape unknown; - - auto split_shape = loco::shape_get(split).as<loco::TensorShape>(); - - auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim()); - if (split_dim == nullptr) - return unknown; // we need CircleConst for split_dim - LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim"); - - assert(split_dim->size<S32>() == 1); - auto split_dim_axis = split_dim->at<S32>(0); - if (split_dim_axis < 0) - split_dim_axis += split_shape.rank(); - - auto 
split_dim_value = split_shape.dim(split_dim_axis).value(); - assert(split_dim_value % split->num_split() == 0); - const int split_depth = split_dim_value / split->num_split(); - - loco::TensorShape output_shape = split_shape; - - // All shapes are equally same - output_shape.dim(split_dim_axis) = loco::Dimension(split_depth); - - return loco::NodeShape{output_shape}; + return infer_non_max_suppression_v5_out(node); } - loco::NodeShape visit(const luci::CircleSplitVOut *node) final - { - const loco::DataType S32 = loco::DataType::S32; - - auto split = dynamic_cast<const luci::CircleSplitV *>(node->input()); - if (split == nullptr) - INTERNAL_EXN("CircleSplit IR is not configured correctly"); - - loco::NodeShape unknown; - - auto split_shape = loco::shape_get(split).as<loco::TensorShape>(); - - auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits()); - if (size_splits == nullptr) - return unknown; // we need CircleConst for size_splits - LUCI_ASSERT(size_splits->dtype() == S32, "Only support int32 for size_splits"); - - auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim()); - if (split_dim == nullptr) - return unknown; // we need CircleConst for split_dim - LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim"); - - // fetch axis - assert(split_dim->size<S32>() == 1); - auto split_dim_axis = split_dim->at<S32>(0); - if (split_dim_axis < 0) - split_dim_axis += split_shape.rank(); - - // interpret size_splits values - int32_t size_splits_count = static_cast<int32_t>(size_splits->size<S32>()); - assert(size_splits_count == split->num_split()); - - int64_t minus_one_count = 0, size_splits_sum = 0; - for (int32_t idx = 0; idx < size_splits_count; ++idx) - { - auto size = size_splits->at<S32>(idx); - assert(size >= -1); - if (size == -1) - ++minus_one_count; - else - size_splits_sum += size; - } - if (minus_one_count > 1) - INTERNAL_EXN("CircleSplitV size_splits has more than two -1 values"); - - // 
calcuate this SplitVOut shape - auto input_size = split_shape.dim(split_dim_axis).value(); - assert(size_splits_sum <= input_size); - - auto index_this = node->index(); - assert(0 <= index_this && index_this < split->num_split()); - auto split_depth = size_splits->at<S32>(index_this); - if (split_depth == -1) - split_depth = input_size - size_splits_sum; + loco::NodeShape visit(const luci::CircleSplitOut *node) final { return infer_split_out(node); } - loco::TensorShape output_shape = split_shape; - - output_shape.dim(split_dim_axis) = loco::Dimension(split_depth); - - return loco::NodeShape{output_shape}; - } + loco::NodeShape visit(const luci::CircleSplitVOut *node) final { return infer_split_v_out(node); } loco::NodeShape visit(const luci::CircleTopKV2Out *node) final { - const loco::DataType S32 = loco::DataType::S32; - - auto topkv2 = dynamic_cast<const luci::CircleTopKV2 *>(node->input()); - if (topkv2 == nullptr) - INTERNAL_EXN("CircleSplit IR is not configured correctly"); - - // shape of topkv2 is same as topkv2->input() - auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>(); - - auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k()); - LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32"); - assert(node_k->size<S32>() == 1); - - loco::TensorShape output_shape; - - output_shape.rank(input_shape.rank()); - for (uint32_t idx = 0; idx < input_shape.rank() - 1; ++idx) - { - output_shape.dim(idx) = input_shape.dim(idx); - } - output_shape.dim(input_shape.rank() - 1) = node_k->at<S32>(0); - - return loco::NodeShape{output_shape}; + return infer_top_k_v2_out(node); } - loco::NodeShape visit(const luci::CircleUniqueOut *node) final - { - auto unique = dynamic_cast<const luci::CircleUnique *>(node->input()); - if (unique == nullptr) - { - INTERNAL_EXN("CircleUnique IR is not configured correctly"); - } + loco::NodeShape visit(const luci::CircleUniqueOut *node) final { return infer_unique_out(node); } - auto unique_shape = 
loco::shape_get(unique).as<loco::TensorShape>(); + loco::NodeShape visit(const luci::CircleUnpackOut *node) final { return infer_unpack_out(node); } - return loco::NodeShape{unique_shape}; - } - - loco::NodeShape visit(const luci::CircleUnpackOut *node) final - { - auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input()); - if (unpack == nullptr) - { - INTERNAL_EXN("CircleUnpack IR is not configured correctly"); - } - - auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>(); - - return loco::NodeShape{unpack_shape}; - } - - loco::NodeShape visit(const luci::CircleWhileOut *node) final - { - /** - * @note WHILE operator's shape is the same with the "cond" - * Graph input. - */ - auto circle_while = dynamic_cast<const luci::CircleWhile *>(node->input()); - if (circle_while == nullptr) - { - INTERNAL_EXN("CircleWhile IR is not configured correctly"); - } - - auto index = node->index(); - auto cond_graph = circle_while->cond_graph(); - assert(cond_graph != nullptr); - - // Assumption: the index of CircleWhileOut matches with the index of input nodes returned by - // loco::input_nodes - auto cond_inputs = loco::input_nodes(cond_graph); - auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index)); - - auto cond_graph_inputs = cond_graph->inputs(); - auto cond_graph_input = cond_graph_inputs->at(cond_in->index()); - - auto cond_graph_input_shape = *cond_graph_input->shape(); - auto this_shape = own_shape(node); - - if (!(this_shape == cond_graph_input_shape)) - { - LOGGER(l); - WARN(l) << "Warning: CircleWhileOut '" << node->name() << "' shape mispatch " << this_shape - << " vs " << cond_graph_input_shape; - } - - return loco::NodeShape{this_shape}; - } + loco::NodeShape visit(const luci::CircleWhileOut *node) final { return infer_while_out(node); } }; } // namespace diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp index e7910bfc0..d28d8ac99 100644 --- 
a/compiler/luci/service/src/CircleTypeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp @@ -257,6 +257,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT return loco::dtype_get(node->boxes()); } + loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final + { + return loco::dtype_get(node->boxes()); + } + loco::DataType visit(const luci::CircleNotEqual *) final { return loco::DataType::BOOL; } loco::DataType visit(const luci::CirclePack *node) final @@ -273,6 +278,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT loco::DataType visit(const luci::CirclePad *node) final { return loco::dtype_get(node->input()); } + loco::DataType visit(const luci::CirclePadV2 *node) final + { + return loco::dtype_get(node->input()); + } + loco::DataType visit(const luci::CirclePow *node) final { // TODO make sure types cannot differ @@ -589,6 +599,17 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT return loco::DataType::S32; } + loco::DataType visit(const luci::CircleNonMaxSuppressionV5Out *node) final + { + (void)node; + if (node->index() == 0 || node->index() == 2) + { + return loco::DataType::S32; + } + assert(node->index() == 1); + return loco::DataType::FLOAT32; + } + loco::DataType visit(const luci::CircleSplitOut *node) final { return loco::dtype_get(node->input()); diff --git a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst index 9fd42ed4e..12dd7ff5b 100644 --- a/compiler/luci/tests/test.lst +++ b/compiler/luci/tests/test.lst @@ -96,6 +96,10 @@ addread(MirrorPad_000) addread(Mul_000) addread(Mul_U8_000) addread(Neg_000) +addread(NonMaxSuppressionV4_000) +addread(NonMaxSuppressionV4_001) +addread(NonMaxSuppressionV5_000) +addread(NonMaxSuppressionV5_001) addread(NotEqual_000) addread(OneHot_000) addread(OneHot_001) @@ -105,6 +109,7 @@ addread(Pack_000) addread(Pack_U8_000) addread(Pad_000) addread(Pad_U8_000) 
+addread(PadV2_000) addread(Pow_000) addread(PRelu_000) addread(Range_000) @@ -128,6 +133,7 @@ addread(Reshape_002) addread(Reshape_003) addread(Reshape_U8_000) addread(ResizeBilinear_000) +addread(ResizeBilinear_U8_000) addread(ResizeNearestNeighbor_000) addread(ReverseSequence_000) addread(ReverseV2_000) @@ -151,6 +157,7 @@ addread(SpaceToBatchND_001) addread(SpaceToBatchND_002) addread(SpaceToBatchND_003) addread(SpaceToDepth_000) +addread(SpaceToDepth_U8_000) addread(SparseToDense_000) addread(Split_000) addread(SplitV_000) @@ -166,12 +173,19 @@ addread(Sub_U8_000) addread(Sum_000) addread(Sum_001) addread(Tanh_000) +addread(Tanh_U8_000) addread(Tile_000) addread(Tile_U8_000) addread(TopKV2_000) addread(TopKV2_001) addread(Transpose_000) addread(TransposeConv_000) +addread(Unique_000) +addread(Unique_001) +addread(Unique_002) +addread(Unique_003) +addread(Unique_U8_000) +addread(Unique_U8_001) addread(Unpack_000) addread(Unpack_001) addread(Unpack_002) @@ -296,6 +310,10 @@ addwrite(MirrorPad_000) addwrite(Mul_000) addwrite(Mul_U8_000) addwrite(Neg_000) +addwrite(NonMaxSuppressionV4_000) +addwrite(NonMaxSuppressionV4_001) +addwrite(NonMaxSuppressionV5_000) +addwrite(NonMaxSuppressionV5_001) addwrite(NotEqual_000) addwrite(OneHot_000) addwrite(OneHot_001) @@ -304,6 +322,7 @@ addwrite(OneHot_003) addwrite(Pack_000) addwrite(Pack_U8_000) addwrite(Pad_000) +addwrite(PadV2_000) addwrite(Pow_000) addwrite(PRelu_000) addwrite(Range_000) @@ -327,6 +346,7 @@ addwrite(Reshape_002) addwrite(Reshape_003) addwrite(Reshape_U8_000) addwrite(ResizeBilinear_000) +addwrite(ResizeBilinear_U8_000) addwrite(ResizeNearestNeighbor_000) addwrite(ReverseSequence_000) addwrite(ReverseV2_000) @@ -350,6 +370,7 @@ addwrite(SpaceToBatchND_001) addwrite(SpaceToBatchND_002) addwrite(SpaceToBatchND_003) addwrite(SpaceToDepth_000) +addwrite(SpaceToDepth_U8_000) addwrite(SparseToDense_000) addwrite(Split_000) addwrite(SplitV_000) @@ -365,12 +386,19 @@ addwrite(Sub_U8_000) addwrite(Sum_000) 
addwrite(Sum_001) addwrite(Tanh_000) +addwrite(Tanh_U8_000) addwrite(Tile_000) addwrite(Tile_U8_000) addwrite(TopKV2_000) addwrite(TopKV2_001) addwrite(Transpose_000) addwrite(TransposeConv_000) +addwrite(Unique_000) +addwrite(Unique_001) +addwrite(Unique_002) +addwrite(Unique_003) +addwrite(Unique_U8_000) +addwrite(Unique_U8_001) addwrite(Unpack_000) addwrite(Unpack_001) addwrite(Unpack_002) diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf index d59e1c529..58c686882 100644 --- a/compiler/one-cmds/one-import-tf +++ b/compiler/one-cmds/one-import-tf @@ -83,6 +83,10 @@ while [ "$#" -ne 0 ]; do esac done +if [ -n ${INPUT_SHAPES} ] && [ ${TF_INTERFACE} = "--v2" ]; then + echo "Warning: if --v2 option is used, shape will be ignored" +fi + if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then echo "Error: input model not found" echo "" @@ -117,16 +121,18 @@ show_err_onexit() trap show_err_onexit ERR # generate temporary tflite file -echo "python" "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \ ---input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \ ---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \ ---output_arrays ${OUTPUT_ARRAYS} > "${OUTPUT_PATH}.log" -echo " " >> "${OUTPUT_PATH}.log" +CONVERT_SCRIPT="python ${DRIVER_PATH}/tf2tfliteV2.py ${TF_INTERFACE} " +CONVERT_SCRIPT+="--input_path ${INPUT_PATH} " +CONVERT_SCRIPT+="--input_arrays ${INPUT_ARRAYS} " +CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite " +CONVERT_SCRIPT+="--output_arrays ${OUTPUT_ARRAYS} " +if [ ! 
-z ${INPUT_SHAPES} ]; then + CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} " +fi -python "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \ ---input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \ ---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \ ---output_arrays ${OUTPUT_ARRAYS} >> "${OUTPUT_PATH}.log" 2>&1 +echo ${CONVERT_SCRIPT} > "${OUTPUT_PATH}.log" +echo "" >> "${OUTPUT_PATH}.log" +$CONVERT_SCRIPT >> "${OUTPUT_PATH}.log" 2>&1 # convert .tflite to .circle echo " " >> "${OUTPUT_PATH}.log" diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv index 0a53bd3dd..0b11e7f0b 100644 --- a/compiler/one-cmds/one-prepare-venv +++ b/compiler/one-cmds/one-prepare-venv @@ -46,7 +46,9 @@ python3 -m venv "${DRIVER_PATH}/venv" # Install tensorflow source "${VENV_ACTIVATE}" +# TODO remove version number of 'pip==20.2.1 setuptools==49.3.0' +# NOTE adding version is for temporary hotfix of setuptools 50.x.y version python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \ - install -U pip setuptools + install -U pip==20.2.1 setuptools==49.3.0 python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \ install tensorflow-cpu==2.3.0 diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json new file mode 100644 index 000000000..6460e54cf --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json @@ -0,0 +1,48 @@ +{ + "weights": [ + [ + [ + [ + 1.0, + 2.0 + ], + [ + -3.0, + -4.0 + ] + ], + [ + [ + -5.0, + 6.0 + ], + [ + -7.0, + 8.0 + ] + ] + ], + [ + [ + [ + 4.0, + -2.0 + ], + [ + 3.0, + -1.0 + ] + ], + [ + [ + -8.0, + -6.0 + ], + [ + 7.0, + 5.0 + ] + ] + ] + ] +} diff --git 
a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json new file mode 100644 index 000000000..a55af0be5 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json @@ -0,0 +1,10 @@ +{ + "weights": [ + 4374, + 8747 + ], + "scale": [ + 0.0002286423499283808, + 0.0002286423499283808 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json new file mode 100644 index 000000000..0e481bbfd --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0038869199343025684, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json new file mode 100644 index 000000000..4e12a5550 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json @@ -0,0 +1,64 @@ +{ + "weights": [ + [ + [ + [ + 136, + 153 + ], + [ + 68, + 51 + ] + ], + [ + [ + 34, + 221 + ], + [ + 0, + 255 + ] + ] + ], + [ + [ + [ + 204, + 102 + ], + [ + 187, + 119 + ] + ], + [ + [ + 0, + 34 + ], + [ + 255, + 221 + ] + ] + ] + ], + "scale": [ + 0.058823529411764705, + 0.058823529411764705 + ], + "zero_point": [ + 119.0, + 136.0 + ], + "min": [ + -7.0, + -8.0 + ], + "max": [ + 8.0, + 7.0 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json 
new file mode 100644 index 000000000..7d23cbad2 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.05829785391688347, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json new file mode 100644 index 000000000..af8dc16de --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json @@ -0,0 +1,4 @@ +{ + "min": 0.022708916887640953, + "max": 0.9911645770072937 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json new file mode 100644 index 000000000..5f7bd9942 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json @@ -0,0 +1,4 @@ +{ + "min": 0.0, + "max": 14.86595230102539 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json new file mode 100644 index 000000000..675eadcb6 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json @@ -0,0 +1,34 @@ +{ + "weights": [ + [ + [ + [ + 1.0352935791015625, + 1.976470947265625, + 2.9568634033203125, + 3.95294189453125 + ], + [ + -8.972549438476562, + 9.976470947265625, + -11.011764526367188, + 11.9686279296875 + ] + ], + [ + [ + 5.0039215087890625, + 6.023530960083008, + 7.035295486450195, + 8.01568603515625 + ], + [ + 13.027450561523438, + -14.023529052734375, + 
14.988235473632812, + -16.0313720703125 + ] + ] + ] + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json new file mode 100644 index 000000000..3cda45238 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json @@ -0,0 +1,14 @@ +{ + "weights": [ + 2985, + 5473, + 7578, + 9382 + ], + "scale": [ + 0.0003349798455903035, + 0.0003654325561959198, + 0.00039588526680153606, + 0.00042633797740715233 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json new file mode 100644 index 000000000..97931cc58 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.003882720833644271, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json new file mode 100644 index 000000000..add4d0f35 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json @@ -0,0 +1,58 @@ +{ + "weights": [ + [ + [ + [ + 116, + 170, + 137, + 182 + ], + [ + 0, + 255, + 0, + 255 + ] + ], + [ + [ + 162, + 213, + 177, + 219 + ], + [ + 255, + 0, + 255, + 0 + ] + ] + ] + ], + "scale": [ + 0.08627450980392157, + 0.09411764705882353, + 0.10196078431372549, + 0.10980392156862745 + ], + "zero_point": [ + 104.0, + 149.0, + 108.0, + 146.0 + ], + "min": [ + -8.972549019607843, + -14.023529411764706, 
+ -11.011764705882353, + -16.031372549019608 + ], + "max": [ + 13.027450980392157, + 9.976470588235294, + 14.988235294117647, + 11.968627450980392 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json new file mode 100644 index 000000000..f587aac24 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.07756166160106659, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json new file mode 100644 index 000000000..fa8fffc3e --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json @@ -0,0 +1,4 @@ +{ + "min": 0.003264044094830751, + "max": 0.9900938200950622 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json new file mode 100644 index 000000000..612c0b4ea --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json @@ -0,0 +1,4 @@ +{ + "min": 0.0, + "max": 19.778222274780273 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json new file mode 100644 index 000000000..4661cb3ca --- /dev/null +++ 
b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json @@ -0,0 +1,76 @@ +{ + "weights": [ + [ + 1.0039215087890625, + 2.007843017578125, + -3.0117650032043457, + -4.015686511993408, + -5.019608020782471, + 6.023530006408691, + -7.027451515197754, + 7.968626976013184, + 4.015686988830566, + -2.007843017578125, + 3.0117645263671875, + -1.0039215087890625, + -7.9686279296875, + -6.023530006408691, + 7.027451515197754, + 5.019608497619629 + ], + [ + 1.0039215087890625, + 2.007843017578125, + -3.0117650032043457, + -4.015686511993408, + -5.019608020782471, + 6.023530006408691, + -7.027451515197754, + 7.968626976013184, + 4.015686988830566, + -2.007843017578125, + 3.0117645263671875, + -1.0039215087890625, + -7.9686279296875, + -6.023530006408691, + 7.027451515197754, + 5.019608497619629 + ], + [ + 1.0039215087890625, + 2.007843017578125, + -3.0117650032043457, + -4.015686511993408, + -5.019608020782471, + 6.023530006408691, + -7.027451515197754, + 7.968626976013184, + 4.015686988830566, + -2.007843017578125, + 3.0117645263671875, + -1.0039215087890625, + -7.9686279296875, + -6.023530006408691, + 7.027451515197754, + 5.019608497619629 + ], + [ + 1.0039215087890625, + 2.007843017578125, + -3.0117650032043457, + -4.015686511993408, + -5.019608020782471, + 6.023530006408691, + -7.027451515197754, + 7.968626976013184, + 4.015686988830566, + -2.007843017578125, + 3.0117645263671875, + -1.0039215087890625, + -7.9686279296875, + -6.023530006408691, + 7.027451515197754, + 5.019608497619629 + ] + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json new file mode 100644 index 000000000..4333c0fed --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json @@ 
-0,0 +1,14 @@ +{ + "weights": [ + 4099, + -8199, + -12298, + 16398 + ], + "scale": [ + 0.00024393631821001058, + 0.00024393631821001058, + 0.00024393631821001058, + 0.00024393631821001058 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json new file mode 100644 index 000000000..8edac1bd9 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json @@ -0,0 +1,4 @@ +{ + "scale": 0.003887734841555357, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json new file mode 100644 index 000000000..1b94f1652 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json @@ -0,0 +1,4 @@ +{ + "scale": 0.061938945204019547, + "zero_point": 171.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json new file mode 100644 index 000000000..5ee46c87f --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json @@ -0,0 +1,100 @@ +{ + "weights": [ + [ + 144, + 160, + 80, + 64, + 48, + 224, + 16, + 255, + 192, + 96, + 176, + 112, + 1, + 32, + 240, + 208 + ], + [ + 144, + 160, + 80, + 64, + 48, + 224, + 16, + 255, + 192, + 96, + 176, + 112, + 1, + 32, + 240, + 208 + ], + [ + 144, + 160, + 80, + 64, + 48, + 224, + 16, + 255, + 192, + 96, + 176, + 112, + 1, + 32, + 240, + 208 + ], + [ + 144, + 160, + 80, + 64, + 48, 
+ 224, + 16, + 255, + 192, + 96, + 176, + 112, + 1, + 32, + 240, + 208 + ] + ], + "scale": [ + 0.06274509803921569, + 0.06274509803921569, + 0.06274509803921569, + 0.06274509803921569 + ], + "zero_point": [ + 128.0, + 128.0, + 128.0, + 128.0 + ], + "min": [ + -8.031372549019608, + -8.031372549019608, + -8.031372549019608, + -8.031372549019608 + ], + "max": [ + 7.968627450980392, + 7.968627450980392, + 7.968627450980392, + 7.968627450980392 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json new file mode 100644 index 000000000..48e4645c9 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json @@ -0,0 +1,4 @@ +{ + "min": 0.010438590832054616, + "max": 0.9913724160194397 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json new file mode 100644 index 000000000..ec83b94d1 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json @@ -0,0 +1,4 @@ +{ + "min": -10.584291763305664, + "max": 5.210139312744141 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json new file mode 100644 index 000000000..76a0440a0 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json @@ -0,0 +1,48 @@ +{ + "weights": [ + [ + [ + [ + 0.960784912109375, + 2.0588245391845703 + ], + [ + -3.0196075439453125, + 
-3.980391502380371 + ], + [ + 4.9411773681640625, + -6.039215087890625 + ] + ], + [ + [ + 7.0, + 7.960784912109375 + ], + [ + -9.058823585510254, + -10.019607543945312 + ], + [ + 10.980392456054688, + -11.941176414489746 + ] + ], + [ + [ + 13.039216995239258, + 14.000001907348633 + ], + [ + -14.960784912109375, + -16.05882453918457 + ], + [ + 17.019607543945312, + -17.980392456054688 + ] + ] + ] + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json new file mode 100644 index 000000000..4c3669f6b --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0038701011799275875, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json new file mode 100644 index 000000000..04e0648de --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json @@ -0,0 +1,60 @@ +{ + "weights": [ + [ + [ + [ + 138, + 146 + ], + [ + 109, + 102 + ], + [ + 167, + 87 + ] + ], + [ + [ + 182, + 189 + ], + [ + 65, + 58 + ], + [ + 211, + 44 + ] + ], + [ + [ + 226, + 233 + ], + [ + 22, + 14 + ], + [ + 255, + 0 + ] + ] + ] + ], + "scale": [ + 0.13725490196078433 + ], + "zero_point": [ + 131.0 + ], + "min": [ + -17.980392156862745 + ], + "max": [ + 17.019607843137255 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json new file mode 100644 index 000000000..2e1790508 --- 
/dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.25486624240875244, + "zero_point": 178.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json new file mode 100644 index 000000000..d46844baf --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json @@ -0,0 +1,4 @@ +{ + "min": 0.006121497452259064, + "max": 0.9868757891654968 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json new file mode 100644 index 000000000..4441f1876 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json @@ -0,0 +1,4 @@ +{ + "min": -45.46586318969727, + "max": 19.525028419494628 +} diff --git a/compiler/pota-quantization-value-test/test.lst b/compiler/pota-quantization-value-test/test.lst index 9eb348922..d9fd91761 100644 --- a/compiler/pota-quantization-value-test/test.lst +++ b/compiler/pota-quantization-value-test/test.lst @@ -1,4 +1,8 @@ +addTest(Conv2D_004 channel uint8) addTest(Conv2D_004 layer uint8) +addTest(DepthwiseConv2D_002 channel uint8) addTest(DepthwiseConv2D_002 layer uint8) +addTest(FullyConnected_003 channel uint8) addTest(FullyConnected_003 layer uint8) +addTest(TransposeConv_001 channel uint8) addTest(TransposeConv_001 layer uint8) diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt new file mode 100644 index 000000000..98e895c04 --- 
/dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt @@ -0,0 +1 @@ +0.19242816,0.44059092,0.06788187,0.04543579,0.14106855,0.6858487 ,0.6214997 ,0.31582046,0.859484 ,0.3664256 ,0.86936104,0.871024 ,0.68752515,0.5296719 ,0.99137205,0.02956272,0.14838405,0.69830126,0.22359788,0.9060323 ,0.7141239 ,0.5573066 ,0.96645916,0.11426282 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt new file mode 100644 index 000000000..f480f8086 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt @@ -0,0 +1 @@ +0.57016104,0.2788207 ,0.8045938 ,0.7589986 ,0.81506515,0.8411593 ,0.4162234 ,0.1664247 ,0.5584996 ,0.7799966 ,0.4213713 ,0.97587234,0.79440975,0.5089373 ,0.90030503,0.78015554,0.10080549,0.5115089 ,0.77238286,0.9580212 ,0.8758745 ,0.14367636,0.4304664 ,0.55175275 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt new file mode 100644 index 000000000..683ea39b0 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt @@ -0,0 +1 @@ +0.6224246 ,0.30448085,0.29629433,0.44483584,0.30473125,0.6186932 ,0.45563242,0.5394331 ,0.22901213,0.4313142 ,0.4019574 ,0.02263176,0.3806077 ,0.27828163,0.23962335,0.26323524,0.6125012 ,0.5459546 ,0.6340052 ,0.19074932,0.2216875 ,0.77709603,0.03312786,0.02945002 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt new file mode 100644 index 000000000..56c8c259e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt @@ -0,0 +1 @@ +0.7524557 ,0.5408983 ,0.07039106,0.5143847 ,0.04857475,0.7305833 
,0.36986747,0.42291477,0.90452653,0.43744263,0.24857366,0.7537328 ,0.04559262,0.65276045,0.3851062 ,0.49503985,0.37213495,0.10627239,0.7085863 ,0.1913133 ,0.08057284,0.31767172,0.9685745 ,0.5942544 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt new file mode 100644 index 000000000..ecb221e8b --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt @@ -0,0 +1 @@ +0.16251074,0.5574537 ,0.5857036 ,0.877607 ,0.29711136,0.02456062,0.8250261 ,0.21300122,0.5064036 ,0.5882086 ,0.7736793 ,0.09394809,0.98618525,0.6611699 ,0.5001983 ,0.06507304,0.88984424,0.57143325,0.07953393,0.02649987,0.9283147 ,0.65522593,0.18371649,0.12332761 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt new file mode 100644 index 000000000..f4fb503ea --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt @@ -0,0 +1 @@ +0.4383064 ,0.8700848 ,0.86010957,0.08396256,0.7963264 ,0.4156023 ,0.28146362,0.82196397,0.9921972 ,0.09969576,0.23987265,0.6734369 ,0.5469574 ,0.20805728,0.32639247,0.76773816 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt new file mode 100644 index 000000000..af4b01576 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt @@ -0,0 +1 @@ +0.4565062 ,0.92036587,0.47286046,0.18118097,0.5347498 ,0.91550153,0.300375 ,0.00581101,0.38686675,0.91085213,0.07278002,0.35556316,0.13014294,0.7274307 ,0.13867259,0.27517235 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt 
b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt new file mode 100644 index 000000000..57716034e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt @@ -0,0 +1 @@ +0.6900174 ,0.28745306,0.30255774,0.5095008 ,0.6689176 ,0.4914624 ,0.92629427,0.504829 ,0.33514255,0.49005315,0.08569656,0.60965323,0.82193315,0.12380831,0.06971261,0.8822662 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt new file mode 100644 index 000000000..1e03d83b0 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt @@ -0,0 +1 @@ +0.4240734 ,0.5430392 ,0.7536325 ,0.46065134,0.00315792,0.02719985,0.7080977 ,0.24389206,0.8114604 ,0.13292362,0.346597 ,0.70247084,0.55753845,0.01969242,0.82950485,0.66249627 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt new file mode 100644 index 000000000..89ee30a6b --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt @@ -0,0 +1 @@ +0.31586212,0.19079527,0.9161567 ,0.8614566 ,0.9018915 ,0.34651542,0.62554437,0.05542602,0.8268219 ,0.38112178,0.9396123 ,0.49426383,0.8034765 ,0.72456217,0.5404088 ,0.8512237 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt new file mode 100644 index 000000000..9b19de586 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt @@ -0,0 +1 @@ +0.12934422,0.01033248,0.85648465,0.77248603,0.5128501 ,0.2453174 ,0.05065866,0.6601359 ,0.984665 
,0.57697976,0.58360994,0.79360527,0.90097004,0.26150337,0.1575109 ,0.9711614 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt new file mode 100644 index 000000000..45247791a --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt @@ -0,0 +1 @@ +0.23895125,0.30275205,0.9916519 ,0.52355504,0.2577219 ,0.03600567,0.75446343,0.8064663 ,0.07550113,0.919774 ,0.84333146,0.48820078,0.31365713,0.97172034,0.7472666 ,0.66353893 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt new file mode 100644 index 000000000..851e72c7d --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt @@ -0,0 +1 @@ +0.6186688 ,0.4357826 ,0.63239735,0.64489084,0.17722449,0.7146202 ,0.5182415 ,0.45549247,0.21316396,0.9769707 ,0.18412311,0.05855984,0.6755795 ,0.8516815 ,0.20649713,0.32990783 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt new file mode 100644 index 000000000..7ff3c7576 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt @@ -0,0 +1 @@ +0.15501449,0.67026544,0.2957976 ,0.95577955,0.6215903 ,0.2029572 ,0.6069057 ,0.60434276,0.01298514,0.66787016,0.02053251,0.34120578,0.63562113,0.9166186 ,0.7134427 ,0.95491254 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt new file mode 100644 index 000000000..fe60dbd26 --- /dev/null +++ 
b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt @@ -0,0 +1 @@ +0.46877268,0.36748132,0.09441566,0.4476946 ,0.08834982,0.5387882 ,0.8359256 ,0.4374628 ,0.3835091 ,0.3577151 ,0.49470654,0.6017202 ,0.3546875 ,0.64218026,0.69008195,0.37631917 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt new file mode 100644 index 000000000..fb728bb70 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt @@ -0,0 +1 @@ +0.5177879 ,0.10991199,0.19134527,0.25834408,0.16297385,0.5499753 ,0.8782323 ,0.74750453,0.16825114,0.72425395,0.68458 ,0.9399099 ,0.81214494,0.73325175,0.6407931 ,0.02865177,0.04341139,0.44781777,0.59848577,0.72099334,0.654926 ,0.93810713,0.5193446 ,0.8657371 ,0.50826824,0.10122011,0.6946167 ,0.5009533 ,0.27305812,0.7708204 ,0.14410722,0.7092205 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt new file mode 100644 index 000000000..8c72dc764 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt @@ -0,0 +1 @@ +0.57410187,0.5534829 ,0.434663 ,0.55580896,0.9040647 ,0.16827786,0.82538676,0.25387943,0.7611494 ,0.49195638,0.00602222,0.20389748,0.541152 ,0.962896 ,0.37785006,0.9330408 ,0.9868882 ,0.57428783,0.830525 ,0.67987496,0.5576374 ,0.4303 ,0.8442439 ,0.21868347,0.45653513,0.7913927 ,0.31475154,0.6723579 ,0.5749264 ,0.07061622,0.6450232 ,0.52825755 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt new file mode 100644 index 000000000..04ff6ae29 --- /dev/null +++ 
b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt @@ -0,0 +1 @@ +0.49751657,0.3004485 ,0.11624487,0.17704253,0.9022095 ,0.24667789,0.9204152 ,0.09801941,0.9194739 ,0.35418576,0.36659864,0.4962548 ,0.83799136,0.58057517,0.2948883 ,0.28411615,0.14429809,0.8460358 ,0.7026028 ,0.25956342,0.5251088 ,0.06569998,0.01754393,0.45209908,0.95638806,0.6044543 ,0.17229715,0.6828144 ,0.8684328 ,0.5829665 ,0.1456113 ,0.3334334 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt new file mode 100644 index 000000000..1342dac2f --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt @@ -0,0 +1 @@ +0.00850414,0.5746211 ,0.7659193 ,0.8643168 ,0.36803156,0.08386383,0.76002747,0.19255683,0.05220222,0.18169314,0.88597506,0.6793377 ,0.45955214,0.16984127,0.5275391 ,0.910098 ,0.64607793,0.3997594 ,0.38601097,0.40899974,0.10289235,0.896202 ,0.22364503,0.30232555,0.11873382,0.07853477,0.20674925,0.35148785,0.02880615,0.09937044,0.4382221 ,0.53562754 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt new file mode 100644 index 000000000..e3e85392e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt @@ -0,0 +1 @@ +0.8097857 ,0.4602844 ,0.01609277,0.7885611 ,0.9090256 ,0.75475484,0.98657864,0.5927874 ,0.73494065,0.374227 ,0.23557834,0.6020654 ,0.0122237 ,0.37126908,0.38277507,0.67635936,0.4139088 ,0.8625733 ,0.37775922,0.15304309,0.6196326 ,0.4827059 ,0.76868814,0.5530773 ,0.3336473 ,0.11217184,0.5877591 ,0.5325879 ,0.48493427,0.6317438 ,0.9385114 ,0.02825027 diff --git a/compiler/record-minmax/src/RecordMinMax.cpp b/compiler/record-minmax/src/RecordMinMax.cpp index 
17c6aa6ff..0ef7cccd1 100644 --- a/compiler/record-minmax/src/RecordMinMax.cpp +++ b/compiler/record-minmax/src/RecordMinMax.cpp @@ -16,12 +16,12 @@ #include "RecordMinMax.h" #include "RecordFunction.h" -#include "CircleExpContract.h" #include "MinMaxObserver.h" #include "HDF5Importer.h" #include <luci/Importer.h> #include <luci/CircleExporter.h> +#include <luci/CircleFileExpContract.h> #include <luci/IR/CircleQuantParam.h> #include <algorithm> @@ -83,6 +83,15 @@ void RecordMinMax::initialize(const std::string &input_model_path) } std::vector<char> model_data((std::istreambuf_iterator<char>(fs)), std::istreambuf_iterator<char>()); + + // Verify flatbuffers + flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()), + model_data.size()}; + if (!circle::VerifyModelBuffer(verifier)) + { + throw std::runtime_error("ERROR: Failed to verify circle '" + input_model_path + "'"); + } + _module = luci::Importer().importModule(circle::GetModel(model_data.data())); if (_module == nullptr) @@ -185,7 +194,8 @@ void RecordMinMax::saveModel(const std::string &output_model_path) { // Export to output Circle file luci::CircleExporter exporter; - CircleExpContract contract(_module.get(), output_model_path); + + luci::CircleFileExpContract contract(_module.get(), output_model_path); if (!exporter.invoke(&contract)) { diff --git a/compiler/souschef/CMakeLists.txt b/compiler/souschef/CMakeLists.txt index 5a307be16..ca7eddc6f 100644 --- a/compiler/souschef/CMakeLists.txt +++ b/compiler/souschef/CMakeLists.txt @@ -1,5 +1,13 @@ +nnas_find_package(Protobuf QUIET) + +if(NOT Protobuf_FOUND) + message(STATUS "Build souschef: FAILED (missing Protobuf") + return() +endif(NOT Protobuf_FOUND) + file(GLOB_RECURSE SOURCES "src/*.cpp") add_library(souschef STATIC ${SOURCES}) set_target_properties(souschef PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(souschef PUBLIC include) +target_link_libraries(souschef PUBLIC libprotobuf) diff --git 
a/compiler/souschef/include/souschef/Dataset.h b/compiler/souschef/include/souschef/Dataset.h index 46a12e424..ef67a7316 100644 --- a/compiler/souschef/include/souschef/Dataset.h +++ b/compiler/souschef/include/souschef/Dataset.h @@ -19,6 +19,8 @@ #include <vector> +#include <google/protobuf/repeated_field.h> + namespace souschef { @@ -57,6 +59,21 @@ private: std::vector<T> _vec; }; +template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field) +{ + std::vector<T> res; + for (const auto &elem : field) + { + res.emplace_back(elem); + } + return res; +} + +template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field) +{ + return Dataset<T>(as_vector<T>(field)); +} + } // namespace souschef #endif // __SOUSCHEF_DATASET_H__ diff --git a/compiler/souschef/include/souschef/Dims.h b/compiler/souschef/include/souschef/Dims.h new file mode 100644 index 000000000..52c64dd47 --- /dev/null +++ b/compiler/souschef/include/souschef/Dims.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SOUSCHEF_DIMS_H__ +#define __SOUSCHEF_DIMS_H__ + +#include <functional> +#include <numeric> +#include <vector> + +namespace souschef +{ + +template <typename T> using Dims = std::vector<T>; + +template <typename SHAPETYPE> Dims<int32_t> as_dims(const SHAPETYPE &shape) +{ + std::vector<int32_t> res; + + for (auto &dim : shape.dim()) + { + res.emplace_back(static_cast<int32_t>(dim)); + } + + return res; +} + +int32_t element_count(const Dims<int32_t> &dims) +{ + return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>()); +} + +} // namespace souschef + +#endif // __SOUSCHEF_DIMS_H__ diff --git a/compiler/souschef/include/souschef/TensorFiller.h b/compiler/souschef/include/souschef/TensorFiller.h new file mode 100644 index 000000000..1d87f1372 --- /dev/null +++ b/compiler/souschef/include/souschef/TensorFiller.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SOUSCHEF_TENSOR_FILLER_H__ +#define __SOUSCHEF_TENSOR_FILLER_H__ + +#include <map> +#include <vector> + +namespace souschef +{ + +class TensorFiller +{ +public: + virtual ~TensorFiller() = default; + + /** + * @brief This will record the tensor by index, if it needs filler option, + * such as kernel, bias. 
+ */ + void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; } + + /** + * @brief This will store int32 filler values such as reshape information for the tensor + */ + void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues) + { + _tensor_filler_vint32[tensor_index] = expvalues; + } + + void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues) + { + _tensor_filler_vfloat[tensor_index] = expvalues; + } + + /** + * @brief This will return true if the tensor by index, needs a filler option. + */ + bool get_tensor_filler(uint32_t tensor_index) + { + auto it = _tensor_filler.find(tensor_index); + if (it != _tensor_filler.end()) + { + return it->second; + } + return false; + } + + /** + * @brief This will return true if the tensor by index, needs a int array filler option. + */ + bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues) + { + auto it = _tensor_filler_vint32.find(tensor_index); + if (it != _tensor_filler_vint32.end()) + { + expvalues = it->second; + return true; + } + return false; + } + + bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues) + { + auto it = _tensor_filler_vfloat.find(tensor_index); + if (it != _tensor_filler_vfloat.end()) + { + expvalues = it->second; + return true; + } + return false; + } + +private: + std::map<uint32_t, bool> _tensor_filler{}; + std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{}; + std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{}; +}; + +} // namespace souschef + +#endif // __SOUSCHEF_TENSOR_FILLER_H__ diff --git a/compiler/circle-quantizer/src/CircleExpContract.cpp b/compiler/souschef/src/Dims.cpp index b56b7eedc..fba4813fc 100644 --- a/compiler/circle-quantizer/src/CircleExpContract.cpp +++ b/compiler/souschef/src/Dims.cpp @@ -14,20 +14,6 @@ * limitations under the License. 
*/ -#include "CircleExpContract.h" +#include "souschef/Dims.h" -#include <oops/InternalExn.h> - -#include <fstream> -#include <iostream> - -bool CircleExpContract::store(const char *ptr, const size_t size) const -{ - if (!ptr) - INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason"); - - std::ofstream fs(_filepath.c_str(), std::ofstream::binary); - fs.write(ptr, size); - - return fs.good(); -} +// NOTE Do NOT delete this file; this file checks the completeness of 'Dims.h' diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp index 692ce48c1..a4b435dfa 100644 --- a/compiler/tflchef/core/src/ModelChef.cpp +++ b/compiler/tflchef/core/src/ModelChef.cpp @@ -26,6 +26,7 @@ #include "OpChefs.h" #include <souschef/Dataset.h> +#include <souschef/Dims.h> #include "Log.h" @@ -41,52 +42,8 @@ #include <sstream> #include <stdexcept> -namespace -{ - using namespace souschef; -template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field) -{ - std::vector<T> res; - for (const auto &elem : field) - { - res.emplace_back(elem); - } - return res; -} - -template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field) -{ - return Dataset<T>(as_vector<T>(field)); -} - -} // namespace - -namespace -{ - -template <typename T> using Dims = std::vector<T>; - -Dims<int32_t> as_dims(const tflchef::TensorShape &shape) -{ - std::vector<int32_t> res; - - for (auto &dim : shape.dim()) - { - res.emplace_back(static_cast<int32_t>(dim)); - } - - return res; -} - -int32_t element_count(const Dims<int32_t> &dims) -{ - return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>()); -} - -} // namespace - namespace { diff --git a/compiler/record-minmax/src/CircleExpContract.cpp b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.cpp index b703250bd..500aa467f 100644 --- a/compiler/record-minmax/src/CircleExpContract.cpp +++ 
b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.cpp @@ -14,25 +14,17 @@ * limitations under the License. */ -#include "CircleExpContract.h" +#include "NonMaxSuppressionV5.h" -#include <oops/InternalExn.h> - -#include <fstream> -#include <iostream> - -namespace record_minmax -{ - -bool CircleExpContract::store(const char *ptr, const size_t size) const +flatbuffers::Offset<void> NonMaxSuppressionV5Chef::value(flatbuffers::FlatBufferBuilder &fbb) const { - if (!ptr) - INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason"); + tflite::NonMaxSuppressionV5OptionsBuilder options_builder{fbb}; - std::ofstream fs(_filepath, std::ofstream::binary); - fs.write(ptr, size); - - return fs.good(); + return options_builder.Finish().Union(); } -} // namespace record_minmax +std::unique_ptr<OpChef> +NonMaxSuppressionV5ChefFactory::create(const tflchef::Operation *operation) const +{ + return std::unique_ptr<OpChef>{new NonMaxSuppressionV5Chef{operation}}; +} diff --git a/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h new file mode 100644 index 000000000..a3c8b6009 --- /dev/null +++ b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OP_NON_MAX_SUPPRESSION_V5_H__ +#define __OP_NON_MAX_SUPPRESSION_V5_H__ + +#include "OpChef.h" + +class NonMaxSuppressionV5Chef final : public OpChef +{ +public: + explicit NonMaxSuppressionV5Chef(const tflchef::Operation *operation) : _operation{operation} + { + // DO NOTHING + } + +public: + tflite::BuiltinOperator code(void) const override + { + return tflite::BuiltinOperator_NON_MAX_SUPPRESSION_V5; + } + + tflite::BuiltinOptions type(void) const override + { + return tflite::BuiltinOptions_NonMaxSuppressionV5Options; + } + + flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override; + +private: + const tflchef::Operation *_operation; +}; + +struct NonMaxSuppressionV5ChefFactory final : public OpChefFactory +{ + std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override; +}; + +#endif // __OP_NON_MAX_SUPPRESSION_V5_H__ diff --git a/compiler/tflchef/core/src/OpChef.def b/compiler/tflchef/core/src/OpChef.def index 244186265..6b242e811 100644 --- a/compiler/tflchef/core/src/OpChef.def +++ b/compiler/tflchef/core/src/OpChef.def @@ -56,6 +56,7 @@ OP_CHEF(MirrorPad, MirrorPadChefFactory) OP_CHEF(Mul, MulChefFactory) OP_CHEF(Neg, NegChefFactory) OP_CHEF(NonMaxSuppressionV4, NonMaxSuppressionV4ChefFactory) +OP_CHEF(NonMaxSuppressionV5, NonMaxSuppressionV5ChefFactory) OP_CHEF(NotEqual, NotEqualChefFactory) OP_CHEF(OneHot, OneHotChefFactory) OP_CHEF(Pack, PackChefFactory) diff --git a/compiler/tflchef/core/src/OpChefs.h b/compiler/tflchef/core/src/OpChefs.h index 5b2e89bd9..7637b1c69 100644 --- a/compiler/tflchef/core/src/OpChefs.h +++ b/compiler/tflchef/core/src/OpChefs.h @@ -69,6 +69,7 @@ #include "Op/Mul.h" #include "Op/Neg.h" #include "Op/NonMaxSuppressionV4.h" +#include "Op/NonMaxSuppressionV5.h" #include "Op/NotEqual.h" #include "Op/OneHot.h" #include "Op/Pack.h" diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto index 70b966ec3..9909d517a 100644 --- 
a/compiler/tflchef/proto/tflchef.proto +++ b/compiler/tflchef/proto/tflchef.proto @@ -371,6 +371,10 @@ message NonMaxSuppressionV4Options { // None } +message NonMaxSuppressionV5Options { + // None +} + message NotEqualOptions { // None } @@ -544,7 +548,7 @@ message Operation { // HardSwishOptions 196 optional DepthToSpaceOptions depth_to_space_options = 197; optional NonMaxSuppressionV4Options non_max_suppression_v4_options = 198; - // NonMaxSuppressionV5Options 199 + optional NonMaxSuppressionV5Options non_max_suppression_v5_options = 199; optional ScatterNdOptions scatter_nd_options = 200; optional NotEqualOptions notequal_options = 201; optional ExpandDimsOptions expand_dims_options = 202; diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt index 645c16144..83127cb3e 100644 --- a/compiler/tflchef/tflite/CMakeLists.txt +++ b/compiler/tflchef/tflite/CMakeLists.txt @@ -7,3 +7,4 @@ target_link_libraries(tflchef_tflite tflchef_proto) target_link_libraries(tflchef_tflite mio_tflite) target_link_libraries(tflchef_tflite stdex) target_link_libraries(tflchef_tflite cwrap) +target_link_libraries(tflchef_tflite souschef) diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp new file mode 100644 index 000000000..db7f4c932 --- /dev/null +++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NonMaxSuppressionV5.h" + +#include "Convert.h" +#include "FillerHelper.h" + +namespace tflchef +{ + +void TFliteOpNonMaxSuppressionV5::filler(const tflite::Operator *op, TFliteImport *import, + tflchef::ModelRecipe *model_recipe) const +{ + const auto &inputs = *op->inputs(); + + const tflite::Tensor *max_output_size_tensor = import->tensors()->Get(inputs[2]); + assert(max_output_size_tensor->type() == tflite::TensorType::TensorType_INT32); + + const tflite::Tensor *iou_threshold_tensor = import->tensors()->Get(inputs[3]); + assert(iou_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32); + + const tflite::Tensor *score_threshold_tensor = import->tensors()->Get(inputs[4]); + assert(score_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32); + + const tflite::Tensor *soft_nms_sigma_tensor = import->tensors()->Get(inputs[5]); + assert(soft_nms_sigma_tensor->type() == tflite::TensorType::TensorType_FLOAT32); + + for (int32_t index = 2; index < 6; ++index) + { + fill_tensor_to_import(index, import); + } +} + +tflchef::Operation *TFliteOpNonMaxSuppressionV5::build(const tflite::Operator *op, + TFliteImport *import, + tflchef::ModelRecipe *model_recipe) const +{ + auto operation = model_recipe->add_operation(); + + operation->set_type("NonMaxSuppressionV5"); + + return operation; +} + +} // namespace tflchef diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h new file mode 100644 index 000000000..c948043f4 --- /dev/null +++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__ +#define __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__ + +#include "TFliteOpChef.h" + +namespace tflchef +{ + +/** + * @brief tflchef operator builder for NON_MAX_SUPPRESSION_V5 + */ +class TFliteOpNonMaxSuppressionV5 : public TFliteOpChef +{ +public: + void filler(const tflite::Operator *op, TFliteImport *import, + tflchef::ModelRecipe *model_recipe) const override; + tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import, + tflchef::ModelRecipe *model_recipe) const override; +}; + +} // namespace tflchef + +#endif // __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__ diff --git a/compiler/tflchef/tflite/src/TFliteImport.h b/compiler/tflchef/tflite/src/TFliteImport.h index 5b46f4501..9d0a642ab 100644 --- a/compiler/tflchef/tflite/src/TFliteImport.h +++ b/compiler/tflchef/tflite/src/TFliteImport.h @@ -19,6 +19,8 @@ #include <mio/tflite/schema_generated.h> +#include <souschef/TensorFiller.h> + #include <tflchef.pb.h> #include <map> @@ -40,7 +42,7 @@ bool is_custom(const tflite::OperatorCode *opcode); /** * @brief Loads TF lite file and provides helpers to access attributes */ -class TFliteImport +class TFliteImport : public souschef::TensorFiller { public: TFliteImport(const tflite::Model *model); @@ -63,63 +65,6 @@ public: std::string opcode_name(const tflite::Operator *op) const; size_t buffer_info(const tflite::Tensor *tensor, const uint8_t **buff_data); - /** - * @brief This will record the tensor by index, if it needs filler option, - * such as kernel, bias. 
- */ - void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; } - - /** - * @brief This will store int32 filler values such as reshape information for the tensor - */ - void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues) - { - _tensor_filler_vint32[tensor_index] = expvalues; - } - - void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues) - { - _tensor_filler_vfloat[tensor_index] = expvalues; - } - - /** - * @brief This will return true if the tensor by index, needs a filler option. - */ - bool get_tensor_filler(uint32_t tensor_index) - { - auto it = _tensor_filler.find(tensor_index); - if (it != _tensor_filler.end()) - { - return it->second; - } - return false; - } - - /** - * @brief This will return true if the tensor by index, needs a int array filler option. - */ - bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues) - { - auto it = _tensor_filler_vint32.find(tensor_index); - if (it != _tensor_filler_vint32.end()) - { - expvalues = it->second; - return true; - } - return false; - } - - bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues) - { - auto it = _tensor_filler_vfloat.find(tensor_index); - if (it != _tensor_filler_vfloat.end()) - { - expvalues = it->second; - return true; - } - return false; - } - private: const TFliteSubGraphs_t *_subgraphs{nullptr}; const TFliteBuffers_t *_buffers{nullptr}; @@ -129,10 +74,6 @@ private: std::vector<const tflite::OperatorCode *> _op_codes{}; std::vector<int32_t> _inputs{}; std::vector<int32_t> _outputs{}; - - std::map<uint32_t, bool> _tensor_filler{}; - std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{}; - std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{}; }; } // namespace tflchef diff --git a/compiler/tflchef/tflite/src/TFliteOpChefs.h b/compiler/tflchef/tflite/src/TFliteOpChefs.h index de14e37d1..36a010957 100644 --- a/compiler/tflchef/tflite/src/TFliteOpChefs.h +++ 
b/compiler/tflchef/tflite/src/TFliteOpChefs.h @@ -69,6 +69,7 @@ #include "Op/Mul.h" #include "Op/Neg.h" #include "Op/NonMaxSuppressionV4.h" +#include "Op/NonMaxSuppressionV5.h" #include "Op/NotEqual.h" #include "Op/OneHot.h" #include "Op/Pack.h" diff --git a/compiler/tflchef/tflite/src/TFliteOpRegistry.h b/compiler/tflchef/tflite/src/TFliteOpRegistry.h index 8d33007be..a454e98b6 100644 --- a/compiler/tflchef/tflite/src/TFliteOpRegistry.h +++ b/compiler/tflchef/tflite/src/TFliteOpRegistry.h @@ -106,6 +106,7 @@ private: REG_TFL_OP(MUL, TFliteOpMul); REG_TFL_OP(NEG, TFliteOpNeg); REG_TFL_OP(NON_MAX_SUPPRESSION_V4, TFliteOpNonMaxSuppressionV4); + REG_TFL_OP(NON_MAX_SUPPRESSION_V5, TFliteOpNonMaxSuppressionV5); REG_TFL_OP(NOT_EQUAL, TFliteOpNotEqual); REG_TFL_OP(ONE_HOT, TFliteOpOneHot); REG_TFL_OP(PACK, TFliteOpPack); diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp index df027c3e3..24b9264ff 100644 --- a/compiler/tfldump/src/OpPrinter.cpp +++ b/compiler/tfldump/src/OpPrinter.cpp @@ -677,9 +677,11 @@ OpPrinterRegistry::OpPrinterRegistry() _op_map[tflite::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>(); _op_map[tflite::BuiltinOperator_MUL] = make_unique<MulPrinter>(); // There is no Option for NON_MAX_SUPPRESSION_V4 + // There is no Option for NON_MAX_SUPPRESSION_V5 _op_map[tflite::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>(); _op_map[tflite::BuiltinOperator_PACK] = make_unique<PackPrinter>(); // There is no Option for PAD + // There is no Option for PADV2 // There is no Option for PRELU // There is no Option for RELU // There is no Option for RELU6 diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions.h index 00b3de943..680118618 100644 --- a/compiler/tflite2circle/src/BuildBuiltinOptions.h +++ b/compiler/tflite2circle/src/BuildBuiltinOptions.h @@ -63,6 +63,7 @@ #include "BuildBuiltinOptions/MulOptions.h" #include "BuildBuiltinOptions/NegOptions.h" 
#include "BuildBuiltinOptions/NonMaxSuppressionV4Options.h" +#include "BuildBuiltinOptions/NonMaxSuppressionV5Options.h" #include "BuildBuiltinOptions/NotEqualOptions.h" #include "BuildBuiltinOptions/OneHotOptions.h" #include "BuildBuiltinOptions/PackOptions.h" diff --git a/compiler/luci-value-test/tester/src/CircleExpContract.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.cpp index b56b7eedc..637c544ff 100644 --- a/compiler/luci-value-test/tester/src/CircleExpContract.cpp +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.cpp @@ -14,20 +14,17 @@ * limitations under the License. */ -#include "CircleExpContract.h" +#include "NonMaxSuppressionV5Options.h" -#include <oops/InternalExn.h> - -#include <fstream> -#include <iostream> - -bool CircleExpContract::store(const char *ptr, const size_t size) const +namespace tflite2circle { - if (!ptr) - INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason"); - - std::ofstream fs(_filepath.c_str(), std::ofstream::binary); - fs.write(ptr, size); - return fs.good(); +flatbuffers::Offset<circle::NonMaxSuppressionV5Options> +build_circle_NonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &fb, + const tflite::Operator *) +{ + circle::NonMaxSuppressionV5OptionsBuilder builtin_options_builder{fb}; + return builtin_options_builder.Finish(); } + +} // namespace tflite2circle diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h new file mode 100644 index 000000000..faf989acc --- /dev/null +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__ +#define __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__ + +#include <mio/tflite/schema_generated.h> +#include <mio/circle/schema_generated.h> + +namespace tflite2circle +{ + +flatbuffers::Offset<circle::NonMaxSuppressionV5Options> +build_circle_NonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &fb, + const tflite::Operator *op); + +} // namespace tflite2circle + +#endif // __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__ diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp index cb4437a49..14c44cb36 100644 --- a/compiler/tflite2circle/src/CircleModel.cpp +++ b/compiler/tflite2circle/src/CircleModel.cpp @@ -119,6 +119,75 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla // is_variable bool is_variable = it->is_variable(); + flatbuffers::Offset<circle::SparsityParameters> sparsity; + // sparsity + if (it->sparsity()) + { + flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order; + flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map; + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>> + dim_metadata; + + // traversal_order + if (it->sparsity()->traversal_order()) + { + auto traversal_order_vec = std::vector<int32_t>{ + it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()}; + traversal_order = fb->CreateVector(traversal_order_vec); + } + + // block_map + if (it->sparsity()->block_map()) + { + auto block_map_vec = 
std::vector<int32_t>{it->sparsity()->block_map()->begin(), + it->sparsity()->block_map()->end()}; + block_map = fb->CreateVector(block_map_vec); + } + + // dim_metadata + std::vector<flatbuffers::Offset<circle::DimensionMetadata>> dim_metadata_vec; + auto tflite_dim_metadata = it->sparsity()->dim_metadata(); + for (auto it : *tflite_dim_metadata) + { + // array_segments + auto tflite_array_segments_type = it->array_segments_type(); + auto circle_array_segments = + get_circle_sparse_index_vector(*fb, it, tflite_array_segments_type); + auto circle_array_segments_type = + get_circle_sparse_index_vector_type(tflite_array_segments_type); + + // array_indices + auto tflite_array_indices_type = it->array_indices_type(); + auto circle_array_indices = + get_circle_sparse_index_vector(*fb, it, tflite_array_indices_type); + auto circle_array_indices_type = + get_circle_sparse_index_vector_type(tflite_array_indices_type); + + auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*fb}; + + circle_dim_metadata_builder.add_format(get_circle_dimension_type(it->format())); + circle_dim_metadata_builder.add_dense_size(it->dense_size()); + circle_dim_metadata_builder.add_array_segments(circle_array_segments); + circle_dim_metadata_builder.add_array_segments_type(circle_array_segments_type); + circle_dim_metadata_builder.add_array_indices(circle_array_indices); + circle_dim_metadata_builder.add_array_indices_type(circle_array_indices_type); + auto dim_metadata = circle_dim_metadata_builder.Finish(); + dim_metadata_vec.emplace_back(dim_metadata); + } + dim_metadata = fb->CreateVector(dim_metadata_vec); + + sparsity = circle::CreateSparsityParameters(*fb, traversal_order, block_map, dim_metadata); + } + + // shape signature + flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature; + if (it->shape_signature()) + { + auto shape_signature_vec = + std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()}); + shape_signature = 
fb->CreateVector(shape_signature_vec); + } + circle::TensorBuilder tensor_builder{*fb}; tensor_builder.add_shape(shape); tensor_builder.add_type(get_circle_tensortype(it->type())); @@ -126,6 +195,8 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla tensor_builder.add_name(name); tensor_builder.add_quantization(quantization); tensor_builder.add_is_variable(is_variable); + tensor_builder.add_sparsity(sparsity); + tensor_builder.add_shape_signature(shape_signature); auto tensor = tensor_builder.Finish(); tensor_vec.emplace_back(tensor); } @@ -226,6 +297,14 @@ CircleModel::CircleModel(FlatBufBuilder &fb, TFLModel &model) : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb} { const tflite::Model *tfl_model = model.load_model(); + // verify flatbuffers + flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model._data.data()), + model._data.size()}; + if (!tflite::VerifyModelBuffer(verifier)) + { + throw std::runtime_error("ERROR: Failed to verify tflite"); + } + _operator_codes_offset = std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes()); _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs()); diff --git a/compiler/tflite2circle/src/DataLookup.cpp b/compiler/tflite2circle/src/DataLookup.cpp index b0d35d1a5..75504b062 100644 --- a/compiler/tflite2circle/src/DataLookup.cpp +++ b/compiler/tflite2circle/src/DataLookup.cpp @@ -123,4 +123,79 @@ circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode) } } +circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type) +{ + switch (tfl_dim_type) + { + case tflite::DimensionType_DENSE: + return circle::DimensionType_DENSE; + case tflite::DimensionType_SPARSE_CSR: + return circle::DimensionType_SPARSE_CSR; + default: + throw std::runtime_error("tflite2circle: wrong dimension type."); + } +} + +flatbuffers::Offset<void> 
+get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb, + const tflite::DimensionMetadata *dm, + const tflite::SparseIndexVector &tfl_sparse_index_vector_type) +{ + switch (tfl_sparse_index_vector_type) + { + case tflite::SparseIndexVector_NONE: + return flatbuffers::Offset<void>(); + case tflite::SparseIndexVector_Int32Vector: + { + auto values_vec_int32 = + std::vector<int32_t>{dm->array_segments_as_Int32Vector()->values()->begin(), + dm->array_segments_as_Int32Vector()->values()->end()}; + auto values_int32 = fb.CreateVector(values_vec_int32); + circle::Int32VectorBuilder int32_vector_builder{fb}; + int32_vector_builder.add_values(values_int32); + return int32_vector_builder.Finish().Union(); + } + case tflite::SparseIndexVector_Uint16Vector: + { + auto values_vec_uint16 = + std::vector<uint16_t>{dm->array_segments_as_Uint16Vector()->values()->begin(), + dm->array_segments_as_Uint16Vector()->values()->end()}; + auto values_uint16 = fb.CreateVector(values_vec_uint16); + circle::Uint16VectorBuilder uint16_vector_builder{fb}; + uint16_vector_builder.add_values(values_uint16); + return uint16_vector_builder.Finish().Union(); + } + case tflite::SparseIndexVector_Uint8Vector: + { + auto values_vec_uint8 = + std::vector<uint8_t>{dm->array_segments_as_Uint8Vector()->values()->begin(), + dm->array_segments_as_Uint8Vector()->values()->end()}; + auto values_uint8 = fb.CreateVector(values_vec_uint8); + circle::Uint8VectorBuilder uint8_vector_builder{fb}; + uint8_vector_builder.add_values(values_uint8); + return uint8_vector_builder.Finish().Union(); + } + default: + throw std::runtime_error("tflite2circle: wrong SparseIndexVector type."); + } +} + +circle::SparseIndexVector +get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type) +{ + switch (tfl_sparse_index_vector_type) + { + case tflite::SparseIndexVector_NONE: + return circle::SparseIndexVector_NONE; + case tflite::SparseIndexVector_Int32Vector: + return 
circle::SparseIndexVector_Int32Vector; + case tflite::SparseIndexVector_Uint16Vector: + return circle::SparseIndexVector_Uint16Vector; + case tflite::SparseIndexVector_Uint8Vector: + return circle::SparseIndexVector_Uint8Vector; + default: + throw std::runtime_error("tflite2circle: wrong SparseIndexVector type."); + } +} + } // namespace tflite2circle diff --git a/compiler/tflite2circle/src/DataLookup.h b/compiler/tflite2circle/src/DataLookup.h index 7ea01b9c8..26ad74666 100644 --- a/compiler/tflite2circle/src/DataLookup.h +++ b/compiler/tflite2circle/src/DataLookup.h @@ -76,6 +76,25 @@ circle::BuiltinOptions get_circle_builtin_options_type(const tflite::Operator *o */ circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode); +/** + * @brief Returns circle DimensionType according to tflite. +*/ +circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type); + +/** + * @brief Returns circle SparseIndexVector according to tflite. +*/ +flatbuffers::Offset<void> +get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb, + const tflite::DimensionMetadata *dm, + const tflite::SparseIndexVector &tfl_sparse_index_vector_type); + +/** + * @brief Returns circle SparseIndexVector type according to tflite. 
+*/ +circle::SparseIndexVector +get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type); + } // namespace tflite2circle #endif // __DATA_LOOKUP_H__ diff --git a/compiler/tflite2circle/src/TFLBuiltinOptions.lst b/compiler/tflite2circle/src/TFLBuiltinOptions.lst index a2a14538e..22b59863b 100644 --- a/compiler/tflite2circle/src/TFLBuiltinOptions.lst +++ b/compiler/tflite2circle/src/TFLBuiltinOptions.lst @@ -101,7 +101,7 @@ TFL_BUILTIN_OPTIONS(IfOptions) TFL_BUILTIN_OPTIONS(WhileOptions) TFL_BUILTIN_OPTIONS(DepthToSpaceOptions) TFL_BUILTIN_OPTIONS(NonMaxSuppressionV4Options) -//TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options) +TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options) TFL_BUILTIN_OPTIONS(RankOptions) TFL_BUILTIN_OPTIONS(ScatterNdOptions) TFL_BUILTIN_OPTIONS(SegmentSumOptions) diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt index b8cb79331..be4398996 100644 --- a/compiler/vconone/CMakeLists.txt +++ b/compiler/vconone/CMakeLists.txt @@ -1,5 +1,5 @@ if (NOT VCONONE_VERSION) - set(VCONONE_VERSION 0x0000000000080001) + set(VCONONE_VERSION 0x0000000000090001) # NOTE order is [build patch minor major] # if VCONONE_VERSION is set with -D option, it will be cached # you may have to remove cache file if you remove -D option |