diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2022-04-15 19:15:11 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2022-04-15 19:15:11 +0900 |
commit | 3ad689f0803519e343c36d5700646e86059df961 (patch) | |
tree | 862346c401a5577518fa7f042532aa931b53aa0e /compiler | |
parent | ac6e4dd7b480e83b586ef533d7b29a8a97eb48fe (diff) | |
download | nnfw-3ad689f0803519e343c36d5700646e86059df961.tar.gz nnfw-3ad689f0803519e343c36d5700646e86059df961.tar.bz2 nnfw-3ad689f0803519e343c36d5700646e86059df961.zip |
Imported Upstream version 1.20.0upstream/1.20.0submit/tizen/20220415.103159
Diffstat (limited to 'compiler')
867 files changed, 33719 insertions, 10210 deletions
diff --git a/compiler/angkor/CMakeLists.txt b/compiler/angkor/CMakeLists.txt index 44b5e9058..7f5cb88c2 100644 --- a/compiler/angkor/CMakeLists.txt +++ b/compiler/angkor/CMakeLists.txt @@ -5,7 +5,9 @@ list(REMOVE_ITEM SOURCES ${TESTS}) # NOTE STATIC is deliberately used here to allow clients to use 'angkor' without installation add_library(angkor STATIC ${HEADERS} ${SOURCES}) -set_target_properties(angkor PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(angkor PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif (NOT NNCC_LIBRARY_NO_PIC) set_target_properties(angkor PROPERTIES LINKER_LANGUAGE CXX) target_include_directories(angkor PUBLIC include) target_link_libraries(angkor PRIVATE nncc_common) diff --git a/compiler/arser/tests/arser.test.cpp b/compiler/arser/tests/arser.test.cpp index 4e88f0cb7..63121b845 100644 --- a/compiler/arser/tests/arser.test.cpp +++ b/compiler/arser/tests/arser.test.cpp @@ -23,30 +23,9 @@ #include "arser/arser.h" -using namespace arser; +#include "Prompt.h" -class Prompt -{ -public: - Prompt(const std::string &command) - { - std::istringstream iss(command); - std::vector<std::string> token(std::istream_iterator<std::string>{iss}, - std::istream_iterator<std::string>()); - _arg = std::move(token); - _argv.reserve(_arg.size()); - for (const auto &t : _arg) - { - _argv.push_back(const_cast<char *>(t.data())); - } - } - int argc(void) const { return _argv.size(); } - char **argv(void) { return _argv.data(); } - -private: - std::vector<char *> _argv; - std::vector<std::string> _arg; -}; +using namespace arser; TEST(BasicTest, option) { @@ -57,7 +36,7 @@ TEST(BasicTest, option) .nargs(0) .help("It provides additional details as to what the executable is doing"); - Prompt prompt("./executable --verbose"); + test::Prompt prompt("./executable --verbose"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -79,7 +58,7 @@ TEST(BasicTest, OptionalArgument) .type(arser::DataType::FLOAT) 
.help("Set a frequency as you provided."); - Prompt prompt("./radio --volume 5 --frequency 128.5"); + test::Prompt prompt("./radio --volume 5 --frequency 128.5"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -103,7 +82,7 @@ TEST(BasicTest, NonRequiredOptionalArgument_NEG) .type(arser::DataType::INT32) .help("Set a volume as you provided."); - Prompt prompt("./radio"); // empty argument + test::Prompt prompt("./radio"); // empty argument /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -122,7 +101,7 @@ TEST(BasicTest, RequiredOptionalArgument_NEG) .required() .help("Set a volume as you provided."); - Prompt prompt("./radio"); + test::Prompt prompt("./radio"); /* act */ /* assert */ EXPECT_THROW(arser.parse(prompt.argc(), prompt.argv()), std::runtime_error); } @@ -134,7 +113,7 @@ TEST(BasicTest, OptionalMultipleArgument) arser.add_argument("--add").nargs(2).type(arser::DataType::INT32_VEC).help("Add two numbers."); - Prompt prompt("./calculator --add 3 5"); + test::Prompt prompt("./calculator --add 3 5"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -167,8 +146,8 @@ TEST(BasicTest, MultipleOptionalArgument) .help("give traning data to this program.") .required(); - Prompt prompt("./ml --input_path /I/am/in.put --output_path I/am/out.put " - "--training_data 2 43 234 3 334"); + test::Prompt prompt("./ml --input_path /I/am/in.put --output_path I/am/out.put " + "--training_data 2 43 234 3 334"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -195,7 +174,7 @@ TEST(BasicTest, MultipleFloatValue) .type(arser::DataType::FLOAT_VEC) .help("Add two float numbers."); - Prompt prompt("./calculator --add_float 3.2 5.4"); + test::Prompt prompt("./calculator --add_float 3.2 5.4"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -217,7 +196,7 @@ TEST(BasicTest, MultipleStringValue) .type(arser::DataType::STR_VEC) .help("insert your three favorite color"); - Prompt 
prompt("./color_factory --three_color red blue yellow"); + test::Prompt prompt("./color_factory --three_color red blue yellow"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -241,7 +220,7 @@ TEST(BasicTest, ExitWithFunctionCall) arser.add_argument("--name").nargs(1).type(arser::DataType::STR).help("Name your hero"); - Prompt prompt("./hero --history"); + test::Prompt prompt("./hero --history"); /* act */ /* assert */ EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0), "When I was young.."); @@ -258,7 +237,7 @@ TEST(BasicTest, ExitWithFunctionCallWithBind) .help("Show version and exit") .exit_with(std::bind(printVersion, "1.2.0")); - Prompt prompt("./arser --version"); + test::Prompt prompt("./arser --version"); /* act */ /* assert */ EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0), "arser version : 1.2.0"); @@ -275,7 +254,7 @@ TEST(BasicTest, ExitWithFunctionCallWithLamda) arser.add_argument("OS").nargs(1).type(arser::DataType::STR).help("The OS you want to boot"); - Prompt prompt("./computer --shutdown"); + test::Prompt prompt("./computer --shutdown"); /* act */ /* assert */ EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0), "Good bye.."); } @@ -315,7 +294,7 @@ TEST(BasicTest, DefaultValue) .default_value("no name") .help("Enter your name"); - Prompt prompt("/phone --time 1 52 34 --name arser"); + test::Prompt prompt("/phone --time 1 52 34 --name arser"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -359,7 +338,7 @@ TEST(BasicTest, shortOption) .help("output path of this program.") .required(true); - Prompt prompt("./driver -i /I/am/in.put --output_path I/am/out.put"); + test::Prompt prompt("./driver -i /I/am/in.put --output_path I/am/out.put"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -385,7 +364,7 @@ TEST(BasicTest, shortMultipleOption) .help("output path of this program.") .required(true); - 
Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put"); + test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -411,7 +390,7 @@ TEST(BasicTest, OptWithRequiredDuplicate_NEG) .help("output path of this program.") .required(true); - Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate"); + test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate"); /* act */ /* assert */ EXPECT_THROW(arser.parse(prompt.argc(), prompt.argv()), std::runtime_error); } @@ -432,7 +411,7 @@ TEST(BasicTest, OptWithNonRequiredDuplicate) .help("output path of this program.") .required(true); - Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate"); + test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -449,7 +428,7 @@ TEST(BasicTest, AccumulateVectorOptions) arser.add_argument("--specify").nargs(3).accumulated(true).type(arser::DataType::STR_VEC); - Prompt prompt("./driver --specify a b c --specify 1 2 3"); + test::Prompt prompt("./driver --specify a b c --specify 1 2 3"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -473,7 +452,7 @@ TEST(BasicTest, AccumulateScalarOptions) arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT); - Prompt prompt("./driver --specify 1 --specify 2"); + test::Prompt prompt("./driver --specify 1 --specify 2"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ @@ -491,7 +470,7 @@ TEST(BasicTest, AccumulateScalarOptions_WrongType_NEG) arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT); - Prompt prompt("./driver --specify 1 --specify 2"); + test::Prompt prompt("./driver --specify 1 --specify 2"); /* act */ arser.parse(prompt.argc(), prompt.argv()); /* assert */ diff --git 
a/compiler/circle-eval-diff/CMakeLists.txt b/compiler/circle-eval-diff/CMakeLists.txt new file mode 100644 index 000000000..4d86f8097 --- /dev/null +++ b/compiler/circle-eval-diff/CMakeLists.txt @@ -0,0 +1,34 @@ +set(DRIVER "driver/Driver.cpp") + +file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) + +add_executable(circle-eval-diff ${DRIVER} ${SOURCES}) +target_include_directories(circle-eval-diff PRIVATE include) + +target_link_libraries(circle-eval-diff arser) +target_link_libraries(circle-eval-diff safemain) +target_link_libraries(circle-eval-diff foder) +target_link_libraries(circle-eval-diff loco) +target_link_libraries(circle-eval-diff luci_import) +target_link_libraries(circle-eval-diff luci_lang) +target_link_libraries(circle-eval-diff luci_interpreter) +target_link_libraries(circle-eval-diff dio_hdf5) +target_link_libraries(circle-eval-diff vconone) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +# circle-eval-diff is executable, so we do not link it to the test. +# Instead, we use TEST_SOURCES to specify sources uesd for tests. +set(TEST_SOURCES + "src/MetricPrinter.cpp" + "src/Tensor.cpp") + +nnas_find_package(GTest REQUIRED) +GTest_AddTest(circle_eval_diff_test ${TESTS} ${TEST_SOURCES}) +target_include_directories(circle_eval_diff_test PRIVATE src) +target_link_libraries(circle_eval_diff_test luci_testhelper) +target_link_libraries(circle_eval_diff_test nncc_coverage) diff --git a/compiler/circle-eval-diff/README.md b/compiler/circle-eval-diff/README.md new file mode 100644 index 000000000..a3727cc6d --- /dev/null +++ b/compiler/circle-eval-diff/README.md @@ -0,0 +1,51 @@ +# circle-eval-diff + +_circle-eval-diff_ compares inference results of two circle models. + +## Use cases + +1. _circle-eval-diff_ can be used to evaluate reconstruction errors of quantized models. +2. 
_circle-eval-diff_ can be used to verify optimization (or any kind of value-preserving conversion) is safe. + +## Usage + +Run circle-eval-diff with the following arguments. + +--first_input_model: first model to compare (.circle). + +--second_input_model: second model to compare (.circle). + +--first_input_data: input data for the first model (.h5, directory). Random data will be used if this argument is not given. + +--second_input_data: input data for the second model (.h5, directory). Random data will be used if this argument is not given. + +--input_data_format: input data format (h5 (default), directory). + +--metric: metric to compare inference results (MAE (default), etc). + +``` +$ ./circle-eval-diff + --first_input_model <first_input_model> + --second_input_model <second_input_model> + --first_input_data <first_input_data> + --second_input_data <second_input_data> + --input_data_format <data_format> + --metric <metric> +``` + +For example, +``` +$ ./circle-eval-diff + --first_input_model A.circle + --second_input_model B.circle + --first_input_data A.h5 + --second_input_data B.h5 + --input_data_format h5 + --metric MAE +``` + +It will print MAE (Mean Absolute Error) between the inference result of A.circle with A.h5 and that of B.circle with B.h5. + +## Note + +Circle models are executed by _luci-interpreter_. diff --git a/compiler/circle-eval-diff/driver/Driver.cpp b/compiler/circle-eval-diff/driver/Driver.cpp new file mode 100644 index 000000000..f4a12a403 --- /dev/null +++ b/compiler/circle-eval-diff/driver/Driver.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "CircleEvalDiff.h" + +#include <arser/arser.h> +#include <vconone/vconone.h> + +using namespace circle_eval_diff; + +namespace +{ + +std::string to_lower_case(std::string s) +{ + std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); }); + return s; +} + +Metric to_metric(const std::string &str) +{ + if (to_lower_case(str).compare("mae") == 0) + return Metric::MAE; + + throw std::runtime_error("Unsupported metric."); +} + +InputFormat to_input_format(const std::string &str) +{ + if (to_lower_case(str).compare("h5") == 0) + return InputFormat::H5; + + throw std::runtime_error("Unsupported input format."); +} + +void print_version(void) +{ + std::cout << "circle-eval-diff version " << vconone::get_string() << std::endl; + std::cout << vconone::get_copyright() << std::endl; +} + +} // namespace + +int entry(const int argc, char **argv) +{ + arser::Arser arser("Compare inference results of two circle models"); + + arser.add_argument("--version") + .nargs(0) + .required(false) + .default_value(false) + .help("Show version information and exit") + .exit_with(print_version); + + arser.add_argument("--first_model") + .nargs(1) + .type(arser::DataType::STR) + .required(true) + .help("First input model filepath"); + + arser.add_argument("--second_model") + .nargs(1) + .type(arser::DataType::STR) + .required(true) + .help("Second input model filepath"); + + arser.add_argument("--first_input_data") + .nargs(1) + .type(arser::DataType::STR) + .required(false) + .help("Input data filepath for the first model. 
If not given, circle-eval-diff will run with " + "randomly generated data"); + + arser.add_argument("--second_input_data") + .nargs(1) + .type(arser::DataType::STR) + .required(false) + .help("Input data filepath for the second model. If not given, circle-eval-diff will run with " + "randomly generated data"); + + arser.add_argument("--metric") + .nargs(1) + .type(arser::DataType::STR) + .required(false) + .default_value("MAE") + .help("Metric for comparison (default: MAE)"); + + arser.add_argument("--input_data_format") + .nargs(1) + .type(arser::DataType::STR) + .required(false) + .default_value("h5") + .help("Input data format. h5/hdf5 (default) or directory"); + + try + { + arser.parse(argc, argv); + } + catch (const std::runtime_error &err) + { + std::cout << err.what() << std::endl; + std::cout << arser; + return 255; + } + + const auto first_model_path = arser.get<std::string>("--first_model"); + const auto second_model_path = arser.get<std::string>("--second_model"); + + // Default values + std::string first_input_data_path; + std::string second_input_data_path; + std::string metric; + std::string input_data_format; + + if (arser["--first_input_data"]) + first_input_data_path = arser.get<std::string>("--first_input_data"); + + if (arser["--second_input_data"]) + second_input_data_path = arser.get<std::string>("--second_input_data"); + + if (arser["--first_input_data"] != arser["--second_input_data"]) + throw std::runtime_error("Input data path should be given for both first_model and " + "second_model, or neither must be given."); + + metric = arser.get<std::string>("--metric"); + input_data_format = arser.get<std::string>("--input_data_format"); + + auto ctx = std::make_unique<CircleEvalDiff::Context>(); + { + ctx->first_model_path = first_model_path; + ctx->second_model_path = second_model_path; + ctx->metric = to_metric(metric); + ctx->input_format = to_input_format(input_data_format); + } + + CircleEvalDiff ced(std::move(ctx)); + + ced.init(); + + 
ced.evalDiff(first_input_data_path, second_input_data_path); + + return EXIT_SUCCESS; +} diff --git a/compiler/circle-eval-diff/include/CircleEvalDiff.h b/compiler/circle-eval-diff/include/CircleEvalDiff.h new file mode 100644 index 000000000..bf6aff46d --- /dev/null +++ b/compiler/circle-eval-diff/include/CircleEvalDiff.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CIRCLE_EVAL_DIFF_H__ +#define __CIRCLE_EVAL_DIFF_H__ + +#include <luci/IR/Module.h> +#include <luci_interpreter/Interpreter.h> + +#include <string> +#include <memory> + +namespace circle_eval_diff +{ + +// Forward declaration +class ModuleEvalDiff; + +enum class Metric +{ + Undefined, // For debugging + MAE, +}; + +enum class InputFormat +{ + Undefined, // For debugging + H5, + // TODO Implement Random, Directory +}; + +class CircleEvalDiff final +{ +public: + struct Context + { + std::string first_model_path; + std::string second_model_path; + Metric metric = Metric::Undefined; + InputFormat input_format = InputFormat::Undefined; + }; + +public: + CircleEvalDiff(std::unique_ptr<Context> &&ctx); + + ~CircleEvalDiff(); + + void init(); + + // Evaluate two circle models for the given input data and compare the results + void evalDiff(const std::string &first_input_data_path, + const std::string &second_input_data_path) const; + +private: + std::unique_ptr<Context> _ctx; + 
std::unique_ptr<ModuleEvalDiff> _runner; +}; + +} // namespace circle_eval_diff + +#endif // __CIRCLE_EVAL_DIFF_H__ diff --git a/compiler/circle-eval-diff/requires.cmake b/compiler/circle-eval-diff/requires.cmake new file mode 100644 index 000000000..cae9b7c62 --- /dev/null +++ b/compiler/circle-eval-diff/requires.cmake @@ -0,0 +1,7 @@ +require("loco") +require("luci") +require("luci-interpreter") +require("dio-hdf5") +require("safemain") +require("arser") +require("vconone") diff --git a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp new file mode 100644 index 000000000..c39a11371 --- /dev/null +++ b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CircleEvalDiff.h" +#include "ModuleEvalDiff.h" +#include "MetricPrinter.h" + +#include <foder/FileLoader.h> +#include <luci/Importer.h> + +#include <stdexcept> + +namespace +{ + +std::unique_ptr<luci::Module> import(const std::string &model_path) +{ + // Load model from the file + foder::FileLoader loader{model_path}; + std::vector<char> model_data = loader.load(); + + // Verify flatbuffers + flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()), + model_data.size()}; + if (not circle::VerifyModelBuffer(verifier)) + { + throw std::runtime_error("Failed to verify circle '" + model_path + "'"); + } + + auto module = luci::Importer().importModule(circle::GetModel(model_data.data())); + + if (not module) + throw std::runtime_error("Failed to load '" + model_path + "'"); + + return module; +} + +} // namespace + +namespace circle_eval_diff +{ + +CircleEvalDiff::CircleEvalDiff(std::unique_ptr<Context> &&ctx) + : _ctx(std::move(ctx)), _runner(nullptr) +{ +} + +CircleEvalDiff::~CircleEvalDiff() = default; + +void CircleEvalDiff::init() +{ + // Set metric + std::unique_ptr<MetricPrinter> metric; + switch (_ctx->metric) + { + case Metric::MAE: + metric = std::make_unique<MAEPrinter>(); + break; + default: + throw std::runtime_error("Unsupported metric."); + } + + auto first_module = import(_ctx->first_model_path); + auto second_module = import(_ctx->second_model_path); + + // Set runner + switch (_ctx->input_format) + { + case InputFormat::H5: + _runner = std::make_unique<H5InputEvalDiff>(std::move(first_module), std::move(second_module), + std::move(metric)); + break; + default: + throw std::runtime_error("Unsupported input format."); + } +} + +void CircleEvalDiff::evalDiff(const std::string &first_input_data_path, + const std::string &second_input_data_path) const +{ + _runner->evalDiff(first_input_data_path, second_input_data_path); +} + +} // namespace circle_eval_diff diff --git 
a/compiler/circle-eval-diff/src/MetricPrinter.cpp b/compiler/circle-eval-diff/src/MetricPrinter.cpp new file mode 100644 index 000000000..d65eb9b63 --- /dev/null +++ b/compiler/circle-eval-diff/src/MetricPrinter.cpp @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MetricPrinter.h" + +#include <luci/IR/CircleNode.h> + +#include <iostream> +#include <cassert> + +using Tensor = circle_eval_diff::Tensor; + +#define THROW_UNLESS(COND, MSG) \ + if (not(COND)) \ + throw std::runtime_error(MSG); + +namespace +{ + +template <typename T> bool same_shape(const T a, const T b) +{ + if (a->rank() != b->rank()) + return false; + + for (uint32_t i = 0; i < a->rank(); i++) + { + if (not(a->dim(i) == b->dim(i))) + return false; + } + + return true; +} + +template <loco::DataType DT> std::shared_ptr<Tensor> to_fp32(const std::shared_ptr<Tensor> &tensor) +{ + assert(tensor->dtype() == DT); // FIX_CALLER_UNLESS + + auto fp32_tensor = std::make_shared<Tensor>(); + { + fp32_tensor->dtype(loco::DataType::FLOAT32); + fp32_tensor->rank(tensor->rank()); + for (uint32_t i = 0; i < tensor->rank(); i++) + fp32_tensor->dim(i) = tensor->dim(i); + + const auto num_elems = tensor->size<DT>(); + fp32_tensor->size<loco::DataType::FLOAT32>(num_elems); + for (uint32_t i = 0; i < num_elems; i++) + fp32_tensor->at<loco::DataType::FLOAT32>(i) = static_cast<float>(tensor->at<DT>(i)); + } 
+ return fp32_tensor; +} + +std::shared_ptr<Tensor> fp32(const std::shared_ptr<Tensor> &tensor) +{ + switch (tensor->dtype()) + { + case loco::DataType::FLOAT32: + return tensor; + case loco::DataType::U8: + return to_fp32<loco::DataType::U8>(tensor); + case loco::DataType::S16: + return to_fp32<loco::DataType::S16>(tensor); + default: + throw std::runtime_error("Unsupported data type."); + } +} + +} // namespace + +namespace circle_eval_diff +{ + +void MAEPrinter::init(const luci::Module *first, const luci::Module *second) +{ + THROW_UNLESS(first != nullptr, "Invalid module."); + THROW_UNLESS(second != nullptr, "Invalid module."); + + const auto first_output = loco::output_nodes(first->graph()); + const auto second_output = loco::output_nodes(second->graph()); + + assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < first_output.size(); i++) + { + const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]); + const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]); + assert(same_shape(first_node, second_node)); // FIX_CALLER_UNLESS + + // Create tensors to store intermediate results + _intermediate.emplace_back(); + _intermediate.at(i).dtype(loco::DataType::FLOAT32); + // NOTE Use both first_node and second_node to avoid release build break + _intermediate.at(i).rank(first_node->rank()); + uint32_t num_elems = 1; + for (uint32_t j = 0; j < second_node->rank(); j++) + { + _intermediate.at(i).dim(j) = second_node->dim(j); + num_elems *= second_node->dim(j).value(); + } + _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems); + + // Check the buffer is initilized with zero + for (uint32_t j = 0; j < num_elems; j++) + assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0); + + // Save output names for logging + _output_names.emplace_back(first_node->name()); + } +} + +void MAEPrinter::accum_absolute_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a, + 
const std::shared_ptr<Tensor> &b) +{ + assert(a->dtype() == loco::DataType::FLOAT32 and + b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS + assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS + assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++) + { + _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) += + std::abs(a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i)); + } +} + +void MAEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first, + const std::vector<std::shared_ptr<Tensor>> &second) +{ + assert(first.size() == second.size()); // FIX_CALLER_UNLESS + assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto first_output = first[output_idx]; + const auto second_output = second[output_idx]; + + // Cast data to fp32 and then compute absolute error + const auto fp32_first_output = fp32(first_output); + const auto fp32_second_output = fp32(second_output); + + accum_absolute_error(output_idx, fp32_first_output, fp32_second_output); + } + + _num_data++; +} + +void MAEPrinter::dump(std::ostream &os) const +{ + os << "Mean Absolute Error (MAE)" << std::endl; + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto name = _output_names.at(output_idx); + const auto &inter = _intermediate.at(output_idx); + assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS + const auto elem_count = inter.size<loco::DataType::FLOAT32>(); + + // Compute MAE + float mae = 0.0; + for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++) + mae += inter.at<loco::DataType::FLOAT32>(elem_idx); + + mae = mae / elem_count; + mae = mae / _num_data; + + os << "MAE for " << name << " is " << mae << std::endl; + } +} + +} // namespace circle_eval_diff + +#undef THROW_UNLESS diff --git 
a/compiler/circle-eval-diff/src/MetricPrinter.h b/compiler/circle-eval-diff/src/MetricPrinter.h new file mode 100644 index 000000000..b51581c31 --- /dev/null +++ b/compiler/circle-eval-diff/src/MetricPrinter.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__ +#define __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__ + +#include <luci/IR/Module.h> + +#include "Tensor.h" + +#include <vector> +#include <iostream> + +namespace circle_eval_diff +{ + +// Class to print metrics +// How to use? +// +// MetricPrinter metric; +// metric.init(first_module, second_module); // optional initialization +// +// for (..) // Evaluate data one by one +// { +// .. 
+// metric.accumulate(first_result, second_result); // accumulate results +// } +// +// std::cout << &metric << std::endl; // print result +class MetricPrinter +{ +public: + virtual ~MetricPrinter() = default; + + // Child class can implement this function if necessary + // NOTE init can be skipped + virtual void init(const luci::Module *, const luci::Module *) {} + + // Accumulate results of comparing the first and the second model's outputs + virtual void accumulate(const std::vector<std::shared_ptr<Tensor>> &first, + const std::vector<std::shared_ptr<Tensor>> &second) = 0; + + // Dump the final result of the corresponding metric + virtual void dump(std::ostream &os) const = 0; +}; + +static inline std::ostream &operator<<(std::ostream &os, const MetricPrinter *m) +{ + m->dump(os); + return os; +} + +// Mean Absolute Error +class MAEPrinter final : public MetricPrinter +{ +public: + void init(const luci::Module *first, const luci::Module *second); + + void accumulate(const std::vector<std::shared_ptr<Tensor>> &first, + const std::vector<std::shared_ptr<Tensor>> &second); + + void dump(std::ostream &os) const; + +private: + void accum_absolute_error(uint32_t index, const std::shared_ptr<Tensor> &a, + const std::shared_ptr<Tensor> &b); + +private: + // Store accumulated sum of absolute error for each output + std::vector<Tensor> _intermediate; + std::vector<std::string> _output_names; + uint32_t _num_data = 0; +}; + +} // namespace circle_eval_diff + +#endif // __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__ diff --git a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp new file mode 100644 index 000000000..51ca89799 --- /dev/null +++ b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MetricPrinter.h" + +#include <luci/test/TestIOGraph.h> + +#include <gtest/gtest.h> + +using Tensor = circle_eval_diff::Tensor; + +namespace +{ + +// TODO Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp +template <typename T> +luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype, + const std::vector<uint32_t> &shape, + const std::vector<T> &values) +{ + auto node = g->nodes()->create<luci::CircleConst>(); + node->dtype(dtype); + node->rank(shape.size()); + + uint32_t size = 1; + for (uint32_t i = 0; i < shape.size(); ++i) + { + node->dim(i) = shape.at(i); + size *= shape.at(i); + } + node->shape_status(luci::ShapeStatus::VALID); + +#define INIT_VALUES(DT) \ + { \ + node->size<DT>(size); \ + for (uint32_t i = 0; i < values.size(); ++i) \ + node->at<DT>(i) = values[i]; \ + } + + switch (dtype) + { + case loco::DataType::U8: + INIT_VALUES(loco::DataType::U8); + break; + case loco::DataType::S16: + INIT_VALUES(loco::DataType::S16); + break; + case loco::DataType::S32: + INIT_VALUES(loco::DataType::S32); + break; + case loco::DataType::FLOAT32: + INIT_VALUES(loco::DataType::FLOAT32) + break; + default: + INTERNAL_EXN("create_const_node called with unsupported type"); + break; + } + return node; +} + +/** + * Simple graph which adds constant (addition) to the input + * + * [Input] [Const] (addition) + * \ / + * [Add] + * + */ +class AddGraphlet 
+{ +public: + AddGraphlet() = default; + + void init(loco::Graph *g, float addition) + { + std::vector<float> addition_val; + for (uint32_t i = 0; i < 16; i++) + addition_val.push_back(addition); + _add_c = create_const_node(g, loco::DataType::FLOAT32, {1, 16}, addition_val); + + _add = g->nodes()->create<luci::CircleAdd>(); + _add->y(_add_c); + _add->fusedActivationFunction(luci::FusedActFunc::NONE); + _add->dtype(loco::DataType::FLOAT32); + _add->shape({1, 16}); + _add->name("add"); + } + +protected: + luci::CircleAdd *_add = nullptr; + luci::CircleConst *_add_c = nullptr; +}; + +class AddOneGraph : public luci::test::TestIOGraph, public AddGraphlet +{ +public: + AddOneGraph() = default; + + void init(void) + { + luci::test::TestIOGraph::init({1, 4}, {1, 16}); + AddGraphlet::init(g(), 1.0); + + _add->x(input()); + + output()->from(_add); + } + + std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); } +}; + +class AddTwoGraph : public luci::test::TestIOGraph, public AddGraphlet +{ +public: + AddTwoGraph() = default; + + void init(void) + { + luci::test::TestIOGraph::init({1, 4}, {1, 16}); + AddGraphlet::init(g(), 2.0); + + _add->x(input()); + + output()->from(_add); + } + + std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); } +}; + +// Return number of elements of the node. +uint32_t numElements(const luci::CircleNode *node) +{ + uint32_t num_elem = 1; + for (uint32_t i = 0; i < node->rank(); ++i) + num_elem *= node->dim(i).value(); + return num_elem; +} + +// Return Tensor which has the same dtype and shape with node. +// Buffer does not have any data yet. 
+std::shared_ptr<Tensor> create_empty_tensor(const luci::CircleNode *node) +{ + auto tensor = std::make_shared<Tensor>(); + { + tensor->dtype(node->dtype()); + tensor->rank(node->rank()); + for (uint32_t i = 0; i < node->rank(); i++) + tensor->dim(i) = node->dim(i); + tensor->size<loco::DataType::FLOAT32>(numElements(node)); + } + + return tensor; +} + +std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module, float value) +{ + auto outputs = loco::output_nodes(module->graph()); + assert(outputs.size() == 1); + auto output = *outputs.begin(); + auto output_cnode = loco::must_cast<luci::CircleNode *>(output); + auto tensor = create_empty_tensor(output_cnode); + auto tensor_size = tensor->size<loco::DataType::FLOAT32>(); + for (uint32_t i = 0; i < tensor_size; i++) + { + tensor->at<loco::DataType::FLOAT32>(i) = value; + } + return tensor; +} + +} // namespace + +namespace circle_eval_diff +{ + +TEST(CircleEvalMetricPrinterTest, MAE_simple) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + MAEPrinter mae; + + mae.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector<std::shared_ptr<Tensor>> first_result; + { + auto output = output_tensor_with_value(&first, 1.0); + first_result.emplace_back(output); + } + + std::vector<std::shared_ptr<Tensor>> second_result; + { + auto output = output_tensor_with_value(&second, 2.0); + second_result.emplace_back(output); + } + + mae.accumulate(first_result, second_result); + + std::stringstream ss; + mae.dump(ss); + std::string result = ss.str(); + + EXPECT_NE(std::string::npos, result.find("MAE for output_0 is 1")); +} + +TEST(CircleEvalMetricPrinterTest, MAE_init_with_null_NEG) +{ + MAEPrinter mae; + + EXPECT_ANY_THROW(mae.init(nullptr, nullptr)); +} + +} // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp b/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp new file mode 100644 index 000000000..85f985873 --- /dev/null +++ b/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ModuleEvalDiff.h" +#include "Tensor.h" + +#include <luci_interpreter/Interpreter.h> +#include <dio_hdf5/HDF5Importer.h> + +#include <string> +#include <stdexcept> +#include <iostream> +#include <cassert> + +using Tensor = circle_eval_diff::Tensor; +using DataType = loco::DataType; +using Shape = std::vector<loco::Dimension>; +using HDF5Importer = dio::hdf5::HDF5Importer; + +namespace +{ + +// Check the type and the shape of CircleInput +void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape) +{ + // Type check + if (dtype != input_node->dtype()) + throw std::runtime_error("Wrong input type."); + + if (shape.size() != input_node->rank()) + throw std::runtime_error("Input rank mismatch."); + + for (uint32_t i = 0; i < shape.size(); i++) + { + if (not(shape.at(i) == input_node->dim(i))) + throw std::runtime_error("Input shape mismatch."); + } +} + +// Return number of elements of the node. +uint32_t numElements(const luci::CircleNode *node) +{ + uint32_t num_elem = 1; + for (uint32_t i = 0; i < node->rank(); ++i) + num_elem *= node->dim(i).value(); + return num_elem; +} + +// Return Tensor which has the same dtype and shape with node. +// Buffer does not have any data yet. 
+std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node) +{ + auto tensor = std::make_shared<Tensor>(); + { + tensor->dtype(node->dtype()); + tensor->rank(node->rank()); + for (uint32_t i = 0; i < node->rank(); i++) + tensor->dim(i) = node->dim(i); + + switch (node->dtype()) + { + case loco::DataType::FLOAT32: + tensor->size<loco::DataType::FLOAT32>(numElements(node)); + break; + case loco::DataType::U8: + tensor->size<loco::DataType::U8>(numElements(node)); + break; + case loco::DataType::S16: + tensor->size<loco::DataType::S16>(numElements(node)); + break; + case loco::DataType::S32: + tensor->size<loco::DataType::S32>(numElements(node)); + break; + case loco::DataType::S64: + tensor->size<loco::DataType::S64>(numElements(node)); + break; + default: + throw std::runtime_error("Unsupported input tensor dtype for " + node->name()); + } + } + + return tensor; +} + +} // namespace + +namespace circle_eval_diff +{ + +void H5InputEvalDiff::evalDiff(const std::string &first_input_data_path, + const std::string &second_input_data_path) const +{ + const auto interp = std::make_unique<luci_interpreter::Interpreter>(_first_module.get()); + + _metric->init(_first_module.get(), _second_module.get()); + + try + { + HDF5Importer first_h5(first_input_data_path); + first_h5.importGroup("value"); + + HDF5Importer second_h5(second_input_data_path); + second_h5.importGroup("value"); + + const auto first_num_data = first_h5.numData(); + const auto second_num_data = second_h5.numData(); + + if (first_num_data != second_num_data) + throw std::runtime_error( + "Number of data in the first data file and the second data file mismatches."); + + if (first_num_data == 0) + throw std::runtime_error("Input data file does not contain any record."); + + const auto first_input_nodes = loco::input_nodes(_first_module->graph()); + const auto first_num_inputs = first_input_nodes.size(); + const auto first_output_nodes = loco::output_nodes(_first_module->graph()); + const auto 
first_num_outputs = first_output_nodes.size(); + + const auto second_input_nodes = loco::input_nodes(_second_module->graph()); + const auto second_num_inputs = second_input_nodes.size(); + const auto second_output_nodes = loco::output_nodes(_second_module->graph()); + const auto second_num_outputs = second_output_nodes.size(); + + for (int32_t data_idx = 0; data_idx < first_num_data; data_idx++) + { + std::cout << "Evaluating " << data_idx << "'th data" << std::endl; + + if (first_num_inputs != first_h5.numInputs(data_idx) || + second_num_inputs != second_h5.numInputs(data_idx)) + throw std::runtime_error("Wrong number of inputs in " + std::to_string(data_idx) + + "th data."); + + // Do inference and return output + auto eval = [&](HDF5Importer &h5, uint32_t num_inputs, + const std::vector<loco::Node *> &input_nodes, uint32_t num_outputs, + const std::vector<loco::Node *> &output_nodes) { + // Write input data + for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++) + { + const auto *input_node = + loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]); + assert(input_node->index() == input_idx); + + auto tensor = createEmptyTensor(input_node); + if (h5.isRawData()) + { + h5.readTensor(data_idx, input_idx, tensor->buffer()); + } + else + { + DataType dtype; + Shape shape; + h5.readTensor(data_idx, input_idx, &dtype, &shape, tensor->buffer()); + + // Check the type and the shape of the input data is valid + verifyTypeShape(input_node, dtype, shape); + } + + interp->writeInputTensor(input_node, tensor->buffer(), tensor->byte_size()); + } + + // Interpret + interp->interpret(); + + // Read output data + std::vector<std::shared_ptr<Tensor>> outputs; + for (uint32_t output_idx = 0; output_idx < num_outputs; output_idx++) + { + const auto *output_node = + loco::must_cast<const luci::CircleOutput *>(output_nodes[output_idx]); + assert(output_node->index() == output_idx); + + auto tensor = createEmptyTensor(output_node); + 
interp->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size()); + outputs.emplace_back(tensor); + } + + return outputs; + }; + + auto first_output = + eval(first_h5, first_num_inputs, first_input_nodes, first_num_outputs, first_output_nodes); + auto second_output = eval(second_h5, second_num_inputs, second_input_nodes, + second_num_outputs, second_output_nodes); + + // Accumulate diffs + _metric->accumulate(first_output, second_output); + } + + std::cout << "Evaluation finished. Number of data: " << first_num_data << std::endl; + } + catch (const H5::Exception &e) + { + H5::Exception::printErrorStack(); + throw std::runtime_error("HDF5 error occurred."); + } + + // Print metric + std::cout << _metric.get() << std::endl; +} + +} // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.h b/compiler/circle-eval-diff/src/ModuleEvalDiff.h new file mode 100644 index 000000000..c7642f60b --- /dev/null +++ b/compiler/circle-eval-diff/src/ModuleEvalDiff.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__ +#define __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__ + +#include "MetricPrinter.h" + +#include <luci/IR/Module.h> + +#include <memory> + +namespace circle_eval_diff +{ + +class ModuleEvalDiff +{ +public: + ModuleEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second, + std::unique_ptr<MetricPrinter> &&metric) + : _first_module(std::move(first)), _second_module(std::move(second)), _metric(std::move(metric)) + { + } + + virtual ~ModuleEvalDiff() = default; + + // Implement this in the child class + virtual void evalDiff(const std::string &first_input_data_path, + const std::string &second_input_data_path) const = 0; + +protected: + std::unique_ptr<luci::Module> _first_module; + std::unique_ptr<luci::Module> _second_module; + std::unique_ptr<MetricPrinter> _metric; +}; + +class H5InputEvalDiff final : public ModuleEvalDiff +{ +public: + H5InputEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second, + std::unique_ptr<MetricPrinter> &&metric) + : ModuleEvalDiff(std::move(first), std::move(second), std::move(metric)) + { + } + + void evalDiff(const std::string &first_input_data_path, + const std::string &second_input_data_path) const; +}; + +// TODO Implement ModuleEvalDiff for random input and directory input + +} // namespace circle_eval_diff + +#endif // __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__ diff --git a/compiler/circle-eval-diff/src/Tensor.cpp b/compiler/circle-eval-diff/src/Tensor.cpp new file mode 100644 index 000000000..6710e8c3d --- /dev/null +++ b/compiler/circle-eval-diff/src/Tensor.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Tensor.h" + +#include <cassert> + +namespace circle_eval_diff +{ + +#define THROW_UNLESS(COND, MSG) \ + if (not(COND)) \ + throw std::runtime_error(MSG); + +template <loco::DataType DT> uint32_t Tensor::size(void) const +{ + assert(dtype() == DT); + assert(_data.size() % sizeof(typename loco::DataTypeImpl<DT>::Type) == 0); + return _data.size() / sizeof(typename loco::DataTypeImpl<DT>::Type); +} + +template <loco::DataType DT> void Tensor::size(uint32_t l) +{ + assert(dtype() == DT); + _data.resize(l * sizeof(typename loco::DataTypeImpl<DT>::Type)); +} + +template <loco::DataType DT> +const typename loco::DataTypeImpl<DT>::Type &Tensor::at(uint32_t n) const +{ + assert(dtype() == DT); + THROW_UNLESS(n < size<DT>(), "Access to out of buffer boundary."); + return *(reinterpret_cast<const typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n); +} + +template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &Tensor::at(uint32_t n) +{ + assert(dtype() == DT); + THROW_UNLESS(n < size<DT>(), "Access to out of buffer boundary."); + return *(reinterpret_cast<typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n); +} + +#undef THROW_UNLESS + +#define INSTANTIATE(DT) \ + template uint32_t Tensor::size<DT>(void) const; \ + template void Tensor::size<DT>(uint32_t); \ + template const typename loco::DataTypeImpl<DT>::Type &Tensor::at<DT>(uint32_t) const; \ + template typename loco::DataTypeImpl<DT>::Type &Tensor::at<DT>(uint32_t); + +INSTANTIATE(loco::DataType::S64); +INSTANTIATE(loco::DataType::S32); 
+INSTANTIATE(loco::DataType::S16); +INSTANTIATE(loco::DataType::U8); +INSTANTIATE(loco::DataType::FLOAT32); + +#undef INSTANTIATE + +} // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/Tensor.h b/compiler/circle-eval-diff/src/Tensor.h new file mode 100644 index 000000000..65ab60638 --- /dev/null +++ b/compiler/circle-eval-diff/src/Tensor.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CIRCLE_EVAL_DIFF_TENSOR_H__ +#define __CIRCLE_EVAL_DIFF_TENSOR_H__ + +#include <loco.h> + +#include <vector> + +namespace circle_eval_diff +{ + +struct TensorDataType +{ +public: + const loco::DataType &dtype(void) const { return _dtype; } + void dtype(const loco::DataType &dtype) { _dtype = dtype; } + +private: + loco::DataType _dtype = loco::DataType::Unknown; +}; + +struct TensorShape +{ +public: + uint32_t rank(void) const { return _dims.size(); } + void rank(uint32_t value) { _dims.resize(value); } + + const loco::Dimension &dim(uint32_t axis) const { return _dims.at(axis); } + loco::Dimension &dim(uint32_t axis) { return _dims.at(axis); } + + void shape(std::initializer_list<uint32_t> dims) + { + rank(dims.size()); + + uint32_t axis = 0; + for (auto d : dims) + { + dim(axis++) = d; + } + } + +private: + std::vector<loco::Dimension> _dims; +}; + +// Tensor has three kinds of data +// 1. DataType (_dtype) +// 2. Shape (_dims) +// 3. 
Buffer (_data) +struct Tensor final : public TensorShape, public TensorDataType +{ +public: + template <loco::DataType DT> uint32_t size(void) const; + template <loco::DataType DT> void size(uint32_t size); + template <loco::DataType DT> const typename loco::DataTypeImpl<DT>::Type &at(uint32_t n) const; + template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &at(uint32_t n); + uint8_t *buffer(void) { return _data.data(); } + uint32_t byte_size(void) const { return _data.size(); } + +private: + std::vector<uint8_t> _data; +}; + +} // namespace circle_eval_diff + +#endif // __CIRCLE_EVAL_DIFF_TENSOR_H__ diff --git a/compiler/circle-eval-diff/src/Tensor.test.cpp b/compiler/circle-eval-diff/src/Tensor.test.cpp new file mode 100644 index 000000000..3bdeaecdf --- /dev/null +++ b/compiler/circle-eval-diff/src/Tensor.test.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Tensor.h" + +#include <gtest/gtest.h> + +using Tensor = circle_eval_diff::Tensor; + +namespace +{ + +template <loco::DataType DT> void test_out_of_buffer_range() +{ + Tensor t; + + t.shape({1, 2, 3}); + t.dtype(DT); + t.size<DT>(6); + + EXPECT_ANY_THROW(t.at<DT>(6)); +} + +template <loco::DataType DT> void test_getter_setter() +{ + Tensor t; + + // Check shape + t.shape({1, 2, 3}); + EXPECT_EQ(3, t.rank()); + EXPECT_EQ(1, t.dim(0)); + EXPECT_EQ(2, t.dim(1)); + EXPECT_EQ(3, t.dim(2)); + + // Check dtype + t.dtype(DT); + EXPECT_EQ(DT, t.dtype()); + + // Check buffer + t.size<DT>(6); + EXPECT_EQ(6 * sizeof(typename loco::DataTypeImpl<DT>::Type), t.byte_size()); + for (uint32_t i = 0; i < 6; i++) + t.at<DT>(i) = i; + + for (uint32_t i = 0; i < 6; i++) + EXPECT_EQ(i, t.at<DT>(i)); +} + +} // namespace + +TEST(CircleEvalDiffTensorTest, constructor) +{ + Tensor t; + + EXPECT_EQ(0, t.byte_size()); + EXPECT_EQ(0, t.rank()); + EXPECT_EQ(loco::DataType::Unknown, t.dtype()); +} + +TEST(CircleEvalDiffTensorTest, getter_setter) +{ + test_getter_setter<loco::DataType::S64>(); + test_getter_setter<loco::DataType::S32>(); + test_getter_setter<loco::DataType::S16>(); + test_getter_setter<loco::DataType::U8>(); + test_getter_setter<loco::DataType::FLOAT32>(); + + SUCCEED(); +} + +TEST(CircleEvalDiffTensorTest, out_of_shape_range_NEG) +{ + Tensor t; + t.shape({1, 2, 2, 3}); + + EXPECT_ANY_THROW(t.dim(4)); +} + +TEST(CircleEvalDiffTensorTest, out_of_buffer_range_NEG) +{ + test_out_of_buffer_range<loco::DataType::S64>(); + test_out_of_buffer_range<loco::DataType::S32>(); + test_out_of_buffer_range<loco::DataType::S16>(); + test_out_of_buffer_range<loco::DataType::U8>(); + test_out_of_buffer_range<loco::DataType::FLOAT32>(); + + SUCCEED(); +} diff --git a/compiler/circle-execution-plan/CMakeLists.txt b/compiler/circle-execution-plan/CMakeLists.txt index 115d24860..2f657c171 100644 --- a/compiler/circle-execution-plan/CMakeLists.txt +++ 
b/compiler/circle-execution-plan/CMakeLists.txt @@ -1,4 +1,9 @@ set(SOURCES + pal/IScratchpadHelper.h + pal/ScratchpadHelperLinux.h + pal/ScratchpadHelperMCU.h + pal/ScratchpadHelperCMSISNN.h + pal/TargetPlatform.h src/CircleExecutionPlan.cpp src/ExecutionPlanner.cpp src/ExecutionPlanner.h @@ -13,4 +18,5 @@ target_link_libraries(circle_execution_plan luci_export) target_link_libraries(circle_execution_plan luci_plan) target_link_libraries(circle_execution_plan arser) +target_include_directories(circle_execution_plan PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/pal") install(TARGETS circle_execution_plan DESTINATION bin) diff --git a/compiler/circle-execution-plan/README.md b/compiler/circle-execution-plan/README.md index e789a55db..dbb7d4f85 100644 --- a/compiler/circle-execution-plan/README.md +++ b/compiler/circle-execution-plan/README.md @@ -10,13 +10,12 @@ The output circle file contains plan (`CircleNodeMemoryPlan`) information for ev - number which determines order in which nodes will be executed - memory offsets for node output tensors from the beginning of shared memory buffer -In order to record and read this metadata, we use `CircleImportMetadata` and `CircleExportMetadata`. -For this purpose we use `std::map<uint32_t, std::vector<uint32_t>> _memory_plan_table` which for each node with key ID contains encoded `CircleNodeMemoryPlan` data. +In order to record and read this data, we use `luci::CircleNodeExecutionPlan`. ### Execution plan building In order to build "execution plan" we use `ExecutionPlanner` class. -The main method is `get_execution_plan()` which for each node finds and writes to its annotations +The main method is `make_execution_plan()` which for each node finds and writes to its annotations "execution plan". For this purpose there are two steps: - determining the order of execution of nodes, which is stored in `_ordered_nodes` vector. 
Now for this purpose there is only one default method `get_default_execution_order_plan()` that uses `loco::postorder_traversal(const std::vector<loco::Node *> &roots)`. diff --git a/compiler/circle-execution-plan/pal/IScratchpadHelper.h b/compiler/circle-execution-plan/pal/IScratchpadHelper.h new file mode 100644 index 000000000..f5a991526 --- /dev/null +++ b/compiler/circle-execution-plan/pal/IScratchpadHelper.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CIRCLE_EXECUTION_PLAN_ISRCRATCHPAD_HELPER_H +#define CIRCLE_EXECUTION_PLAN_ISRCRATCHPAD_HELPER_H + +#include <luci/IR/Nodes/CircleAveragePool2D.h> +#include <luci/IR/Nodes/CircleBatchMatMul.h> +#include <luci/IR/Nodes/CircleConv2D.h> +#include <luci/IR/Nodes/CircleDepthwiseConv2D.h> +#include <luci/IR/Nodes/CircleSVDF.h> +#include <cstdint> + +namespace circle_planner +{ + +class IScratchpadHelper +{ +public: + virtual uint32_t + ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) = 0; + + virtual std::vector<uint32_t> + ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) = 0; + + virtual uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) = 0; + + virtual uint32_t + ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) = 0; + + virtual std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) = 0; + + virtual ~IScratchpadHelper() = default; +}; + +} // namespace circle_planner + +#endif // CIRCLE_EXECUTION_PLAN_ISRCRATCHPAD_HELPER_H diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h b/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h new file mode 100644 index 000000000..5369c0937 --- /dev/null +++ b/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H +#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H + +#include "IScratchpadHelper.h" +#include <cassert> + +namespace circle_planner +{ + +namespace +{ + +inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size, + int32_t filter_size, int32_t out_size) +{ + const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1; + const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2; + return padding > 0 ? padding : 0; +} + +} // namespace + +class ScratchpadHelperCMSISNN : public IScratchpadHelper +{ +public: + explicit ScratchpadHelperCMSISNN(bool use_dsp) : _use_dsp(use_dsp) + { + // Do nothing + } + + uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final + { + // Main logic of arm_avgpool_s8_get_buffer_size + + const auto avg_pool_input = loco::must_cast<luci::CircleNode *>(avg_pool->value()); + + if (avg_pool_input->dtype() != loco::DataType::S8 or !_use_dsp) + return 0; + + const auto depth = static_cast<int32_t>(avg_pool_input->dim(3).value()); + + return depth * sizeof(int32_t); + } + + std::vector<uint32_t> + ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final + { + throw std::runtime_error("BatchMatMul is not currently supported for cmsisnn platform"); + } + + uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) final + { + // Main logic of arm_convolve_wrapper_s8_get_buffer_size + + const auto dilation_height_factor = static_cast<int32_t>(conv->dilation()->h()); + const auto dilation_width_factor = static_cast<int32_t>(conv->dilation()->w()); + + const auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input()); + const auto filter = loco::must_cast<luci::CircleNode *>(conv->filter()); + + if (dilation_width_factor 
!= 1 or dilation_height_factor != 1 or + conv_input->dtype() != loco::DataType::S8) + { + return 0; + } + + const auto input_depth = static_cast<int32_t>(conv_input->dim(3).value()); + + const auto input_height = static_cast<int32_t>(conv_input->dim(1).value()); + const auto input_width = static_cast<int32_t>(conv_input->dim(2).value()); + + const auto filter_height = static_cast<int32_t>(filter->dim(1).value()); + const auto filter_width = static_cast<int32_t>(filter->dim(2).value()); + + const auto stride_height = static_cast<int32_t>(conv->stride()->h()); + const auto stride_width = static_cast<int32_t>(conv->stride()->w()); + + const auto output_height = static_cast<int32_t>(conv->dim(1).value()); + const auto output_width = static_cast<int32_t>(conv->dim(2).value()); + + assert(conv_input->quantparam()->zerop.size() == 1); + assert(conv->quantparam()->zerop.size() == 1); + + const auto padding_height = computePadding(stride_height, dilation_height_factor, input_height, + filter_height, output_height); + const auto padding_width = + computePadding(stride_width, dilation_width_factor, input_width, filter_width, output_width); + + if ((padding_width == 0) && (padding_height == 0) && (input_depth % 4 == 0) && + (stride_width == 1) && (stride_height == 1) && (filter_width == 1) && (filter_height == 1)) + { + return 0; + } + + if (_use_dsp) + { + return (2 * input_depth * filter_width * filter_height) * sizeof(int16_t); + } + + return 0; + } + + uint32_t + ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final + { + // Main logic of arm_depthwise_conv_wrapper_s8_get_buffer_size + + const auto dilation_height_factor = static_cast<int32_t>(depthwise_conv->dilation()->h()); + const auto dilation_width_factor = static_cast<int32_t>(depthwise_conv->dilation()->w()); + + const auto depthwise_conv_input = loco::must_cast<luci::CircleNode *>(depthwise_conv->input()); + const auto filter = loco::must_cast<luci::CircleNode 
*>(depthwise_conv->filter()); + + if (dilation_width_factor != 1 or dilation_height_factor != 1 or + depthwise_conv_input->dtype() != loco::DataType::S8) + { + return 0; + } + + const auto input_depth = static_cast<int32_t>(depthwise_conv_input->dim(3).value()); + const auto output_depth = static_cast<int32_t>(depthwise_conv->dim(3).value()); + const auto batch_size = static_cast<int32_t>(depthwise_conv_input->dim(0).value()); + + if (input_depth != output_depth or batch_size != 1 or !_use_dsp) + return 0; + + const auto filter_height = static_cast<int32_t>(filter->dim(1).value()); + const auto filter_width = static_cast<int32_t>(filter->dim(2).value()); + + return input_depth * filter_height * filter_width * sizeof(int16_t); + } + + std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final + { + const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input()); + const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature()); + + if (svdf_input->dtype() == loco::DataType::FLOAT32 and + (weight_feature_input->dtype() == loco::DataType::S8 or + weight_feature_input->dtype() == loco::DataType::U8)) + { + throw std::runtime_error("Hybrid type is not currently supported for linux platform"); + } + + std::vector<uint32_t> scratchpad_sizes; + + const auto batch_size = svdf_input->dim(0).value(); + const auto num_filters = weight_feature_input->dim(0).value(); + const auto rank = svdf->svdf_rank(); + const auto num_units = num_filters / rank; + + if (svdf_input->dtype() == loco::DataType::S8) + { + scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t)); + scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t)); + } + else + { + scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float)); + } + + return scratchpad_sizes; + } + +private: + bool _use_dsp; +}; + +} // namespace circle_planner + +#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H diff --git 
a/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h b/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h new file mode 100644 index 000000000..811aa67c3 --- /dev/null +++ b/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H +#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H + +#include "IScratchpadHelper.h" +#include <loco/IR/DataTypeTraits.h> + +namespace circle_planner +{ + +class ScratchpadHelperLinux : public IScratchpadHelper +{ +public: + uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final + { + // for linux AveragePool2d scratchpad tensors size = 0 + return 0; + } + + std::vector<uint32_t> + ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final + { + const auto lhs = loco::must_cast<luci::CircleNode *>(batch_mat_mul->x()); + const auto rhs = loco::must_cast<luci::CircleNode *>(batch_mat_mul->y()); + + std::vector<uint32_t> scratchpad_sizes; + + // Scratchpad for lhs + uint32_t scratchpad_size = 1; + for (int32_t i = 0; i < lhs->rank(); ++i) + scratchpad_size *= lhs->dim(i).value(); + + scratchpad_sizes.push_back(scratchpad_size * loco::size(lhs->dtype())); + + // Scratchpad for rhs + scratchpad_size = 1; + for (int32_t i = 0; i < rhs->rank(); 
++i) + scratchpad_size *= rhs->dim(i).value(); + + scratchpad_sizes.push_back(scratchpad_size * loco::size(rhs->dtype())); + + return scratchpad_sizes; + } + + uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) final + { + const auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input()); + const auto filter = loco::must_cast<luci::CircleNode *>(conv->filter()); + + const uint32_t stride_height = conv->stride()->h(); + const uint32_t stride_width = conv->stride()->w(); + + const uint32_t dilation_height_factor = conv->dilation()->h(); + const uint32_t dilation_width_factor = conv->dilation()->w(); + + const uint32_t filter_height = filter->dim(1).value(); + const uint32_t filter_width = filter->dim(2).value(); + + const bool need_dilated_im2col = dilation_height_factor != 1 || dilation_width_factor != 1; + const bool need_non_dilated_im2col = + stride_height != 1 || stride_width != 1 || filter_height != 1 || filter_width != 1; + const bool need_im2col = conv_input->dtype() != loco::DataType::S16 && + (need_dilated_im2col || need_non_dilated_im2col); + + if (!need_im2col) + { + return 0; + } + + const uint32_t input_depth = conv_input->dim(3).value(); + const uint32_t batches = conv_input->dim(0).value(); + + const uint32_t output_height = conv->dim(1).value(); + const uint32_t output_width = conv->dim(2).value(); + + return batches * output_height * output_width * input_depth * filter_height * filter_width * + size(conv_input->dtype()); + } + + uint32_t + ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final + { + // for linux DepthwiseConv2d scratchpad tensors size = 0 + return 0; + } + + std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final + { + const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input()); + const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature()); + + if (svdf_input->dtype() == 
loco::DataType::FLOAT32 and + (weight_feature_input->dtype() == loco::DataType::S8 or + weight_feature_input->dtype() == loco::DataType::U8)) + { + throw std::runtime_error("Hybrid type is not currently supported for linux platform"); + } + + std::vector<uint32_t> scratchpad_sizes; + + const auto batch_size = svdf_input->dim(0).value(); + const auto num_filters = weight_feature_input->dim(0).value(); + const auto rank = svdf->svdf_rank(); + const auto num_units = num_filters / rank; + + if (svdf_input->dtype() == loco::DataType::S8) + { + scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t)); + scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t)); + } + else + { + scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float)); + } + + return scratchpad_sizes; + } +}; + +} // namespace circle_planner + +#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h b/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h new file mode 100644 index 000000000..14b41640c --- /dev/null +++ b/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H +#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H + +#include "IScratchpadHelper.h" + +namespace circle_planner +{ + +class ScratchpadHelperMCU : public IScratchpadHelper +{ +public: + uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final + { + // for mcu AveragePool2d scratchpad tensors size = 0 + return 0; + } + + std::vector<uint32_t> + ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final + { + throw std::runtime_error("BatchMatMul is not currently supported for mcu platform"); + } + + uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *) final + { + // for mcu scratchpad size = 0 + return 0; + } + + uint32_t + ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final + { + // for mcu DepthwiseConv2d scratchpad tensors size = 0 + return 0; + } + + std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final + { + const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input()); + const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature()); + + if (svdf_input->dtype() == loco::DataType::FLOAT32 and + (weight_feature_input->dtype() == loco::DataType::S8 or + weight_feature_input->dtype() == loco::DataType::U8)) + { + throw std::runtime_error("Hybrid type is not currently supported for linux platform"); + } + + std::vector<uint32_t> scratchpad_sizes; + + const auto batch_size = svdf_input->dim(0).value(); + const auto num_filters = weight_feature_input->dim(0).value(); + const auto rank = svdf->svdf_rank(); + const auto num_units = num_filters / rank; + + if (svdf_input->dtype() == loco::DataType::S8) + { + scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t)); + scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t)); + } + else + { + scratchpad_sizes.push_back(batch_size * num_filters * 
sizeof(float)); + } + + return scratchpad_sizes; + } +}; + +} // namespace circle_planner + +#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H diff --git a/compiler/circle-execution-plan/pal/TargetPlatform.h b/compiler/circle-execution-plan/pal/TargetPlatform.h new file mode 100644 index 000000000..538a502fe --- /dev/null +++ b/compiler/circle-execution-plan/pal/TargetPlatform.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H +#define CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H + +namespace circle_planner +{ + +enum SupportedPlatformType +{ + LINUX, + MCU, + CMSISNN +}; + +struct TargetPlatform +{ + SupportedPlatformType platform_type; + bool use_dsp; +}; + +} // namespace circle_planner + +#endif // CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H diff --git a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp index a54100b8c..1788124c3 100644 --- a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp +++ b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp @@ -35,6 +35,18 @@ int entry(int argc, char **argv) arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); + arser.add_argument("--platform") + .nargs(1) + .type(arser::DataType::STR) + .required(false) + .default_value("linux") + .help("Platform name: linux mcu cmsisnn"); + arser.add_argument("--use_dsp") + .nargs(1) + .type(arser::DataType::BOOL) + .required(false) + .default_value(false) + .help("Plan with or without dsp (now can be used only with cmsisnn)"); try { @@ -47,8 +59,35 @@ int entry(int argc, char **argv) return 255; } - std::string input_path = arser.get<std::string>("input"); - std::string output_path = arser.get<std::string>("output"); + const std::string input_path = arser.get<std::string>("input"); + const std::string output_path = arser.get<std::string>("output"); + const std::string platform_name = arser.get<std::string>("--platform"); + const bool use_dsp = arser.get<bool>("--use_dsp"); + + if (platform_name != "cmsisnn" && use_dsp) + { + std::cerr << "ERROR: Now use_dsp can be used only with cmsisnn" << std::endl; + return EXIT_FAILURE; + } + + circle_planner::SupportedPlatformType platform_type; + if (platform_name == "linux") + { + platform_type = 
circle_planner::SupportedPlatformType::LINUX; + } + else if (platform_name == "mcu") + { + platform_type = circle_planner::SupportedPlatformType::MCU; + } + else if (platform_name == "cmsisnn") + { + platform_type = circle_planner::SupportedPlatformType::CMSISNN; + } + else + { + std::cerr << "ERROR: Invalid platform name '" << platform_name << "'" << std::endl; + return EXIT_FAILURE; + } foder::FileLoader file_loader{input_path}; std::vector<char> model_data; @@ -82,8 +121,8 @@ int entry(int argc, char **argv) auto module = importer.importModule(circle_model); // Do main job - luci::ExecutionPlanner execution_planner(module->graph()); - execution_planner.get_execution_plan(); + circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp}); + execution_planner.make_execution_plan(); // Export to output Circle file luci::CircleExporter exporter; diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp index c37d1e5f5..ec2ec1362 100644 --- a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp +++ b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp @@ -18,72 +18,49 @@ #include <loco/IR/Algorithm.h> #include <luci/UserSettings.h> -namespace luci +namespace circle_planner { namespace { -constexpr uint32_t nodeNotAssigned = std::numeric_limits<int32_t>::max(); +constexpr uint32_t node_not_assigned = std::numeric_limits<int32_t>::max(); -uint32_t compute_output_size(Padding padding, uint32_t image_size, uint32_t filter_size, - uint32_t stride, uint32_t dilation_rate = 1) +bool isExecutableNode(const luci::CircleNode *node) { - const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1; - switch (padding) + switch (node->opcode()) { - case Padding::SAME: - return (image_size + stride - 1) / stride; - case Padding::VALID: - return (image_size + stride - effective_filter_size) / stride; + // The following nodes denote outputs of multiple-output 
nodes. + // The list is synchronized with the same list from luci-interpreter/src/loader/GraphLoader.cpp + case luci::CircleOpcode::CIRCLEIFOUT: + case luci::CircleOpcode::CIRCLESPLITOUT: + case luci::CircleOpcode::CIRCLESPLITVOUT: + case luci::CircleOpcode::CIRCLEUNPACKOUT: + case luci::CircleOpcode::CIRCLEWHILEOUT: + return false; default: - assert(false); + return true; } } -// Method finds (if necessary) size for im2col temporary tensor. -uint32_t compute_im2col_size(const luci::CircleConv2D *conv) +bool isTensorProducingNode(const luci::CircleNode *node) { - auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input()); - auto filter = loco::must_cast<luci::CircleNode *>(conv->filter()); - auto padding = (conv->padding()); - uint32_t stride_height = conv->stride()->h(); - uint32_t stride_width = conv->stride()->w(); - - uint32_t dilation_height_factor = conv->dilation()->h(); - uint32_t dilation_width_factor = conv->dilation()->w(); - - uint32_t filter_height = filter->dim(1).value(); - uint32_t filter_width = filter->dim(2).value(); - - const bool need_dilated_im2col = dilation_height_factor != 1 || dilation_width_factor != 1; - const bool need_non_dilated_im2col = - stride_height != 1 || stride_width != 1 || filter_height != 1 || filter_width != 1; - bool need_im2col = - conv_input->dtype() != loco::DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col); - - if (!need_im2col) + switch (node->opcode()) { - return 0; + // The following nodes are multiple-output nodes. They do not produce tensors, the tensors + // are produced by the corresponding *Out nodes instead. 
+ // The list is synchronized with the same list from luci-interpreter/src/loader/GraphLoader.cpp + case luci::CircleOpcode::IF: + case luci::CircleOpcode::SPLIT: + case luci::CircleOpcode::UNPACK: + return false; + default: + return true; } - - uint32_t input_depth = conv_input->dim(3).value(); - uint32_t input_height = conv_input->dim(1).value(); - uint32_t input_width = conv_input->dim(2).value(); - - uint32_t output_height = compute_output_size(padding, input_height, filter_height, stride_height, - dilation_height_factor); - uint32_t output_width = - compute_output_size(padding, input_width, filter_width, stride_width, dilation_width_factor); - - uint32_t batches = conv_input->dim(0).value(); - - return batches * output_height * output_width * input_depth * filter_height * filter_width * - size(conv_input->dtype()); } } // namespace -void ExecutionPlanner::get_execution_plan() +void ExecutionPlanner::make_execution_plan() { get_default_execution_order_plan(); _required_size = get_offsets_with_greedy_by_size(); @@ -106,23 +83,23 @@ void ExecutionPlanner::get_default_execution_order_plan() void ExecutionPlanner::get_usage_interval() { // Initialize vectors of first and last nodes for usage interval - _alloc_node.assign(_ordered_nodes.size(), nodeNotAssigned); - _dealloc_node.assign(_ordered_nodes.size(), nodeNotAssigned); + _alloc_node.assign(_ordered_nodes.size(), node_not_assigned); + _dealloc_node.assign(_ordered_nodes.size(), node_not_assigned); // Vector for count usages std::vector<int> usages_counts(_ordered_nodes.size(), 0); auto allocate = [this](uint32_t node, uint32_t tensor) { - if (_alloc_node[tensor] != nodeNotAssigned) + if (_alloc_node[tensor] != node_not_assigned) { return; } - assert(_dealloc_node[tensor] == nodeNotAssigned); + assert(_dealloc_node[tensor] == node_not_assigned); _alloc_node[tensor] = node; }; auto deallocate = [this](uint32_t node, uint32_t tensor) { - assert(_dealloc_node[tensor] == nodeNotAssigned); + 
assert(_dealloc_node[tensor] == node_not_assigned); _dealloc_node[tensor] = node; }; @@ -158,13 +135,24 @@ void ExecutionPlanner::get_usage_interval() for (uint32_t i = 0; i < _ordered_nodes.size(); i++) { const auto node = _ordered_nodes.at(i); + auto prev_nodes = preds(node); if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node)) { allocate(0, i); } - allocate(i, i); + else if (!isExecutableNode(loco::must_cast<luci::CircleNode *>(node))) + { + // If current node is multi output node than begin life time for current node should start + // when prev node start live + auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), *prev_nodes.begin()); + size_t index = std::distance(_ordered_nodes.begin(), it); + allocate(index, i); + } + else + { + allocate(i, i); + } - auto prev_nodes = preds(node); for (auto &prev_node : prev_nodes) { auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), prev_node); @@ -203,7 +191,7 @@ uint32_t ExecutionPlanner::get_offsets_with_greedy_by_size() uint32_t ExecutionPlanner::greedy_by_size_approach() { size_t result_size = 0; - create_alloc_node_inform_vector(false, false, false); + create_alloc_node_inform_vector(_is_null_consts, _is_null_inputs, _is_null_scratchpads); std::vector<AllocationNodeInformation> ordered_alloc_inform; for (auto ¤t_node : _alloc_node_inform_vector) { @@ -250,22 +238,22 @@ uint32_t ExecutionPlanner::greedy_by_size_approach() } void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool null_inputs, - bool null_im2col) + bool null_scratchpad) { auto node_compare = [this](const AllocationNodeInformation &alloc_1, const AllocationNodeInformation &alloc_2) { auto idx1 = alloc_1.node_num; auto idx2 = alloc_2.node_num; - if (this->_alloc_node[idx1] == 0 && this->_dealloc_node[idx1] == nodeNotAssigned) + if (this->_alloc_node[idx1] == 0 && this->_dealloc_node[idx1] == node_not_assigned) { - if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == 
nodeNotAssigned) + if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == node_not_assigned) { return idx1 < idx2; } return true; } - if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == nodeNotAssigned) + if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == node_not_assigned) { return false; } @@ -305,30 +293,66 @@ void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool nu { _alloc_node_inform_vector[i].size = 0; } + else if (!isTensorProducingNode(circle_node)) + { + _alloc_node_inform_vector[i].size = 0; + } else { _alloc_node_inform_vector[i].size = node_size; } - // Im2col - auto opcode = circle_node->opcode(); - if (opcode == luci::CircleOpcode::CONV_2D) + // Scratchpad If needed + std::vector<uint32_t> scratchpad_sizes; + if (!null_scratchpad) { - auto conv = loco::must_cast<const luci::CircleConv2D *>(circle_node); - auto im2col_size = compute_im2col_size(conv); - if (im2col_size > 0) + switch (circle_node->opcode()) { - AllocationNodeInformation temp_alloc; - - if (null_im2col) + case luci::CircleOpcode::AVERAGE_POOL_2D: { - temp_alloc.size = 0; + const auto avg_pool = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node); + scratchpad_sizes.push_back( + _scratchpad_helper->ComputeScratchpadSizeAveragePool2d(avg_pool)); + break; } - else + case luci::CircleOpcode::BATCH_MATMUL: { - temp_alloc.size = im2col_size; + const auto batch_mat_mul = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node); + scratchpad_sizes = _scratchpad_helper->ComputeScratchpadSizeBatchMatMul(batch_mat_mul); + break; } + case luci::CircleOpcode::CONV_2D: + { + const auto conv = loco::must_cast<const luci::CircleConv2D *>(circle_node); + scratchpad_sizes.push_back(_scratchpad_helper->ComputeScratchpadSizeConv2d(conv)); + break; + } + case luci::CircleOpcode::DEPTHWISE_CONV_2D: + { + const auto depthwise_conv = + loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node); + scratchpad_sizes.push_back( + 
_scratchpad_helper->ComputeScratchpadSizeDepthwiseConv2d(depthwise_conv)); + break; + } + case luci::CircleOpcode::SVDF: + { + const auto svdf = loco::must_cast<const luci::CircleSVDF *>(circle_node); + scratchpad_sizes = _scratchpad_helper->ComputeScratchpadSizeSVDF(svdf); + break; + } + default: + break; + } + } + + for (const auto scratchpad_size : scratchpad_sizes) + { + if (scratchpad_size > 0) + { + AllocationNodeInformation temp_alloc; + temp_alloc.size = scratchpad_size; temp_alloc.first_node = i - 1; temp_alloc.last_node = i + 1; temp_alloc.node_num = i; @@ -352,7 +376,7 @@ void ExecutionPlanner::dump_inform() { auto current_node_it = std::find_if( _alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(), - [this, i](const AllocationNodeInformation &x) { return x.node_num == i && !x.is_temp; }); + [i](const AllocationNodeInformation &x) { return x.node_num == i && !x.is_temp; }); for (uint32_t j = 0; j < _ordered_nodes.size(); j++) { auto first_node = _alloc_node[j]; @@ -360,7 +384,7 @@ void ExecutionPlanner::dump_inform() auto it = std::find_if( _alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(), - [this, j](const AllocationNodeInformation &x) { return x.node_num == j && !x.is_temp; }); + [j](const AllocationNodeInformation &x) { return x.node_num == j && !x.is_temp; }); if (i >= first_node && i <= last_node) { current_node_it->breadth += it->size; @@ -386,4 +410,4 @@ void ExecutionPlanner::dump_inform() }); } -} // namespace luci +} // namespace circle_planner diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.h b/compiler/circle-execution-plan/src/ExecutionPlanner.h index 8e3d9b46a..e0833c407 100644 --- a/compiler/circle-execution-plan/src/ExecutionPlanner.h +++ b/compiler/circle-execution-plan/src/ExecutionPlanner.h @@ -17,10 +17,15 @@ #ifndef CIRCLE_EXECUTION_PLANNER_H #define CIRCLE_EXECUTION_PLANNER_H +#include "TargetPlatform.h" +#include "IScratchpadHelper.h" +#include "ScratchpadHelperLinux.h" +#include 
"ScratchpadHelperMCU.h" +#include "ScratchpadHelperCMSISNN.h" #include <luci/IR/Module.h> #include <luci/Plan/CircleNodeExecutionPlan.h> -namespace luci +namespace circle_planner { // struct for additional information for the node. it helps build allocations plan for nodes. struct AllocationNodeInformation @@ -50,7 +55,7 @@ struct AllocationNodeInformation uint32_t last_node; // is the current node temporary or not bool is_temp; - // operation breadth of current node + // Breadth is a sum of live tensors sizes at the moment of execution of given node uint32_t breadth; bool operator<(const AllocationNodeInformation &other) const { return offset < other.offset; } @@ -60,12 +65,44 @@ class ExecutionPlanner { public: ExecutionPlanner() = delete; - explicit ExecutionPlanner(loco::Graph *graph) { _graph = graph; }; + explicit ExecutionPlanner(loco::Graph *graph) : _graph(graph) + { + _scratchpad_helper = std::make_unique<ScratchpadHelperLinux>(); + } + + explicit ExecutionPlanner(loco::Graph *graph, TargetPlatform target_platform) : _graph(graph) + { + switch (target_platform.platform_type) + { + case LINUX: + _scratchpad_helper = std::make_unique<ScratchpadHelperLinux>(); + break; + case MCU: + _scratchpad_helper = std::make_unique<ScratchpadHelperMCU>(); + break; + case CMSISNN: + _scratchpad_helper = std::make_unique<ScratchpadHelperCMSISNN>(target_platform.use_dsp); + break; + default: + assert(false && "Use unsupported platform"); + } + }; // Method provides execution plan, which contains execution order and // memory offsets for all nodes in _graph. // This plan writes in nodes annotation information with help of CircleNodeExecutionPlan class. 
- void get_execution_plan(); + void make_execution_plan(); + + // Method change planning mode: + // is_null_consts = true - constants are no longer taken into account when planning + // is_null_inputs = true - input are no longer taken into account when planning + // is_null_scratchpads = true - scratchpads are no longer taken into account when planning + void change_planning_mode(bool is_null_consts, bool is_null_inputs, bool is_null_scratchpads) + { + _is_null_consts = is_null_consts; + _is_null_inputs = is_null_inputs; + _is_null_scratchpads = is_null_scratchpads; + }; private: // Method gets default execution order plan and saves it in _ordered_nodes vector. @@ -83,18 +120,19 @@ private: // Return: required size of buffer. uint32_t get_offsets_with_greedy_by_size(); - // Realization of greedy by size approach to find offsets for nodes. + // Realization of greedy by size approach (algorithm is mentioned in + // "EFFICIENT MEMORY MANAGEMENT FOR DEEP NEURAL NET INFERENCE" paper) to find offsets for nodes. uint32_t greedy_by_size_approach(); // Method creates and fills _alloc_node_inform_vector with usage interval inform and node's sizes. // null_consts = true - size of const nodes will be equal 0; // null_inputs = true - size of input nodes will be equal 0; - // null_im2col = true - size of im2col nodes will be equal 0; - // It using if we don't want to take input(const or im2col) nodes into account + // null_scratchpad = true - size of scratchpad nodes will be equal 0; + // It using if we don't want to take input(const or scratchpads) nodes into account // when determining offsets and calculating the required buffer size. This is uses for // experiments. void create_alloc_node_inform_vector(bool null_consts = false, bool null_inputs = false, - bool null_im2col = false); + bool null_scratchpad = false); // Stores allocation additional information for the all nodes from _graph. 
std::vector<AllocationNodeInformation> _alloc_node_inform_vector; @@ -121,10 +159,21 @@ private: loco::Graph *_graph; + // Calculate size of scratchpad tensors for current platform + std::unique_ptr<IScratchpadHelper> _scratchpad_helper; + // Required memory size. uint32_t _required_size = 0; + + // Flags for choosing different planning modes: + // _is_null_consts = true - constants are no longer taken into account when planning + // _is_null_inputs = true - input are no longer taken into account when planning + // _is_null_scratchpads = true - scratchpads are no longer taken into account when planning + bool _is_null_consts = false; + bool _is_null_inputs = false; + bool _is_null_scratchpads = false; }; -} // namespace luci +} // namespace circle_planner #endif // CIRCLE_EXECUTION_PLANNER_H diff --git a/compiler/circle-inspect/CMakeLists.txt b/compiler/circle-inspect/CMakeLists.txt index d0775ea2d..10d26d191 100644 --- a/compiler/circle-inspect/CMakeLists.txt +++ b/compiler/circle-inspect/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_circle) +if(NOT TARGET mio_circle04) return() -endif(NOT TARGET mio_circle) +endif(NOT TARGET mio_circle04) set(DRIVER "driver/Driver.cpp") @@ -10,5 +10,6 @@ add_executable(circle-inspect ${DRIVER} ${SOURCES}) target_include_directories(circle-inspect PRIVATE src) target_link_libraries(circle-inspect arser) target_link_libraries(circle-inspect foder) -target_link_libraries(circle-inspect mio_circle) +target_link_libraries(circle-inspect mio_circle04) +target_link_libraries(circle-inspect mio_circle04_helper) target_link_libraries(circle-inspect safemain) diff --git a/compiler/circle-inspect/README.md b/compiler/circle-inspect/README.md index 1f76c8ede..94eea7b08 100644 --- a/compiler/circle-inspect/README.md +++ b/compiler/circle-inspect/README.md @@ -20,3 +20,19 @@ ADD ``` To get the count of specific operator, use other tools like sort, uniq, etc. 
+ +Operators with `--tensor_dtype` +- show name and dtype of each tensor one line at a time + +Example +``` +$ circle-inspect --tensor_dtype quantized_conv2d.circle +``` + +Result +``` +ifm UINT8 +weights UINT8 +bias INT32 +ofm UINT8 +``` diff --git a/compiler/circle-inspect/driver/Driver.cpp b/compiler/circle-inspect/driver/Driver.cpp index a450fd9e0..10e185de5 100644 --- a/compiler/circle-inspect/driver/Driver.cpp +++ b/compiler/circle-inspect/driver/Driver.cpp @@ -35,6 +35,7 @@ int entry(int argc, char **argv) .nargs(0) .help("Dump Conv2D series weight operators in circle file"); arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file"); + arser.add_argument("--tensor_dtype").nargs(0).help("Dump dtype of tensors"); arser.add_argument("circle").type(arser::DataType::STR).help("Circle file to inspect"); try @@ -48,7 +49,8 @@ int entry(int argc, char **argv) return 255; } - if (!arser["--operators"] && !arser["--conv2d_weight"] && !arser["--op_version"]) + if (!arser["--operators"] && !arser["--conv2d_weight"] && !arser["--op_version"] && + !arser["--tensor_dtype"]) { std::cout << "At least one option must be specified" << std::endl; std::cout << arser; @@ -63,6 +65,8 @@ int entry(int argc, char **argv) dumps.push_back(std::make_unique<circleinspect::DumpConv2DWeight>()); if (arser["--op_version"]) dumps.push_back(std::make_unique<circleinspect::DumpOperatorVersion>()); + if (arser["--tensor_dtype"]) + dumps.push_back(std::make_unique<circleinspect::DumpTensorDType>()); std::string model_file = arser.get<std::string>("circle"); diff --git a/compiler/circle-inspect/requires.cmake b/compiler/circle-inspect/requires.cmake index 81e0f0dbd..362d67cf4 100644 --- a/compiler/circle-inspect/requires.cmake +++ b/compiler/circle-inspect/requires.cmake @@ -1,3 +1,3 @@ require("arser") -require("mio-circle") +require("mio-circle04") require("safemain") diff --git a/compiler/circle-inspect/src/Dump.cpp 
b/compiler/circle-inspect/src/Dump.cpp index 5c71afb3f..bba5e56c3 100644 --- a/compiler/circle-inspect/src/Dump.cpp +++ b/compiler/circle-inspect/src/Dump.cpp @@ -175,3 +175,28 @@ void DumpOperatorVersion::run(std::ostream &os, const circle::Model *model) } } // namespace circleinspect + +namespace circleinspect +{ + +void DumpTensorDType::run(std::ostream &os, const circle::Model *model) +{ + circleinspect::Reader reader(model); + + const uint32_t subgraph_size = reader.num_subgraph(); + + for (uint32_t g = 0; g < subgraph_size; g++) + { + reader.select_subgraph(g); + auto tensors = reader.tensors(); + + for (uint32_t i = 0; i < tensors->Length(); ++i) + { + const auto tensor = tensors->Get(i); + + os << reader.tensor_name(tensor) << " " << reader.tensor_dtype(tensor) << std::endl; + } + } +} + +} // namespace circleinspect diff --git a/compiler/circle-inspect/src/Dump.h b/compiler/circle-inspect/src/Dump.h index 996c421f9..8ca6838d1 100644 --- a/compiler/circle-inspect/src/Dump.h +++ b/compiler/circle-inspect/src/Dump.h @@ -60,6 +60,15 @@ public: void run(std::ostream &os, const circle::Model *model); }; +class DumpTensorDType final : public DumpInterface +{ +public: + DumpTensorDType() = default; + +public: + void run(std::ostream &os, const circle::Model *model); +}; + } // namespace circleinspect #endif // __DUMP_H__ diff --git a/compiler/circle-inspect/src/Reader.cpp b/compiler/circle-inspect/src/Reader.cpp index 7807db38a..0e2865254 100644 --- a/compiler/circle-inspect/src/Reader.cpp +++ b/compiler/circle-inspect/src/Reader.cpp @@ -16,66 +16,14 @@ #include "Reader.h" +#include <mio_circle/Helper.h> + #include <sstream> #include <string> namespace circleinspect { -bool is_valid(const circle::OperatorCode *opcode) -{ - circle::BuiltinOperator code = opcode->builtin_code(); - return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX); -} - -bool is_custom(const circle::OperatorCode *opcode) -{ - circle::BuiltinOperator code = 
opcode->builtin_code(); - return (code == circle::BuiltinOperator_CUSTOM); -} - -std::string opcode_name(const circle::OperatorCode *opcode) -{ - assert(opcode); - - if (!is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid)"; - return oss.str(); - } - - if (is_custom(opcode)) - { - if (!opcode->custom_code()) - return "(invalid custom)"; - - std::string custom_op = "CUSTOM("; - custom_op += opcode->custom_code()->c_str(); - custom_op += ")"; - return custom_op; - } - - circle::BuiltinOperator code = opcode->builtin_code(); - return circle::EnumNameBuiltinOperator(code); -} - -const char *tensor_type(const circle::Tensor *tensor) -{ - return circle::EnumNameTensorType(tensor->type()); -} - -const char *tensor_name(const circle::Tensor *tensor) -{ - static const char *kEmptyTensorName = "(noname)"; - - auto name = tensor->name(); - if (name) - return name->c_str(); - - return kEmptyTensorName; -} - Reader::Reader(const circle::Model *model) { _subgraphs = model->subgraphs(); @@ -122,7 +70,7 @@ circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const assert(index < _op_codes.size()); const circle::OperatorCode *opcode = _op_codes.at(index); - return opcode->builtin_code(); + return mio::circle::builtin_code_neutral(opcode); } std::string Reader::opcode_name(const circle::Operator *op) const @@ -131,14 +79,24 @@ std::string Reader::opcode_name(const circle::Operator *op) const assert(index < _op_codes.size()); const circle::OperatorCode *opcode = _op_codes.at(index); - if (!is_valid(opcode)) + if (!mio::circle::is_valid(opcode)) { std::ostringstream oss; oss << "(invalid: " << index << ")"; return oss.str(); } - return circleinspect::opcode_name(opcode); + return mio::circle::opcode_name(opcode); +} + +std::string Reader::tensor_name(const circle::Tensor *tensor) const +{ + return mio::circle::tensor_name(tensor); +} + +std::string Reader::tensor_dtype(const circle::Tensor *tensor) const +{ + return mio::circle::tensor_type(tensor); 
} bool Reader::select_subgraph(uint32_t sgindex) diff --git a/compiler/circle-inspect/src/Reader.h b/compiler/circle-inspect/src/Reader.h index b5a99df3f..c38ec3990 100644 --- a/compiler/circle-inspect/src/Reader.h +++ b/compiler/circle-inspect/src/Reader.h @@ -36,12 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T return ret; } -bool is_valid(const circle::OperatorCode *opcode); -bool is_custom(const circle::OperatorCode *opcode); -std::string opcode_name(const circle::OperatorCode *opcode); -const char *tensor_type(const circle::Tensor *tensor); -const char *tensor_name(const circle::Tensor *tensor); - /** * @brief Loads Circle file and provides helpers to access attributes */ @@ -71,6 +65,8 @@ public: size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data); circle::BuiltinOperator builtin_code(const circle::Operator *op) const; std::string opcode_name(const circle::Operator *op) const; + std::string tensor_name(const circle::Tensor *tensor) const; + std::string tensor_dtype(const circle::Tensor *tensor) const; public: bool select_subgraph(uint32_t subgraph); diff --git a/compiler/circle-opselector/README.md b/compiler/circle-opselector/README.md index c06899ab5..5ea2d32c4 100644 --- a/compiler/circle-opselector/README.md +++ b/compiler/circle-opselector/README.md @@ -1,21 +1,21 @@ -# circle-opselector
-
-`circle-opselector` is a tool for creating new circle models by selecting nodes from a model.
-
-## Example
-
-### 1. Select from location numbers
-
-```bash
-./circle-opselector --by_id "1-3,5" input.circle output.circle
-```
-
-Then, output.circle which has node 1, 2, 3 and 5 will be created.
-
-### 2. Select from node names
-
-```bash
-./circle-opselector --by_name "Add_1,Sub_1,Concat_2" input.circle output.circle
-```
-
-Then, output.circle which has node Add_1, Sub_1 and Concat_2 will be created.
+# circle-opselector + +`circle-opselector` is a tool for creating new circle models by selecting nodes from a model. + +## Example + +### 1. Select from location numbers + +```bash +./circle-opselector --by_id "1-3,5" input.circle output.circle +``` + +Then, output.circle which has node 1, 2, 3 and 5 will be created. + +### 2. Select from node names + +```bash +./circle-opselector --by_name "Add_1,Sub_1,Concat_2" input.circle output.circle +``` + +Then, output.circle which has node Add_1, Sub_1 and Concat_2 will be created. diff --git a/compiler/circle-part-value-test/CMakeLists.txt b/compiler/circle-part-value-test/CMakeLists.txt index 1cfbcbd9b..0657607d2 100644 --- a/compiler/circle-part-value-test/CMakeLists.txt +++ b/compiler/circle-part-value-test/CMakeLists.txt @@ -82,8 +82,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1}) # Run partitioner add_custom_command(OUTPUT ${PARTITIONER_CONN_JSON} - COMMAND circle_partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}" - DEPENDS circle_partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH} + COMMAND circle-partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}" + DEPENDS circle-partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH} COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}" ) list(APPEND TEST_DEPS ${PARTITIONER_CONN_JSON}) @@ -106,7 +106,7 @@ add_dependencies(circle_part_value_test_prepare common_artifacts_deps) add_test(NAME circle_part_value_test COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/part_eval_all.sh" "${CMAKE_CURRENT_BINARY_DIR}" - "${NNCC_OVERLAY_DIR}/venv_2_6_0" + "${NNCC_OVERLAY_DIR}/venv_2_8_0" "$<TARGET_FILE:circle_part_driver>" ${PARTITION_LIST} ) diff --git a/compiler/circle-part-value-test/part_eval_one.py b/compiler/circle-part-value-test/part_eval_one.py index 91e32d78f..44661c78b 100755 --- a/compiler/circle-part-value-test/part_eval_one.py +++ b/compiler/circle-part-value-test/part_eval_one.py @@ -53,21 +53,37 @@ except: interpreter = 
tf.lite.Interpreter(tflite_model) interpreter.allocate_tensors() +# Read SignatureDef and get output tensor id orders for remapping +full_signatures = interpreter._get_full_signature_list() +full_signatures_outputs_remap = None +if full_signatures != None: + signature_serving_default = full_signatures.get('serving_default', None) + if signature_serving_default != None: + signature_outputs = signature_serving_default['outputs'] + + full_signatures_outputs_remap = [] + for index, (key, value) in enumerate(signature_outputs.items()): + full_signatures_outputs_remap.append(value) + # Generate random input data. num_inputs = len(interpreter.get_input_details()) for i in range(num_inputs): input_details = interpreter.get_input_details()[i] - if input_details["dtype"] == np.float32: + input_details_dtype = input_details["dtype"] + input_details_shape = input_details["shape"] + if input_details_dtype == np.float32: input_data = np.array( - np.random.random_sample(input_details["shape"]), input_details["dtype"]) - elif input_details["dtype"] == np.uint8: + np.random.random_sample(input_details_shape), input_details_dtype) + elif input_details_dtype == np.int16: input_data = np.array( - np.random.randint(0, 256, size=input_details["shape"]), - input_details["dtype"]) - elif input_details["dtype"] == np.bool_: + np.random.randint(0, 100, size=input_details_shape), input_details_dtype) + elif input_details_dtype == np.uint8: input_data = np.array( - np.random.choice(a=[True, False], size=input_details["shape"]), - input_details["dtype"]) + np.random.randint(0, 256, size=input_details_shape), input_details_dtype) + elif input_details_dtype == np.bool_: + input_data = np.array( + np.random.choice(a=[True, False], size=input_details_shape), + input_details_dtype) else: raise SystemExit("Unsupported input dtype") @@ -90,52 +106,42 @@ print("", flush=True) subprocess.run(partition_command, check=True) # Compare the results. 
-for idx in range(len(interpreter.get_output_details())): - output_details = interpreter.get_output_details()[idx] - output_data = np.fromfile(circle_model + ".output" + str(idx), - output_details["dtype"]) +inpt_output_details = interpreter.get_output_details() +for idx in range(len(inpt_output_details)): + output_details = inpt_output_details[idx] + output_dtype = output_details["dtype"] + output_data = np.fromfile(circle_model + ".output" + str(idx), output_dtype) shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r') output_shape = [int(i) for i in shape_file.read().split(',')] luci_output_data = np.reshape(output_data, output_shape) + output_tensor = output_details["index"] + if full_signatures_outputs_remap != None: + output_tensor = full_signatures_outputs_remap[idx] + intp_output_data = interpreter.get_tensor(output_tensor) try: - if output_details["dtype"] == np.uint8: - if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=0, - atol=0) == False: + if output_dtype == np.uint8: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) - elif output_details["dtype"] == np.float32: + elif output_dtype == np.float32: if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=1.e-5, - atol=1.e-5) == False: + luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) - elif output_details["dtype"] == np.int64: - if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=0, - atol=0) == False: + elif output_dtype == np.int64: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of 
" + tflite_model + " does not match with " + circle_model) - elif output_details["dtype"] == np.int32: - if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=0, - atol=0) == False: + elif output_dtype == np.int32: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: + raise SystemExit("Execution result of " + tflite_model + + " does not match with " + circle_model) + elif output_dtype == np.int16: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) else: - raise SystemExit("Unsupported data type: ", output_details["dtype"]) + raise SystemExit("Unsupported data type: ", output_dtype) except: print(traceback.format_exc()) quit(255) diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part new file mode 100644 index 000000000..496971e55 --- /dev/null +++ b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,npu +default=cpu +comply=opcode + +[OPCODE] +ADD=npu diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part new file mode 100644 index 000000000..9913fea96 --- /dev/null +++ b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,npu +default=cpu +comply=opcode + +[OPCODE] +UNPACK=npu diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part new file mode 100644 index 000000000..c63efc592 --- /dev/null +++ b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,npu +default=npu +comply=opcode + +[OPCODE] +UNPACK=cpu diff --git 
a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part new file mode 100644 index 000000000..ad0842165 --- /dev/null +++ b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,npu +default=npu +comply=opcode + +[OPCODE] +MUL=npu diff --git a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part new file mode 100644 index 000000000..c82b741b0 --- /dev/null +++ b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,npu +default=npu +comply=opcode + +[OPCODE] +SQRT=cpu diff --git a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part new file mode 100644 index 000000000..d9d2a8e59 --- /dev/null +++ b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,npu +default=npu +comply=opcode + +[OPCODE] +FULLY_CONNECTED=cpu diff --git a/compiler/circle-part-value-test/parts/Part_Split_Add_000.part b/compiler/circle-part-value-test/parts/Part_Split_Add_000.part new file mode 100644 index 000000000..91af566cd --- /dev/null +++ b/compiler/circle-part-value-test/parts/Part_Split_Add_000.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,npu +default=npu +comply=opcode + +[OPCODE] +SPLIT=cpu diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part new file mode 100644 index 000000000..d4d439d27 --- /dev/null +++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,acl_cl +default=cpu +comply=opcode + +[OPCODE] +DIV=acl_cl diff --git 
a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part new file mode 100644 index 000000000..dbd174ee1 --- /dev/null +++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,npu +default=npu +comply=opcode + +[OPCODE] +TANH=cpu diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part new file mode 100644 index 000000000..475439a9d --- /dev/null +++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,npu +default=cpu +comply=opcode + +[OPCODE] +FULLY_CONNECTED=npu diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part new file mode 100644 index 000000000..d9d2a8e59 --- /dev/null +++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,npu +default=npu +comply=opcode + +[OPCODE] +FULLY_CONNECTED=cpu diff --git a/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part new file mode 100644 index 000000000..e469eeb26 --- /dev/null +++ b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,acl_cl +default=cpu +comply=opcode + +[OPCODE] +MAXIMUM=acl_cl diff --git a/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part new file mode 100644 index 000000000..e469eeb26 --- /dev/null +++ b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,acl_cl +default=cpu +comply=opcode + +[OPCODE] +MAXIMUM=acl_cl diff --git a/compiler/circle-part-value-test/test.lst 
b/compiler/circle-part-value-test/test.lst index af2f5ba5c..b7a3f403a 100644 --- a/compiler/circle-part-value-test/test.lst +++ b/compiler/circle-part-value-test/test.lst @@ -35,3 +35,24 @@ add(Part_If_Add_Sub_001 Part_If_Add_Sub_001.001 3) # WHILE with subgraphs add(Part_While_000 Part_While_000 3) add(Part_While_001 Part_While_001 3) + +# UNPACK with multiple outputs +add(Net_UnpackAdd_001 Net_UnpackAdd_001 2) +add(Net_UnpackAdd_001 Net_UnpackAdd_001.001 2) +add(Net_UnpackAdd_001 Net_UnpackAdd_001.002 2) + +# Other multiple outputs +add(Part_Split_Add_000 Part_Split_Add_000 2) + +# test SignatureDef, with any OPCODE +add(SignatureDef_MultiOut_000 SignatureDef_MultiOut_000 0) +add(SignatureDef_MultiOut_001 SignatureDef_MultiOut_001 0) + +# FC with nobias +add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias 1) +add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_001 2) +add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_002 2) +add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_003 2) +add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_000 0) +add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_001 0) +add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_002 0) diff --git a/compiler/circle-partitioner-test/CMakeLists.txt b/compiler/circle-partitioner-test/CMakeLists.txt index ed8c97948..e29a66b41 100644 --- a/compiler/circle-partitioner-test/CMakeLists.txt +++ b/compiler/circle-partitioner-test/CMakeLists.txt @@ -57,8 +57,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1}) # Run partitioner set(PART_CONN_JSON "${PART_OUT_PATH}/${PART_NAME}.conn.json") add_custom_command(OUTPUT ${PART_CONN_JSON} - COMMAND circle_partitioner "${PART_FILE}" "${PART_NAME}.circle" "${PART_OUT_PATH}" - DEPENDS circle_partitioner ${CIRCLE_DST_PATH} ${PART_DST_PATH} + COMMAND circle-partitioner "${PART_FILE}" "${PART_NAME}.circle" "${PART_OUT_PATH}" + DEPENDS circle-partitioner ${CIRCLE_DST_PATH} ${PART_DST_PATH} COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}" ) # NOTE this is checked in 
build time and not added with 'add_test' command diff --git a/compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part b/compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part new file mode 100644 index 000000000..01b8c704e --- /dev/null +++ b/compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part @@ -0,0 +1,7 @@ +[partition] +backends=cpu,acl_cl +default=cpu +comply=opcode + +[OPCODE] +ADD=acl_cl diff --git a/compiler/circle-partitioner-test/test.lst b/compiler/circle-partitioner-test/test.lst index b731f8d0e..c0c185c7e 100644 --- a/compiler/circle-partitioner-test/test.lst +++ b/compiler/circle-partitioner-test/test.lst @@ -5,3 +5,7 @@ # add(RECIPE_NAME PART_NAME) add(Net_InstanceNorm_003 Net_InstanceNorm_003) + +# NOTE SVDF partition test is done here as value test may need custom tolerance +# TODO move Part_Add_SVDF_000 to circle-part-value-test when ready +add(Part_Add_SVDF_000 Part_Add_SVDF_000) diff --git a/compiler/circle-partitioner/CMakeLists.txt b/compiler/circle-partitioner/CMakeLists.txt index 28a16c9fc..9b8f5afae 100644 --- a/compiler/circle-partitioner/CMakeLists.txt +++ b/compiler/circle-partitioner/CMakeLists.txt @@ -1,5 +1,24 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") +add_executable(circle-partitioner "${SOURCES}") +target_link_libraries(circle-partitioner foder) +target_link_libraries(circle-partitioner crew) +target_link_libraries(circle-partitioner safemain) +target_link_libraries(circle-partitioner luci_lang) +target_link_libraries(circle-partitioner luci_log) +target_link_libraries(circle-partitioner luci_import) +target_link_libraries(circle-partitioner luci_service) +target_link_libraries(circle-partitioner luci_pass) +target_link_libraries(circle-partitioner luci_export) +target_link_libraries(circle-partitioner luci_partition) +target_link_libraries(circle-partitioner arser) +target_link_libraries(circle-partitioner pepper_csv2vec) +target_link_libraries(circle-partitioner vconone) 
+target_link_libraries(circle-partitioner nncc_common) + +install(TARGETS circle-partitioner DESTINATION bin) + +# TODO remove circle_partitioner add_executable(circle_partitioner "${SOURCES}") target_link_libraries(circle_partitioner foder) target_link_libraries(circle_partitioner crew) diff --git a/compiler/circle-partitioner/README.md b/compiler/circle-partitioner/README.md index 5fd312e33..2e0a98638 100644 --- a/compiler/circle-partitioner/README.md +++ b/compiler/circle-partitioner/README.md @@ -94,7 +94,7 @@ Net_InstanceNorm_003/ Command example ``` -./circle_partitioner Net_InstanceNorm_003.part Net_InstanceNorm_003.circle Net_InstanceNorm_003 +./circle-partitioner Net_InstanceNorm_003.part Net_InstanceNorm_003.circle Net_InstanceNorm_003 ``` Result of _circle-partitioner_ @@ -163,3 +163,131 @@ as the `source` model: `[ "Input" ]`. `Net_InstanceNorm_003.00002_acl_cl.circle` which they should be connected. - And `outputs` `[ "Div" ]` should be connected to `inputs` of third model `Net_InstanceNorm_003.00003_cpu.circle`. 
+ +### Execution example + +Consider partitioning with backends of OneRT +- `cpu`, `acl_cl`, `acl_neon`, `ruy`, `xnnpack` + +Let's try with this command: +``` +circle_partitioner \ + --partition Net_InstanceNorm_003.part \ + --backends cpu,acl_cl \ + --default cpu \ + Net_InstanceNorm_003.circle Net_InstanceNorm_003 +``` + +where `Net_InstanceNorm_003.part` is like this for initial design +``` +[partition] +backends=cpu,acl_cl +default=cpu +comply=opcode + +[OPCODE] +ADD=acl_cl +``` +where in `[partition]` section, +- `backends` is available backends and can be overridden by `--backends` +- `default` is default backend for OpCodes not assigned in `[OPCODE]` section can be overridden by `--default` +- `comply` is which rule to apply, where only `opcode` is available for now + +#### Use Op name to assign backend + +``` +[OP] +Reduction_indices=GPU +``` +- there are very long names that may be inconvenient + +### Partitioned output + +#### Output files + +After partition is applied, output files will look something like these +- `Net_InstanceNorm_003.part.00001_cpu.circle` +- `Net_InstanceNorm_003.part.00002_acl_cl.circle` +- `Net_InstanceNorm_003.part.00003_cpu.circle` +- `Net_InstanceNorm_003.part.conn.ini` +- `Net_InstanceNorm_003.part.conn.json` + +Assume only `Div` node is assigned to `acl_cl` + +#### Connection information of partitioned circle files + +##### Format with ini +- `Net_InstanceNorm_003.conn.ini` provides connection of each circle files. 
+``` +[source] +file=Net_InstanceNorm_003.circle +i1=Input +o1=Add_as_terminal + +[models] +m1=Net_InstanceNorm_003.part.00001_cpu.circle +m2=Net_InstanceNorm_003.part.00002_acl_cl.circle +m3=Net_InstanceNorm_003.part.00003_cpu.circle + +[Net_InstanceNorm_003.part.00001_cpu.circle] +file=Net_InstanceNorm_003.part.00001_cpu.circle +i1=Input +o1=Pow +o2=Sub + +[Net_InstanceNorm_003.part.00002_acl_cl.circle] +file=Net_InstanceNorm_003.part.00002_acl_cl.circle +i1=Sub +i2=Pow +o1=Div + +[Net_InstanceNorm_003.part.00003_cpu.circle] +file=Net_InstanceNorm_003.part.00003_cpu.circle +i1=Div +o1=Add_as_terminal +``` + +Predefined section +- `source`: Source circle model information. Has `file` as filename, `iN` for inputs and `oN` for outputs. +- `models`: Partitioned circle models. Has `mN` for model filename. + +Partitioned Model section +- `iN`: inputs of this model +- `oN`: outputs of this model + +In graph diagram, output order of `Net_InstanceNorm_003.part.00001_cpu.circle` +looks like `Pow,Sub` but `Div` Op in `Net_InstanceNorm_003.part.00002_acl_cl.circle` +requires order of `Sub,Pow`. + +##### Format with JSON +- Use JSON format, `Net_InstanceNorm_003.part.conn.json` +```json +{ + "source" : { + "file" : "Net_InstanceNorm_003.circle", + "inputs" : [ "Input" ], + "outputs" : [ "Add_as_terminal" ] + }, + "parts" : [ + { + "file" : "Net_InstanceNorm_003.part.00001_cpu.circle", + "inputs" : [ "Input" ], + "outputs" : [ "Pow", "Sub" ], + }, + { + "file" : "Net_InstanceNorm_003.part.00002_acl_cl.circle", + "inputs" : [ "Pow", "Sub" ], + "outputs" : [ "Div" ] + }, + { + "file" : "Net_InstanceNorm_003.part.00003_cpu.circle", + "inputs" : [ "Div" ], + "outputs" : [ "Add_as_terminal" ] + } + ] +} +``` + +### Future works + +How to partition with multiple inputs? 
diff --git a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt new file mode 100644 index 000000000..5ec8b6ee5 --- /dev/null +++ b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt @@ -0,0 +1,144 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_include(TargetRequire) + +unset(REQUIRED_TARGETS) +list(APPEND REQUIRED_TARGETS circle-inspect) +list(APPEND REQUIRED_TARGETS circle-verify) +list(APPEND REQUIRED_TARGETS circle-quantizer) +list(APPEND REQUIRED_TARGETS record-minmax) +list(APPEND REQUIRED_TARGETS dredd_rule_lib) +TargetRequire_Return(${REQUIRED_TARGETS}) + +unset(TEST_DEPS) +unset(TEST_NAMES) + +get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR) + +set(options USE_QCONFIG) +set(oneValueArgs DTYPE GRANULARITY) +set(multiValueArgs "") + +macro(Add RECIPE) + cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(QCONFIG_OPT "") + if(ARG_USE_QCONFIG) + set(QCONFIG_OPT "--config" "${ARTIFACTS_BIN_PATH}/${RECIPE}.qconf.json") + endif() + + set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle") + set(FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.fq.circle") + set(RECORDED_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.recorded.circle") + set(QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle") + + # Generate quantized .circle + add_custom_command(OUTPUT ${QUANT_CIRCLE_PATH} + COMMAND $<TARGET_FILE:circle-quantizer> --quantize_dequantize_weights float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${CIRCLE_PATH} ${FAKE_QUANT_CIRCLE_PATH} + COMMAND $<TARGET_FILE:record-minmax> --input_model ${FAKE_QUANT_CIRCLE_PATH} --output_model ${RECORDED_CIRCLE_PATH} + COMMAND $<TARGET_FILE:circle-quantizer> --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH} + DEPENDS + circle-quantizer + record-minmax + ${CIRCLE_PATH} 
+ COMMENT "Generate ${RECIPE}.q.circle" + ) + + list(APPEND TEST_DEPS ${QUANT_CIRCLE_PATH}) + list(APPEND TEST_NAMES ${RECIPE}) +endmacro(Add) + +# Macro to generate fully fake-quantized models +macro(AddFakeQuant RECIPE) + set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle") + # NOTE We use .q.circle because it is convention for output file (see testall.sh for more details) + set(FULL_FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle") + + # Generate fully fake-quantized .circle + add_custom_command(OUTPUT ${FULL_FAKE_QUANT_CIRCLE_PATH} + COMMAND $<TARGET_FILE:circle-quantizer> --fake_quantize ${CIRCLE_PATH} ${FULL_FAKE_QUANT_CIRCLE_PATH} + DEPENDS + circle-quantizer + ${CIRCLE_PATH} + COMMENT "Generate ${RECIPE}.q.circle" + ) + + list(APPEND TEST_DEPS ${FULL_FAKE_QUANT_CIRCLE_PATH}) + list(APPEND TEST_NAMES ${RECIPE}) +endmacro(AddFakeQuant) + +# Read "test.lst" +include("test.lst") + +## +## Copy testall +## +set(TEST_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/testall.sh") +set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh") + +add_custom_command( + OUTPUT ${TEST_RUNNER} + COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER}" + DEPENDS ${TEST_RUNNER_SOURCE} + COMMENT "Generate test runner" +) + +list(APPEND TEST_DEPS "${TEST_RUNNER}") + +### +### Generate test.config +### +set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config") + +add_custom_command( + OUTPUT ${TEST_CONFIG} + COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG} + COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_INSPECT_PATH=\"$<TARGET_FILE:circle-inspect>\"' >> ${TEST_CONFIG} + COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_VERIFY_PATH=\"$<TARGET_FILE:circle-verify>\"' >> ${TEST_CONFIG} + COMMAND ${CMAKE_COMMAND} -E echo 'RECORD_MINMAX_PATH=\"$<TARGET_FILE:record-minmax>\"' >> ${TEST_CONFIG} + COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_QUANTIZER_PATH=\"$<TARGET_FILE:circle-quantizer>\"' >> ${TEST_CONFIG} + DEPENDS + circle-inspect + circle-verify + 
record-minmax + circle-quantizer + COMMENT "Generate test configuration" +) + +list(APPEND TEST_DEPS "${TEST_CONFIG}") + +# +# copy rule-lib.sh (a library of shell script functions) +# + +# getting path for rule-lib.sh in dredd-rule-lib +get_target_property(DREDD_RULE_LIB_DIR dredd_rule_lib BINARY_DIR) + +set(RULE_LIB_SOURCE_PATH "${DREDD_RULE_LIB_DIR}/rule-lib.sh") +set(RULE_LIB_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/rule-lib.sh") + +add_custom_command( + OUTPUT ${RULE_LIB_BINARY_PATH} + COMMAND ${CMAKE_COMMAND} -E copy "${RULE_LIB_SOURCE_PATH}" "${RULE_LIB_BINARY_PATH}" + DEPENDS ${RULE_LIB_SOURCE_PATH} + COMMENT "Generate rule lib" +) + +list(APPEND TEST_DEPS "${RULE_LIB_BINARY_PATH}") + +# Generate dependencies +add_custom_target(circle_quantizer_dredd_recipe_test ALL DEPENDS ${TEST_DEPS}) +add_dependencies(circle_quantizer_dredd_recipe_test common_artifacts_deps) + +get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR) + +# Run tests +add_test( + NAME circle_quantizer_dredd_recipe_test + COMMAND ${TEST_RUNNER} + ${TEST_CONFIG} + ${ARTIFACTS_BIN_PATH} + ${TEST_NAMES} +) diff --git a/compiler/circle-quantizer-dredd-recipe-test/README.md b/compiler/circle-quantizer-dredd-recipe-test/README.md new file mode 100644 index 000000000..61525495a --- /dev/null +++ b/compiler/circle-quantizer-dredd-recipe-test/README.md @@ -0,0 +1,37 @@ +# circle-quantizer-dredd-recipe-test + +It tests non-functional conditions of a quantized circle model generated by circle-quantizer. + +## How to add a test? + +1. Create a directory under `res/TensorFlowLiteRecipes/` or `res/CircleRecipes/`. + +2. Make a recipe (`test.recipe`) for fp32 model under the directory. + +3. Make a rule (`test.rule`) you want to test under the directory. (For more information on dredd-test-rules, see _dredd-rule-lib_ module.) + +4. Add test to `test.lst` in this module with `Add` macro. 
+ +``` +Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG) +``` + +- `RECIPE_DIR`: Path to the directory where the recipe file is saved. +- `DTYPE`: Default quantization dtype (uint8, int16) +- `GRANULARITY`: Quantization granularity (channel, layer) +- `USE_QCONFIG`: (Optional) Whether to use a quantization configuration file or not. If this is set, `test.qconf.json` should exist under `RECIPE_DIR` + +## Example + +``` +# TensorFlowLiteRecipes +res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000 +├── test.recipe # What you want to test +└── test.rule # Non-functional conditions to be satisfied +└── test.qconf.json # Quantization configuration file (optional) + +# test.lst +... +Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +... +``` diff --git a/compiler/circle-quantizer-dredd-recipe-test/requires.cmake b/compiler/circle-quantizer-dredd-recipe-test/requires.cmake new file mode 100644 index 000000000..7450f7322 --- /dev/null +++ b/compiler/circle-quantizer-dredd-recipe-test/requires.cmake @@ -0,0 +1,6 @@ +require("circle-quantizer") +require("record-minmax") +require("circle-inspect") +require("circle-verify") +require("common-artifacts") +require("dredd-rule-lib") diff --git a/compiler/circle-quantizer-dredd-recipe-test/test.lst b/compiler/circle-quantizer-dredd-recipe-test/test.lst new file mode 100644 index 000000000..188103016 --- /dev/null +++ b/compiler/circle-quantizer-dredd-recipe-test/test.lst @@ -0,0 +1,15 @@ +## EXAMPLE +# +# Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG(optional)) +# AddFakeQuant(RECIPE_DIR) +# + +## TFLITE RECIPE + +Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Conv_Mul_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Conv_Mul_Add_002 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Split_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Split_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG) + 
+AddFakeQuant(Quant_Add_000) diff --git a/compiler/circle-quantizer-dredd-recipe-test/testall.sh b/compiler/circle-quantizer-dredd-recipe-test/testall.sh new file mode 100755 index 000000000..e5d5cf2b8 --- /dev/null +++ b/compiler/circle-quantizer-dredd-recipe-test/testall.sh @@ -0,0 +1,100 @@ +#!/bin/bash + +# Need at least 2 arguments +if [[ $# -lt 2 ]]; then + echo "USAGE: $0 ..." + echo + echo "ARGUMENTS:" + echo " [test.config path]" + echo " [WORKDIR]" + echo " [Prefix1]" + echo " [Prefix2]" + echo " ..." + exit 255 +fi + +WORKDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +CONFIG_PATH="$1"; shift +RESOURCE_DIR="$1"; shift + +source "${CONFIG_PATH}" + +echo "-- Found circle-inspect: ${CIRCLE_INSPECT_PATH}" +echo "-- Found circle-verify: ${CIRCLE_VERIFY_PATH}" +echo "-- Found circle-quantizer: ${CIRCLE_QUANTIZER_PATH}" +echo "-- Found record-minmax: ${RECORD_MINMAX_PATH}" +echo "-- Found common-artifacts: ${RESOURCE_DIR}" + +TESTED=() +PASSED=() +FAILED=() + +pushd ${WORKDIR} +while [[ $# -ne 0 ]]; do + PREFIX="$1"; shift + + TESTED+=("${PREFIX}") + + PASSED_TAG="${PREFIX}.passed" + + rm -f "${PASSED_TAG}" + + cat > "${PREFIX}.log" <( + exec 2>&1 + + echo "-- Found circle: ${PREFIX}.q.circle" + + # Exit immediately if any command fails + set -e + # Show commands + set -x + + # + # Check if rule is satisfied + # + + # Note: turn off 'command printing'. 
Otherwise printing will be so messy + set +x + + # (COMPILED_FILE, INSPECT_PROG_PATH, VERIFY_PROG_PATH, ERROR_LOG) must be set for rule-lib.sh + COMPILED_FILE="${PREFIX}.q.circle" + INSPECT_PROG_PATH=${CIRCLE_INSPECT_PATH} + VERIFY_PROG_PATH=${CIRCLE_VERIFY_PATH} + ERROR_LOG="${PREFIX}.error" + + rm -f "${ERROR_LOG}" + + # in case error while running rule-lib.sh, prints error msg + trap 'echo "** ERROR **" ; cat "${ERROR_LOG}"' ERR + + source rule-lib.sh + source "${RESOURCE_DIR}/${PREFIX}.rule" + + # unset + trap - ERR + set -x + + # At this point, the exit code of all commands is 0 + # If not 0, execution of this script ends because of "set -e" + touch "${PASSED_TAG}" + ) + + if [[ -f "${PASSED_TAG}" ]]; then + PASSED+=("$PREFIX") + else + FAILED+=("$PREFIX") + fi +done +popd + +if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then + echo "FAILED" + for TEST in "${FAILED[@]}" + do + echo "- ${TEST}" + done + exit 255 +fi + +echo "PASSED" +exit 0 diff --git a/compiler/circle-quantizer/CMakeLists.txt b/compiler/circle-quantizer/CMakeLists.txt index a5f5f61c4..14e00972b 100644 --- a/compiler/circle-quantizer/CMakeLists.txt +++ b/compiler/circle-quantizer/CMakeLists.txt @@ -1,11 +1,19 @@ +nnas_find_package(Jsoncpp) +if(NOT Jsoncpp_FOUND) + message(STATUS "Build jsoncpp: FAILED (missing jsoncpp)") + return() +endif(NOT Jsoncpp_FOUND) + set (SOURCES src/CircleQuantizer.cpp) add_executable(circle-quantizer "${SOURCES}") +target_include_directories(circle-quantizer PRIVATE ${Jsoncpp_INCLUDE_DIRS}) + +target_link_libraries(circle-quantizer ${Jsoncpp_STATIC_LIB}) target_link_libraries(circle-quantizer foder) target_link_libraries(circle-quantizer safemain) target_link_libraries(circle-quantizer oops) target_link_libraries(circle-quantizer loco) -target_link_libraries(circle-quantizer mio_circle) target_link_libraries(circle-quantizer luci_import) target_link_libraries(circle-quantizer luci_service) target_link_libraries(circle-quantizer luci_pass) diff --git 
a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp index 57ac30a87..e0c85cb6e 100644 --- a/compiler/circle-quantizer/src/CircleQuantizer.cpp +++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp @@ -17,7 +17,7 @@ #include <foder/FileLoader.h> #include <luci/Importer.h> -#include <luci/CircleOptimizer.h> +#include <luci/CircleQuantizer.h> #include <luci/Service/Validate.h> #include <luci/CircleExporter.h> #include <luci/CircleFileExpContract.h> @@ -26,6 +26,7 @@ #include <oops/InternalExn.h> #include <arser/arser.h> #include <vconone/vconone.h> +#include <json.h> #include <functional> #include <iostream> @@ -34,8 +35,41 @@ using OptionHook = std::function<int(const char **)>; -using Algorithms = luci::CircleOptimizer::Options::Algorithm; -using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters; +using LayerParam = luci::CircleQuantizer::Options::LayerParam; +using Algorithms = luci::CircleQuantizer::Options::Algorithm; +using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters; + +std::vector<std::shared_ptr<LayerParam>> read_layer_params(std::string &filename) +{ + Json::Value root; + std::ifstream ifs(filename); + + // Failed to open cfg file + if (not ifs.is_open()) + throw std::runtime_error("Cannot open config file. " + filename); + + Json::CharReaderBuilder builder; + JSONCPP_STRING errs; + + // Failed to parse + if (not parseFromStream(builder, ifs, &root, &errs)) + throw std::runtime_error("Cannot parse config file (json format). 
" + errs); + + auto layers = root["layers"]; + std::vector<std::shared_ptr<LayerParam>> p; + for (auto layer : layers) + { + auto l = std::make_shared<LayerParam>(); + { + l->name = layer["name"].asString(); + l->dtype = layer["dtype"].asString(); + l->granularity = layer["granularity"].asString(); + } + p.emplace_back(l); + } + + return p; +} void print_exclusive_options(void) { @@ -56,15 +90,20 @@ int entry(int argc, char **argv) { // Simple argument parser (based on map) std::map<std::string, OptionHook> argparse; - luci::CircleOptimizer optimizer; + luci::CircleQuantizer quantizer; - auto options = optimizer.options(); + auto options = quantizer.options(); auto settings = luci::UserSettings::settings(); const std::string qdqw = "--quantize_dequantize_weights"; const std::string qwmm = "--quantize_with_minmax"; const std::string rq = "--requantize"; const std::string fq = "--force_quantparam"; + const std::string cq = "--copy_quantparam"; + const std::string fake_quant = "--fake_quantize"; + const std::string cfg = "--config"; + + const std::string tf_maxpool = "--TF-style_maxpool"; const std::string gpd = "--generate_profile_data"; @@ -99,6 +138,19 @@ int entry(int argc, char **argv) "Three arguments required: input_model_dtype(float32) " "output_model_dtype(uint8) granularity(layer, channel)"); + arser.add_argument(tf_maxpool) + .nargs(0) + .required(false) + .default_value(false) + .help("Force MaxPool Op to have the same input/output quantparams. NOTE: This feature can " + "degrade accuracy of some models"); + + arser.add_argument(fake_quant) + .nargs(0) + .required(false) + .help("Convert a quantized model to a fake-quantized model. 
NOTE: This feature will " + "generate an fp32 model."); + arser.add_argument(rq) .nargs(2) .type(arser::DataType::STR_VEC) @@ -116,6 +168,15 @@ int entry(int argc, char **argv) "Three arguments required: tensor_name(string), " "scale(float) zero_point(int)"); + arser.add_argument(cq) + .nargs(2) + .type(arser::DataType::STR_VEC) + .required(false) + .accumulated(true) + .help("Copy quantization parameter from a tensor to another tensor." + "Two arguments required: source_tensor_name(string), " + "destination_tensor_name(string)"); + arser.add_argument("--input_type") .nargs(1) .type(arser::DataType::STR) @@ -128,6 +189,12 @@ int entry(int argc, char **argv) .required(false) .help("Output type of quantized model (uint8 or int16)"); + arser.add_argument(cfg) + .nargs(1) + .type(arser::DataType::STR) + .required(false) + .help("Path to the quantization configuration file"); + arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); @@ -146,11 +213,13 @@ int entry(int argc, char **argv) } { - // only one of qdqw, qwmm, rq, fq option can be used + // only one of qdqw, qwmm, rq, fq, cq, fake_quant option can be used int32_t opt_used = arser[qdqw] ? 1 : 0; opt_used += arser[qwmm] ? 1 : 0; opt_used += arser[rq] ? 1 : 0; opt_used += arser[fq] ? 1 : 0; + opt_used += arser[cq] ? 1 : 0; + opt_used += arser[fake_quant] ? 
1 : 0; if (opt_used != 1) { print_exclusive_options(); @@ -178,6 +247,22 @@ int entry(int argc, char **argv) options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0)); options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1)); options->param(AlgorithmParameters::Quantize_granularity, values.at(2)); + + if (arser[cfg]) + { + auto filename = arser.get<std::string>(cfg); + try + { + auto layer_params = read_layer_params(filename); + + options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params); + } + catch (const std::runtime_error &e) + { + std::cerr << e.what() << '\n'; + return 255; + } + } } if (arser[qwmm]) @@ -201,6 +286,25 @@ int entry(int argc, char **argv) if (arser["--output_type"]) options->param(AlgorithmParameters::Quantize_output_type, arser.get<std::string>("--output_type")); + + if (arser[tf_maxpool] and arser.get<bool>(tf_maxpool)) + options->param(AlgorithmParameters::Quantize_TF_style_maxpool, "True"); + + if (arser[cfg]) + { + auto filename = arser.get<std::string>(cfg); + try + { + auto layer_params = read_layer_params(filename); + + options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params); + } + catch (const std::runtime_error &e) + { + std::cerr << e.what() << '\n'; + return 255; + } + } } if (arser[rq]) @@ -245,6 +349,34 @@ int entry(int argc, char **argv) options->params(AlgorithmParameters::Quantize_zero_points, zero_points); } + if (arser[cq]) + { + auto values = arser.get<std::vector<std::vector<std::string>>>(cq); + + std::vector<std::string> src; + std::vector<std::string> dst; + + for (auto const value : values) + { + if (value.size() != 2) + { + std::cerr << arser; + return 255; + } + + src.push_back(value[0]); + dst.push_back(value[1]); + } + + options->enable(Algorithms::CopyQuantParam); + + options->params(AlgorithmParameters::Quantize_src_tensor_names, src); + options->params(AlgorithmParameters::Quantize_dst_tensor_names, dst); + } + + if 
(arser[fake_quant]) + options->enable(Algorithms::ConvertToFakeQuantizedModel); + std::string input_path = arser.get<std::string>("input"); std::string output_path = arser.get<std::string>("output"); @@ -279,7 +411,7 @@ int entry(int argc, char **argv) auto graph = module->graph(idx); // quantize the graph - optimizer.quantize(graph); + quantizer.quantize(graph); if (!luci::validate(graph)) { diff --git a/compiler/circle-tensordump/CMakeLists.txt b/compiler/circle-tensordump/CMakeLists.txt index 4524260c4..676aecd53 100644 --- a/compiler/circle-tensordump/CMakeLists.txt +++ b/compiler/circle-tensordump/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_circle) +if(NOT TARGET mio_circle04) return() -endif(NOT TARGET mio_circle) +endif(NOT TARGET mio_circle04) nnas_find_package(HDF5 COMPONENTS STATIC QUIET) @@ -19,7 +19,8 @@ target_include_directories(circle-tensordump PRIVATE ${HDF5_INCLUDE_DIRS}) target_link_libraries(circle-tensordump PRIVATE ${HDF5_CXX_LIBRARIES}) target_link_libraries(circle-tensordump PRIVATE arser) target_link_libraries(circle-tensordump PRIVATE foder) -target_link_libraries(circle-tensordump PRIVATE mio_circle) +target_link_libraries(circle-tensordump PRIVATE mio_circle04) +target_link_libraries(circle-tensordump PRIVATE mio_circle04_helper) target_link_libraries(circle-tensordump PRIVATE safemain) install(TARGETS circle-tensordump DESTINATION bin) diff --git a/compiler/circle-tensordump/requires.cmake b/compiler/circle-tensordump/requires.cmake index 1c754f518..183dfe227 100644 --- a/compiler/circle-tensordump/requires.cmake +++ b/compiler/circle-tensordump/requires.cmake @@ -1,4 +1,4 @@ require("arser") require("foder") -require("mio-circle") +require("mio-circle04") require("safemain") diff --git a/compiler/circle-tensordump/src/Reader.cpp b/compiler/circle-tensordump/src/Reader.cpp index 429736bfe..47b876054 100644 --- a/compiler/circle-tensordump/src/Reader.cpp +++ b/compiler/circle-tensordump/src/Reader.cpp @@ -16,66 +16,14 @@ #include 
"Reader.h" +#include <mio_circle/Helper.h> + #include <sstream> #include <string> namespace circletensordump { -bool is_valid(const circle::OperatorCode *opcode) -{ - circle::BuiltinOperator code = opcode->builtin_code(); - return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX); -} - -bool is_custom(const circle::OperatorCode *opcode) -{ - circle::BuiltinOperator code = opcode->builtin_code(); - return (code == circle::BuiltinOperator_CUSTOM); -} - -std::string opcode_name(const circle::OperatorCode *opcode) -{ - assert(opcode); - - if (!is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid)"; - return oss.str(); - } - - if (is_custom(opcode)) - { - if (!opcode->custom_code()) - return "(invalid custom)"; - - std::string custom_op = "CUSTOM("; - custom_op += opcode->custom_code()->c_str(); - custom_op += ")"; - return custom_op; - } - - circle::BuiltinOperator code = opcode->builtin_code(); - return circle::EnumNameBuiltinOperator(code); -} - -const char *tensor_type(const circle::Tensor *tensor) -{ - return circle::EnumNameTensorType(tensor->type()); -} - -const char *tensor_name(const circle::Tensor *tensor) -{ - static const char *kEmptyTensorName = "(noname)"; - - auto name = tensor->name(); - if (name) - return name->c_str(); - - return kEmptyTensorName; -} - Reader::Reader(const circle::Model *model) { _subgraphs = model->subgraphs(); @@ -122,7 +70,7 @@ circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const assert(index < _op_codes.size()); const circle::OperatorCode *opcode = _op_codes.at(index); - return opcode->builtin_code(); + return mio::circle::builtin_code_neutral(opcode); } std::string Reader::opcode_name(const circle::Operator *op) const @@ -131,14 +79,14 @@ std::string Reader::opcode_name(const circle::Operator *op) const assert(index < _op_codes.size()); const circle::OperatorCode *opcode = _op_codes.at(index); - if (!is_valid(opcode)) + if (!mio::circle::is_valid(opcode)) { 
std::ostringstream oss; oss << "(invalid: " << index << ")"; return oss.str(); } - return circletensordump::opcode_name(opcode); + return mio::circle::opcode_name(opcode); } bool Reader::select_subgraph(uint32_t sgindex) diff --git a/compiler/circle-tensordump/src/Reader.h b/compiler/circle-tensordump/src/Reader.h index bbb039552..c868bc277 100644 --- a/compiler/circle-tensordump/src/Reader.h +++ b/compiler/circle-tensordump/src/Reader.h @@ -36,12 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T return ret; } -bool is_valid(const circle::OperatorCode *opcode); -bool is_custom(const circle::OperatorCode *opcode); -std::string opcode_name(const circle::OperatorCode *opcode); -const char *tensor_type(const circle::Tensor *tensor); -const char *tensor_name(const circle::Tensor *tensor); - /** * @brief Loads Circle file and provides helpers to access attributes */ diff --git a/compiler/circle-verify/CMakeLists.txt b/compiler/circle-verify/CMakeLists.txt index f22174865..5d0eb9468 100644 --- a/compiler/circle-verify/CMakeLists.txt +++ b/compiler/circle-verify/CMakeLists.txt @@ -1,13 +1,14 @@ -if(NOT TARGET mio_circle) +if(NOT TARGET mio_circle04) + message(STATUS "Skip circle-verify: mio_circle04 not found") return() -endif(NOT TARGET mio_circle) +endif(NOT TARGET mio_circle04) file(GLOB_RECURSE SOURCES "src/*.cpp") add_executable(circle-verify ${SOURCES}) target_include_directories(circle-verify PRIVATE src) target_link_libraries(circle-verify arser) -target_link_libraries(circle-verify mio_circle) +target_link_libraries(circle-verify mio_circle04) target_link_libraries(circle-verify safemain) target_link_libraries(circle-verify cwrap) target_link_libraries(circle-verify foder) diff --git a/compiler/circle-verify/requires.cmake b/compiler/circle-verify/requires.cmake index e1b7fb212..74c8f448b 100644 --- a/compiler/circle-verify/requires.cmake +++ b/compiler/circle-verify/requires.cmake @@ -1,5 +1,5 @@ require("arser") 
-require("mio-circle") +require("mio-circle04") require("safemain") require("cwrap") require("foder") diff --git a/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt b/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt index ee73d63e3..9ccfd0008 100644 --- a/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt +++ b/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nnas_include(TargetRequire) unset(REQUIRED_TARGETS) diff --git a/compiler/circle2circle/CMakeLists.txt b/compiler/circle2circle/CMakeLists.txt index 358fc4e2c..cd79967b7 100644 --- a/compiler/circle2circle/CMakeLists.txt +++ b/compiler/circle2circle/CMakeLists.txt @@ -11,7 +11,6 @@ target_link_libraries(circle2circle oops) target_link_libraries(circle2circle hermes) target_link_libraries(circle2circle hermes_std) target_link_libraries(circle2circle loco) -target_link_libraries(circle2circle mio_circle) target_link_libraries(circle2circle luci_env) target_link_libraries(circle2circle luci_import) target_link_libraries(circle2circle luci_service) @@ -36,7 +35,6 @@ target_link_libraries(circle2circle_test oops) target_link_libraries(circle2circle_test hermes) target_link_libraries(circle2circle_test hermes_std) target_link_libraries(circle2circle_test loco) -target_link_libraries(circle2circle_test mio_circle) target_link_libraries(circle2circle_test luci_env) target_link_libraries(circle2circle_test luci_import) target_link_libraries(circle2circle_test luci_service) diff --git a/compiler/circle2circle/requires.cmake b/compiler/circle2circle/requires.cmake index 36a9efd16..b6c61198f 100644 --- a/compiler/circle2circle/requires.cmake +++ b/compiler/circle2circle/requires.cmake @@ -3,7 +3,6 @@ require("loco") require("locop") require("logo-core") require("safemain") -require("mio-circle") require("oops") require("hermes") require("hermes-std") diff --git a/compiler/circle2circle/src/Circle2Circle.cpp 
b/compiler/circle2circle/src/Circle2Circle.cpp index a5ddb26dc..ae677a321 100644 --- a/compiler/circle2circle/src/Circle2Circle.cpp +++ b/compiler/circle2circle/src/Circle2Circle.cpp @@ -104,6 +104,12 @@ int entry(int argc, char **argv) .default_value(false) .help("This will fold Depthwise Convolution operator with constant inputs"); + arser.add_argument("--fold_gather") + .nargs(0) + .required(false) + .default_value(false) + .help("This will fold Gather operator"); + arser.add_argument("--fold_sparse_to_dense") .nargs(0) .required(false) @@ -203,6 +209,12 @@ int entry(int argc, char **argv) .default_value(false) .help("This will remove Quantize-Dequantize sequence"); + arser.add_argument("--remove_redundant_quantize") + .nargs(0) + .required(false) + .default_value(false) + .help("This will remove redundant Quantize operators"); + arser.add_argument("--remove_redundant_reshape") .nargs(0) .required(false) @@ -452,6 +464,8 @@ int entry(int argc, char **argv) options->enable(Algorithms::FoldDequantize); if (arser.get<bool>("--fold_dwconv")) options->enable(Algorithms::FoldDepthwiseConv2D); + if (arser.get<bool>("--fold_gather")) + options->enable(Algorithms::FoldGather); if (arser.get<bool>("--fold_sparse_to_dense")) options->enable(Algorithms::FoldSparseToDense); if (arser.get<bool>("--forward_reshape_to_unaryop")) @@ -484,6 +498,8 @@ int entry(int argc, char **argv) options->enable(Algorithms::RemoveFakeQuant); if (arser.get<bool>("--remove_quantdequant")) options->enable(Algorithms::RemoveQuantDequantSeq); + if (arser.get<bool>("--remove_redundant_quantize")) + options->enable(Algorithms::RemoveRedundantQuantize); if (arser.get<bool>("--remove_redundant_reshape")) options->enable(Algorithms::RemoveRedundantReshape); if (arser.get<bool>("--remove_redundant_transpose")) diff --git a/compiler/circlechef/CMakeLists.txt b/compiler/circlechef/CMakeLists.txt index 3e2ddcbb3..b124d3027 100644 --- a/compiler/circlechef/CMakeLists.txt +++ 
b/compiler/circlechef/CMakeLists.txt @@ -1,12 +1,14 @@ nnas_find_package(Protobuf QUIET) if(NOT Protobuf_FOUND) + message(STATUS "circlechef: SKIP (missing Protobuf)") return() endif(NOT Protobuf_FOUND) -if(NOT TARGET mio_circle) +if(NOT TARGET mio_circle04) + message(STATUS "circlechef: SKIP (missing mio-circle04)") return() -endif(NOT TARGET mio_circle) +endif(NOT TARGET mio_circle04) # Recipe Parser add_subdirectory(proto) diff --git a/compiler/circlechef/circle/CMakeLists.txt b/compiler/circlechef/circle/CMakeLists.txt index 98a284c30..12dc7217b 100644 --- a/compiler/circlechef/circle/CMakeLists.txt +++ b/compiler/circlechef/circle/CMakeLists.txt @@ -4,6 +4,7 @@ add_library(circlechef_circle STATIC ${SOURCES}) target_include_directories(circlechef_circle PUBLIC include) target_include_directories(circlechef_circle PRIVATE src) target_link_libraries(circlechef_circle circlechef_proto) -target_link_libraries(circlechef_circle mio_circle) +target_link_libraries(circlechef_circle mio_circle04) +target_link_libraries(circlechef_circle mio_circle04_helper) target_link_libraries(circlechef_circle cwrap) target_link_libraries(circlechef_circle souschef) diff --git a/compiler/circlechef/circle/src/CircleImport.cpp b/compiler/circlechef/circle/src/CircleImport.cpp index e970fbce3..f8756ef94 100644 --- a/compiler/circlechef/circle/src/CircleImport.cpp +++ b/compiler/circlechef/circle/src/CircleImport.cpp @@ -18,38 +18,13 @@ #include "Convert.h" +#include <mio_circle/Helper.h> + #include <sstream> namespace circlechef { -const char *kEmptyTensorName = "(noname)"; - -const char *tensor_type(const circle::Tensor *tensor) -{ - return circle::EnumNameTensorType(tensor->type()); -} - -const char *tensor_name(const circle::Tensor *tensor) -{ - auto name = tensor->name(); - if (name) - return name->c_str(); - return kEmptyTensorName; -} - -bool is_valid(const circle::OperatorCode *opcode) -{ - circle::BuiltinOperator code = opcode->builtin_code(); - return 
(circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX); -} - -bool is_custom(const circle::OperatorCode *opcode) -{ - circle::BuiltinOperator code = opcode->builtin_code(); - return (code == circle::BuiltinOperator_CUSTOM); -} - CircleImport::CircleImport(const circle::Model *model) { _subgraphs = model->subgraphs(); @@ -92,7 +67,7 @@ circle::BuiltinOperator CircleImport::builtin_code(const circle::Operator *op) c assert(index < _op_codes.size()); const circle::OperatorCode *opcode = _op_codes.at(index); - return opcode->builtin_code(); + return mio::circle::builtin_code_neutral(opcode); } std::string CircleImport::opcode_name(const circle::Operator *op) const @@ -101,14 +76,14 @@ std::string CircleImport::opcode_name(const circle::Operator *op) const assert(index < _op_codes.size()); const circle::OperatorCode *opcode = _op_codes.at(index); - if (!is_valid(opcode)) + if (!mio::circle::is_valid(opcode)) { std::ostringstream oss; oss << "(invalid: " << index << ")"; return oss.str(); } - if (is_custom(opcode)) + if (mio::circle::is_custom(opcode)) { if (!opcode->custom_code()) return "(invalid custom)"; diff --git a/compiler/circlechef/circle/src/CircleImport.h b/compiler/circlechef/circle/src/CircleImport.h index 23ca29beb..9c1d161b6 100644 --- a/compiler/circlechef/circle/src/CircleImport.h +++ b/compiler/circlechef/circle/src/CircleImport.h @@ -34,11 +34,6 @@ using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>> using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>; using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>; -const char *tensor_type(const circle::Tensor *tensor); -const char *tensor_name(const circle::Tensor *tensor); -bool is_valid(const circle::OperatorCode *opcode); -bool is_custom(const circle::OperatorCode *opcode); - /** * @brief Loads TF lite file and provides helpers to access attributes */ diff --git 
a/compiler/circlechef/circle/src/RecipeChef.cpp b/compiler/circlechef/circle/src/RecipeChef.cpp index cd520cbc3..e21bca8a6 100644 --- a/compiler/circlechef/circle/src/RecipeChef.cpp +++ b/compiler/circlechef/circle/src/RecipeChef.cpp @@ -15,6 +15,7 @@ */ #include <circlechef/RecipeChef.h> +#include <mio_circle/Helper.h> #include "Convert.h" #include "CircleImport.h" @@ -42,7 +43,7 @@ void set_inputs(CircleImport *import, circlechef::Operation *operation, const ci else { auto tensor = tensors->Get(input); - std::string name = tensor_name(tensor); + std::string name = mio::circle::tensor_name(tensor); operation->add_input(name); } } @@ -56,7 +57,7 @@ void set_outputs(CircleImport *import, circlechef::Operation *operation, const c for (auto output : outputs) { auto tensor = tensors->Get(output); - std::string name = tensor_name(tensor); + std::string name = mio::circle::tensor_name(tensor); operation->add_output(name); } } @@ -108,7 +109,7 @@ std::unique_ptr<ModelRecipe> generate_recipe(const circle::Model *model) ::circlechef::Operand *operand = model_recipe->add_operand(); - operand->set_name(tensor_name(tensor)); + operand->set_name(mio::circle::tensor_name(tensor)); operand->set_type(as_circlechef_type(tensor->type())); std::vector<int32_t> dims = as_index_vector(tensor->shape()); @@ -224,14 +225,14 @@ std::unique_ptr<ModelRecipe> generate_recipe(const circle::Model *model) for (const auto input : inputs) { auto tensor = tensors->Get(input); - std::string name = tensor_name(tensor); + std::string name = mio::circle::tensor_name(tensor); model_recipe->add_input(name); } for (const auto output : outputs) { auto tensor = tensors->Get(output); - std::string name = tensor_name(tensor); + std::string name = mio::circle::tensor_name(tensor); model_recipe->add_output(name); } diff --git a/compiler/circlechef/core/CMakeLists.txt b/compiler/circlechef/core/CMakeLists.txt index 0e8f47483..415954767 100644 --- a/compiler/circlechef/core/CMakeLists.txt +++ 
b/compiler/circlechef/core/CMakeLists.txt @@ -7,7 +7,7 @@ target_include_directories(circlechef_core PUBLIC include) target_include_directories(circlechef_core PRIVATE src) target_link_libraries(circlechef_core PUBLIC circlechef_proto) target_link_libraries(circlechef_core PUBLIC circlechef_log) -target_link_libraries(circlechef_core PUBLIC mio_circle) +target_link_libraries(circlechef_core PUBLIC mio_circle04) target_link_libraries(circlechef_core PUBLIC souschef) target_link_libraries(circlechef_core PRIVATE nncc_coverage) diff --git a/compiler/circlechef/core/src/ModelChef.cpp b/compiler/circlechef/core/src/ModelChef.cpp index 6975f42a3..6c5206dfc 100644 --- a/compiler/circlechef/core/src/ModelChef.cpp +++ b/compiler/circlechef/core/src/ModelChef.cpp @@ -520,6 +520,10 @@ GeneratedModel cook(const ::circlechef::ModelRecipe &model_recipe) for (auto const &opcode : builtin_code_map) { circle::OperatorCodeBuilder code_builder{*flatbuffer_builder}; + int8_t dep_code = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES + if (opcode.first < circle::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES) + dep_code = static_cast<int8_t>(opcode.first); + code_builder.add_deprecated_builtin_code(dep_code); code_builder.add_builtin_code(opcode.first); code_builder.add_version(opcode.second); auto code = code_builder.Finish(); diff --git a/compiler/circlechef/requires.cmake b/compiler/circlechef/requires.cmake index 2106146d7..a5d6bedaa 100644 --- a/compiler/circlechef/requires.cmake +++ b/compiler/circlechef/requires.cmake @@ -1,9 +1,10 @@ require("arser") require("nnkit") require("cwrap") -require("mio-circle") +require("mio-circle04") require("safemain") require("hermes") require("hermes-std") require("foder") require("souschef") +require("circle-verify") diff --git a/compiler/circlechef/tests/CMakeLists.txt b/compiler/circlechef/tests/CMakeLists.txt index 773ff5403..7ae619f8b 100644 --- a/compiler/circlechef/tests/CMakeLists.txt +++ 
b/compiler/circlechef/tests/CMakeLists.txt @@ -3,6 +3,15 @@ set(CIRCLERECIPES_DIR "${CircleRecipes_DIR}") file(GLOB RECIPES RELATIVE ${CIRCLERECIPES_DIR} "${CIRCLERECIPES_DIR}/*/test.recipe") +set(CIRCLECHEF_FILE_PATH $<TARGET_FILE:circlechef-file>) +set(CIRCLECHEF_REVERSE_PATH $<TARGET_FILE:circlechef-reverse>) +if(DEFINED ENV{BUILD_HOST_EXEC}) + # TODO use better way to represent path for host executable + set(CIRCLECHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/file/circlechef-file) + set(CIRCLECHEF_REVERSE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/reverse/circlechef-reverse) + message(STATUS "CIRCLECHEF_FILE_PATH = ${CIRCLECHEF_FILE_PATH}") +endif(DEFINED ENV{BUILD_HOST_EXEC}) + foreach(RECIPE IN ITEMS ${RECIPES}) get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY) @@ -18,8 +27,8 @@ foreach(RECIPE IN ITEMS ${RECIPES}) # Generate .circle add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE} - COMMAND circlechef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE} - DEPENDS circlechef-file ${RECIPE_SOURCE_FILE} + COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE} + DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} COMMENT "Generating ${RECIPE_OUTPUT_FILE}") list(APPEND TESTS ${RECIPE_PREFIX}) @@ -44,8 +53,8 @@ foreach(RECIPE IN ITEMS ${RECIPES}) # Generate .circle add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE} - COMMAND circlechef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE} - DEPENDS circlechef-file ${RECIPE_SOURCE_FILE} + COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE} + DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} COMMENT "Generating ${RECIPE_OUTPUT_FILE}") list(APPEND TESTS ${RECIPE_PREFIX}) @@ -68,16 +77,16 @@ foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES}) # Generate .gen.recipe from generated .circle add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE} - COMMAND circlechef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE} - DEPENDS circlechef-reverse 
${RECIPE_OUTPUT_FILE} + COMMAND ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE} + DEPENDS ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}") # now we are going to generate .gen.circle from .gen.recipe # to check generated .gen.recipe file is correct by using it. # as weight values may be different, binary comparision is not acceptable. add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2} - COMMAND circlechef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2} - DEPENDS circlechef-file ${RECIPE_GEN_OUTPUT_FILE} + COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2} + DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}") list(APPEND TESTS ${CIRCLE_PREFIX}.gen) @@ -96,13 +105,13 @@ foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES}) # Generate .gen.recipe from generated .circle add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE} - COMMAND circlechef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE} - DEPENDS circlechef-reverse ${RECIPE_OUTPUT_FILE} + COMMAND ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE} + DEPENDS ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}") add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2} - COMMAND circlechef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2} - DEPENDS circlechef-file ${RECIPE_GEN_OUTPUT_FILE} + COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2} + DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}") list(APPEND TESTS ${CIRCLE_PREFIX}.gen) diff --git a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt index 7848ac722..b65c06677 100644 --- a/compiler/circledump/CMakeLists.txt +++ b/compiler/circledump/CMakeLists.txt @@ -1,6 +1,7 @@ -if(NOT TARGET mio_circle) +if(NOT TARGET 
mio_circle04) + message(STATUS "Skip circledump: mio_circle04 not found") return() -endif(NOT TARGET mio_circle) +endif(NOT TARGET mio_circle04) set(DRIVER "driver/Driver.cpp") @@ -9,8 +10,8 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") add_executable(circledump ${DRIVER} ${SOURCES}) target_include_directories(circledump PRIVATE include) target_link_libraries(circledump arser) -target_link_libraries(circledump mio_circle) +target_link_libraries(circledump mio_circle04) +target_link_libraries(circledump mio_circle04_helper) target_link_libraries(circledump safemain) -target_link_libraries(circledump flatbuffers-1.10) install(TARGETS circledump DESTINATION bin) diff --git a/compiler/circledump/README.md b/compiler/circledump/README.md index e31c2d560..d2baf26b3 100644 --- a/compiler/circledump/README.md +++ b/compiler/circledump/README.md @@ -65,6 +65,6 @@ O T(3) ofm ### Dependency -- mio-circle +- mio-circle04 - safemain - FlatBuffers diff --git a/compiler/circledump/requires.cmake b/compiler/circledump/requires.cmake index 81e0f0dbd..362d67cf4 100644 --- a/compiler/circledump/requires.cmake +++ b/compiler/circledump/requires.cmake @@ -1,3 +1,3 @@ require("arser") -require("mio-circle") +require("mio-circle04") require("safemain") diff --git a/compiler/circledump/src/Dump.cpp b/compiler/circledump/src/Dump.cpp index 42b4ad97a..0b256dda8 100644 --- a/compiler/circledump/src/Dump.cpp +++ b/compiler/circledump/src/Dump.cpp @@ -15,6 +15,7 @@ */ #include <circledump/Dump.h> +#include <mio_circle/Helper.h> #include "Read.h" #include "OpPrinter.h" @@ -141,7 +142,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader) // dump operands(tensors) os << "Operands: T(subgraph index : tensor index) TYPE (shape) (shape_signature) " - << "B(buffer index) OperandName" << std::endl; + << "B(buffer index) (variable) OperandName" << std::endl; for (uint32_t i = 0; i < tensors->Length(); ++i) { // TODO refactor to some better structure @@ -151,7 +152,7 @@ void 
dump_sub_graph(std::ostream &os, circleread::Reader &reader) if (tensor->shape()) dims = circleread::as_index_vector(tensor->shape()); - os << "T(" << reader.subgraph_index() << ":" << i << ") " << circleread::tensor_type(tensor) + os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::circle::tensor_type(tensor) << " "; os << "(" << dims << ") "; if (tensor->shape_signature()) @@ -160,7 +161,11 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader) os << "(" << dims_sig << ") "; } os << "B(" << tensor->buffer() << ") "; - os << circleread::tensor_name(tensor) << std::endl; + if (tensor->is_variable()) + { + os << "(variable) "; + } + os << mio::circle::tensor_name(tensor) << std::endl; if (auto q_params = tensor->quantization()) { @@ -312,7 +317,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader) if (input >= 0) { auto tensor = tensors->Get(input); - os << circleread::tensor_name(tensor); + os << mio::circle::tensor_name(tensor); } os << std::endl; } @@ -322,7 +327,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader) if (output >= 0) { auto tensor = tensors->Get(output); - os << circleread::tensor_name(tensor); + os << mio::circle::tensor_name(tensor); } os << std::endl; } @@ -335,14 +340,14 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader) for (const auto input : reader.inputs()) { auto tensor = tensors->Get(input); - std::string name = circleread::tensor_name(tensor); + std::string name = mio::circle::tensor_name(tensor); os << "I T(" << reader.subgraph_index() << ":" << input << ") " << name << std::endl; } for (const auto output : reader.outputs()) { auto tensor = tensors->Get(output); - std::string name = circleread::tensor_name(tensor); + std::string name = mio::circle::tensor_name(tensor); os << "O T(" << reader.subgraph_index() << ":" << output << ") " << name << std::endl; } @@ -364,6 +369,7 @@ void dump_model(std::ostream &os, const circle::Model *model) auto opcodes = 
reader.opcodes(); auto buffers = reader.buffers(); auto metadata = reader.metadata(); + auto signaturedefs = reader.signature_defs(); // dump operator_codes os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl; @@ -371,11 +377,14 @@ void dump_model(std::ostream &os, const circle::Model *model) for (auto opcode : opcodes) { circle::BuiltinOperator op_code = opcode->builtin_code(); - auto op_name = circleread::opcode_name(opcode); + // cast to int32_t to print as number or int8_t will print as ascii code + int32_t dp_code = static_cast<int32_t>(opcode->deprecated_builtin_code()); + + auto op_name = mio::circle::opcode_name(opcode); auto op_version = opcode->version(); os << "[" << opcode_index << "] " << op_name << " (code: " << op_code - << ", version: " << op_version << ")" << std::endl; + << ", dep_code: " << dp_code << ", version: " << op_version << ")" << std::endl; opcode_index++; } @@ -417,6 +426,37 @@ void dump_model(std::ostream &os, const circle::Model *model) os << std::endl; } + // dump signaturedef + if (signaturedefs != nullptr) + { + os << "SignatureDef" << std::endl; + for (uint32_t i = 0; i < signaturedefs->Length(); ++i) + { + auto sign_i = signaturedefs->Get(i); + os << "S(" << i << ") signature_key(" << sign_i->signature_key()->c_str() << "), sub_graph(" + << sign_i->subgraph_index() << ")" << std::endl; + + auto inputs_i = sign_i->inputs(); + for (uint32_t t = 0; t < inputs_i->Length(); ++t) + { + auto inputs_i_t = inputs_i->Get(t); + os << " I(" << t << ")" + << " T(" << sign_i->subgraph_index() << ":" << inputs_i_t->tensor_index() << ") " + << inputs_i_t->name()->c_str() << std::endl; + } + + auto outputs_i = sign_i->outputs(); + for (uint32_t t = 0; t < outputs_i->Length(); ++t) + { + auto outputs_i_t = outputs_i->Get(t); + os << " O(" << t << ")" + << " T(" << sign_i->subgraph_index() << ":" << outputs_i_t->tensor_index() << ") " + << outputs_i_t->name()->c_str() << std::endl; + } + } + os << std::endl; + } + for (uint32_t sg 
= 0; sg < num_subgraph; ++sg) { reader.select_subgraph(sg); diff --git a/compiler/circledump/src/Load.cpp b/compiler/circledump/src/Load.cpp index ec91ed189..67e7fa5a6 100644 --- a/compiler/circledump/src/Load.cpp +++ b/compiler/circledump/src/Load.cpp @@ -76,7 +76,7 @@ public: { if (_value != -1) { - // Close on descturction + // Close on destructor close(_value); } } diff --git a/compiler/circledump/src/OpPrinter.cpp b/compiler/circledump/src/OpPrinter.cpp index 7af3ff641..02e5c26b5 100644 --- a/compiler/circledump/src/OpPrinter.cpp +++ b/compiler/circledump/src/OpPrinter.cpp @@ -341,6 +341,7 @@ public: << ") "; os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function()) << ") "; + os << "keep_num_dims(" << params->keep_num_dims() << ") "; os << std::endl; } @@ -619,6 +620,23 @@ public: } }; +class SVDFPrinter : public OpPrinter +{ +public: + void options(const circle::Operator *op, std::ostream &os) const override + { + if (auto *params = op->builtin_options_as_SVDFOptions()) + { + os << " "; + os << "rank(" << params->rank() << ") "; + os << "activation(" << EnumNameActivationFunctionType(params->fused_activation_function()) + << ") "; + os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") "; + os << std::endl; + } + } +}; + class TransposeConvPrinter : public OpPrinter { public: @@ -754,6 +772,22 @@ public: } }; +class InstanceNormPrinter : public OpPrinter +{ +public: + void options(const circle::Operator *op, std::ostream &os) const override + { + if (auto *params = op->builtin_options_as_InstanceNormOptions()) + { + os << " "; + os << "epsilon(" << params->epsilon() << ") "; + os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function()) + << ") "; + os << std::endl; + } + } +}; + OpPrinterRegistry::OpPrinterRegistry() { _op_map[circle::BuiltinOperator_ADD] = make_unique<AddPrinter>(); @@ -824,6 +858,7 @@ OpPrinterRegistry::OpPrinterRegistry() 
_op_map[circle::BuiltinOperator_STRIDED_SLICE] = make_unique<StridedSlicePrinter>(); _op_map[circle::BuiltinOperator_SUB] = make_unique<SubPrinter>(); _op_map[circle::BuiltinOperator_SUM] = make_unique<ReducerPrinter>(); + _op_map[circle::BuiltinOperator_SVDF] = make_unique<SVDFPrinter>(); _op_map[circle::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>(); // There is no Option for TOPK_V2 _op_map[circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] = @@ -835,6 +870,7 @@ OpPrinterRegistry::OpPrinterRegistry() // Circle only _op_map[circle::BuiltinOperator_BCQ_FULLY_CONNECTED] = make_unique<BCQFullyConnectedPrinter>(); _op_map[circle::BuiltinOperator_BCQ_GATHER] = make_unique<BCQGatherPrinter>(); + _op_map[circle::BuiltinOperator_INSTANCE_NORM] = make_unique<InstanceNormPrinter>(); } } // namespace circledump diff --git a/compiler/circledump/src/Read.cpp b/compiler/circledump/src/Read.cpp index db8298585..3a7e98cde 100644 --- a/compiler/circledump/src/Read.cpp +++ b/compiler/circledump/src/Read.cpp @@ -16,72 +16,21 @@ #include "Read.h" +#include <mio_circle/Helper.h> + #include <sstream> #include <string> namespace circleread { -bool is_valid(const circle::OperatorCode *opcode) -{ - circle::BuiltinOperator code = opcode->builtin_code(); - return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX); -} - -bool is_custom(const circle::OperatorCode *opcode) -{ - circle::BuiltinOperator code = opcode->builtin_code(); - return (code == circle::BuiltinOperator_CUSTOM); -} - -std::string opcode_name(const circle::OperatorCode *opcode) -{ - assert(opcode); - - if (!is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid)"; - return oss.str(); - } - - if (is_custom(opcode)) - { - if (!opcode->custom_code()) - return "(invalid custom)"; - - std::string custom_op = "CUSTOM("; - custom_op += opcode->custom_code()->c_str(); - custom_op += ")"; - return custom_op; - } - - circle::BuiltinOperator code = opcode->builtin_code(); 
- return circle::EnumNameBuiltinOperator(code); -} - -const char *tensor_type(const circle::Tensor *tensor) -{ - return circle::EnumNameTensorType(tensor->type()); -} - -const char *tensor_name(const circle::Tensor *tensor) -{ - static const char *kEmptyTensorName = "(noname)"; - - auto name = tensor->name(); - if (name) - return name->c_str(); - - return kEmptyTensorName; -} - Reader::Reader(const circle::Model *model) { _version = model->version(); _subgraphs = model->subgraphs(); _buffers = model->buffers(); _metadata = model->metadata(); + _signature_defs = model->signature_defs(); auto opcodes = model->operator_codes(); for (const ::circle::OperatorCode *opcode : *opcodes) @@ -127,14 +76,14 @@ std::string Reader::opcode_name(const circle::Operator *op) const assert(index < _op_codes.size()); const circle::OperatorCode *opcode = _op_codes.at(index); - if (!is_valid(opcode)) + if (!mio::circle::is_valid(opcode)) { std::ostringstream oss; oss << "(invalid: " << index << ")"; return oss.str(); } - return circleread::opcode_name(opcode); + return mio::circle::opcode_name(opcode); } bool Reader::select_subgraph(uint32_t sgindex) diff --git a/compiler/circledump/src/Read.h b/compiler/circledump/src/Read.h index c61a1ab6d..05b0e5072 100644 --- a/compiler/circledump/src/Read.h +++ b/compiler/circledump/src/Read.h @@ -41,12 +41,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T return ret; } -bool is_valid(const circle::OperatorCode *opcode); -bool is_custom(const circle::OperatorCode *opcode); -std::string opcode_name(const circle::OperatorCode *opcode); -const char *tensor_type(const circle::Tensor *tensor); -const char *tensor_name(const circle::Tensor *tensor); - /** * @brief Loads Circle file and provides helpers to access attributes */ @@ -58,6 +52,7 @@ private: using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>; using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>; 
using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>; + using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>; public: Reader(const circle::Model *model); @@ -75,6 +70,7 @@ public: const std::vector<int32_t> &outputs() const { return _outputs; } const circle::DataFormat &data_format() const { return _data_format; } const CircleMetadata_t *metadata() const { return _metadata; } + const CircleSignatureDef_t *signature_defs() const { return _signature_defs; } uint32_t num_subgraph() const { return _subgraphs->Length(); } @@ -95,6 +91,7 @@ private: const CircleTensors_t *_tensors{nullptr}; const CircleOperators_t *_operators{nullptr}; const CircleMetadata_t *_metadata{nullptr}; + const CircleSignatureDef_t *_signature_defs{nullptr}; uint32_t _subgraph_index = 0; std::string _subgraph_name; diff --git a/compiler/cli/CMakeLists.txt b/compiler/cli/CMakeLists.txt index 2ab8c0529..0fb99ddba 100644 --- a/compiler/cli/CMakeLists.txt +++ b/compiler/cli/CMakeLists.txt @@ -4,11 +4,11 @@ list(APPEND TESTS "src/App.test.cpp") add_library(cli ${SOURCES}) target_include_directories(cli PUBLIC include) -nnas_find_package(GTest QUIET) - -if(NOT GTest_FOUND) +if(NOT ENABLE_TEST) return() -endif(NOT GTest_FOUND) +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest QUIET) GTest_AddTEst(cli_test ${TESTS}) target_link_libraries(cli_test cli) diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt index 6de634a25..404149c15 100644 --- a/compiler/common-artifacts/CMakeLists.txt +++ b/compiler/common-artifacts/CMakeLists.txt @@ -1,82 +1,63 @@ #[[ Generate common python virtual enviornment ]] -find_package(PythonInterp 3 QUIET) -find_package(PythonLibs 3 QUIET) +find_package(PythonInterp 3.8 QUIET) +find_package(PythonLibs 3.8 QUIET) if(NOT ${PYTHONINTERP_FOUND}) message(STATUS "Build common-artifacts: FALSE (Python3 is missing)") return() endif() -if(${PYTHON_VERSION_MINOR} LESS 3) - 
message(STATUS "Build common-artifacts: FALSE (You need to install Python version higher than 3.3)") +if(${PYTHON_VERSION_MINOR} LESS 8) + message(STATUS "Build common-artifacts: FALSE (You need to install Python version higher than 3.8)") return() endif() -# Create python virtual environment with tensorflow 1.13.2 -set(VIRTUALENV_OVERLAY_TF_1_13_2 "${NNCC_OVERLAY_DIR}/venv_1_13_2") - -# Create python virtual environment with tensorflow 2.3.0 -set(VIRTUALENV_OVERLAY_TF_2_3_0 "${NNCC_OVERLAY_DIR}/venv_2_3_0") # Create python virtual environment with tensorflow 2.6.0 set(VIRTUALENV_OVERLAY_TF_2_6_0 "${NNCC_OVERLAY_DIR}/venv_2_6_0") add_custom_command( - OUTPUT ${VIRTUALENV_OVERLAY_TF_1_13_2} - COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_1_13_2} -) - -add_custom_command( - OUTPUT ${VIRTUALENV_OVERLAY_TF_2_3_0} - COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_3_0} -) -add_custom_command( OUTPUT ${VIRTUALENV_OVERLAY_TF_2_6_0} COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_6_0} ) -# Create requirements.txt and install required pip packages -set(REQUIREMENTS_FILE "requirements.txt") -set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}") -set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}") -set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}") +# Create python virtual environment with tensorflow 2.8.0 +set(VIRTUALENV_OVERLAY_TF_2_8_0 "${NNCC_OVERLAY_DIR}/venv_2_8_0") -# TODO remove version number of '--upgrade pip==20.2.1 setuptools==49.3.0' -# NOTE adding version is for temporary hotfix of setuptools 50.x.y version add_custom_command( - OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} - COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} - COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 
setuptools==49.3.0 - COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} --upgrade - DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2} + OUTPUT ${VIRTUALENV_OVERLAY_TF_2_8_0} + COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_8_0} ) -add_custom_command( - OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} - COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} - COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.3.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} - COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} - COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0 - COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} --upgrade - DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0} -) +# Create requirements.txt and install required pip packages +set(REQUIREMENTS_FILE "requirements.txt") +set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}") +set(REQUIREMENTS_OVERLAY_PATH_TF_2_8_0 "${VIRTUALENV_OVERLAY_TF_2_8_0}/${REQUIREMENTS_FILE}") add_custom_command( OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.6.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} - COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0 - COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade + COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools + 
COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0} ) +add_custom_command( + OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} + COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} + COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.8.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} + COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} + COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools + COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} --upgrade + DEPENDS ${VIRTUALENV_OVERLAY_TF_2_8_0} +) + add_custom_target(common_artifacts_python_deps ALL - DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2} - ${VIRTUALENV_OVERLAY_TF_2_3_0} - ${VIRTUALENV_OVERLAY_TF_2_6_0} - ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} - ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} + DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0} + ${VIRTUALENV_OVERLAY_TF_2_8_0} ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} + ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} ) #[[ Generate common resources ]] @@ -97,7 +78,6 @@ target_link_libraries(testDataGenerator PRIVATE arser) target_link_libraries(testDataGenerator PRIVATE foder) target_link_libraries(testDataGenerator PRIVATE luci_import) target_link_libraries(testDataGenerator PRIVATE luci_interpreter) -target_link_libraries(testDataGenerator PRIVATE mio_circle) target_link_libraries(testDataGenerator PRIVATE safemain) unset(TEST_DEPS) @@ -109,6 +89,7 @@ set(TFLITE_RECIPE_REPO "${TensorFlowLiteRecipes_DIR}") set(CIRCLE_RECIPE_REPO "${CircleRecipes_DIR}") set(TEST_RECIPE_FILENAME "test.recipe") set(TEST_RULE_FILENAME "test.rule") +set(TEST_QCONFIG_FILENAME "test.qconf.json") set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh") # Get test 
case list @@ -140,12 +121,20 @@ endmacro() include("exclude.lst") +# TODO revise using variadic arguments +macro(tcgenerate_option NAME OPTION ARG1 ARG2 ARG3) + set(TCGEN_OPT_${NAME} ${OPTION} ${ARG1} ${ARG2} ${ARG3}) +endmacro() + +include("options.lst") + foreach(RECIPE IN ITEMS ${RECIPES}) unset(OPT_FORMAT) unset(MODEL_FORMAT) set(RECIPE_FILE "${RECIPE}.recipe") set(RULE_FILE "${RECIPE}.rule") + set(QCONFIG_FILE "${RECIPE}.qconf.json") set(TFLITE_RECIPE_SOURCE_PATH "${TFLITE_RECIPE_REPO}/${RECIPE}/${TEST_RECIPE_FILENAME}") set(CIRCLE_RECIPE_SOURCE_PATH "${CIRCLE_RECIPE_REPO}/${RECIPE}/${TEST_RECIPE_FILENAME}") @@ -174,8 +163,20 @@ foreach(RECIPE IN ITEMS ${RECIPES}) set(RULE_SOURCE_PATH ${CIRCLE_RULE_SOURCE_PATH}) endif() + set(TFLITE_QCONFIG_SOURCE_PATH "${TFLITE_RECIPE_REPO}/${RECIPE}/${TEST_QCONFIG_FILENAME}") + set(CIRCLE_QCONFIG_SOURCE_PATH "${CIRCLE_RECIPE_REPO}/${RECIPE}/${TEST_QCONFIG_FILENAME}") + + unset(QCONFIG_SOURCE_PATH) + if(EXISTS "${TFLITE_QCONFIG_SOURCE_PATH}") + set(QCONFIG_SOURCE_PATH ${TFLITE_QCONFIG_SOURCE_PATH}) + endif() + if(EXISTS "${CIRCLE_QCONFIG_SOURCE_PATH}") + set(QCONFIG_SOURCE_PATH ${CIRCLE_QCONFIG_SOURCE_PATH}) + endif() + set(RECIPE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE_FILE}") set(RULE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RULE_FILE}") + set(QCONFIG_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${QCONFIG_FILE}") set(TFLITE_FILE "${RECIPE}.tflite") set(TFLITE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TFLITE_FILE}") @@ -200,6 +201,16 @@ foreach(RECIPE IN ITEMS ${RECIPES}) list(APPEND TEST_DEPS ${RULE_BINARY_PATH}) endif() + if(DEFINED QCONFIG_SOURCE_PATH) + # Copy .qconf.json + add_custom_command(OUTPUT ${QCONFIG_BINARY_PATH} + COMMAND ${CMAKE_COMMAND} -E copy "${QCONFIG_SOURCE_PATH}" "${QCONFIG_BINARY_PATH}" + DEPENDS ${QCONFIG_SOURCE_PATH} + COMMENT "Generate ${QCONFIG_FILE}" + ) + list(APPEND TEST_DEPS ${QCONFIG_BINARY_PATH}) + endif() + if(${MODEL_FORMAT} STREQUAL "tflite") # Generate .tflite 
add_custom_command(OUTPUT ${TFLITE_OUTPUT_PATH} @@ -274,11 +285,21 @@ foreach(RECIPE IN ITEMS ${RECIPES}) ) list(APPEND TEST_DEPS ${TC_DIRECTORY}) + # set ADDITIONAL_OPTIONS as empty (one space before closing is intentional) + set(ADDITIONAL_OPTIONS ) + if(DEFINED TCGEN_OPT_${RECIPE}) + set(ADDITIONAL_OPTIONS ${ADDITIONAL_OPTIONS} ${TCGEN_OPT_${RECIPE}}) + endif() + # Generate input.h5, expected.h5 set(INPUT_HDF5_FILE "${TC_DIRECTORY}/input.h5") set(EXPECTED_HDF5_FILE "${TC_DIRECTORY}/expected.h5") add_custom_command(OUTPUT ${INPUT_HDF5_FILE} ${EXPECTED_HDF5_FILE} - COMMAND $<TARGET_FILE:testDataGenerator> --input_data ${INPUT_HDF5_FILE} --expected_data ${EXPECTED_HDF5_FILE} ${MODEL_FILE} + COMMAND $<TARGET_FILE:testDataGenerator> + --input_data ${INPUT_HDF5_FILE} + --expected_data ${EXPECTED_HDF5_FILE} + ${ADDITIONAL_OPTIONS} + ${MODEL_FILE} DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE} ${TC_DIRECTORY} COMMENT "Generate input.h5 and expected.h5 in ${NNPKG_FILE}/metadata/tc" ) diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst index f32e00413..92b07fde8 100644 --- a/compiler/common-artifacts/exclude.lst +++ b/compiler/common-artifacts/exclude.lst @@ -14,7 +14,6 @@ optimize(UnidirectionalSequenceLSTM_001) # This recipe contains is_variable Tens tcgenerate(Abs_000) tcgenerate(AddN_000) tcgenerate(Add_001) # runtime doesn't support -tcgenerate(Add_U8_000) tcgenerate(Add_STR_000) # STRING is not supported tcgenerate(Add_STR_001) # STRING is not supported tcgenerate(All_000) @@ -26,32 +25,24 @@ tcgenerate(ArgMin_U8_000) tcgenerate(ArgMin_U8_001) tcgenerate(ArgMin_U8_002) tcgenerate(ArgMin_U8_003) -tcgenerate(BatchMatMul_000) tcgenerate(BatchMatMulV2_000) tcgenerate(BatchMatMulV2_001) tcgenerate(BatchToSpaceND_000) tcgenerate(BroadcastTo_000) # luci-interpreter doesn't support custom operator -tcgenerate(Cast_000) -tcgenerate(Cast_001) tcgenerate(Ceil_000) tcgenerate(Conv2D_003) # runtime doesn't support dilation 
tcgenerate(Cos_000) tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet -tcgenerate(Dequantize_000) # runtime and luci-interpreter doesn't support Dequantize op yet -tcgenerate(ExpandDims_000) -tcgenerate(ExpandDims_001) -tcgenerate(ExpandDims_002) -tcgenerate(ExpandDims_003) -tcgenerate(ExpandDims_004) +tcgenerate(ExpandDims_001) # luci-interpreter doesn't support undefined shape +tcgenerate(ExpandDims_002) # luci-interpreter doesn't support undefined shape tcgenerate(FakeQuant_000) # runtime and luci-interpreter doesn't support yet tcgenerate(Fill_000) tcgenerate(Fill_001) tcgenerate(FloorMod_000) tcgenerate(FloorMod_001) tcgenerate(FullyConnected_U8_000) -tcgenerate(Gather_000) tcgenerate(GatherNd_000) tcgenerate(GatherNd_001) tcgenerate(L2Pool2D_U8_000) @@ -75,8 +66,8 @@ tcgenerate(Mul_U8_000) tcgenerate(Neg_000) tcgenerate(Net_BroadcastTo_AddV2_001) # luci-interpreter doesn't support custom operator tcgenerate(Net_Conv_FakeQuant_000) # luci-interpreter doesn't support FakeQuant yet -tcgenerate(Net_Conv_QuantDequant_000) # luci-interpreter doesn't support Quantize/Dequantize yet tcgenerate(Net_Dangle_001) +tcgenerate(Net_Gather_SparseToDense_AddV2_000) # luci-interpreter doesn't support custom operator tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim tcgenerate(OneHot_000) tcgenerate(OneHot_001) @@ -157,13 +148,11 @@ tcgenerate(While_001) # Needs luci-interpreter int32_t support for ADD, EQUAL tcgenerate(While_002) # Needs luci-interpreter int32_t support for ADD, EQUAL tcgenerate(While_003) # Needs luci-interpreter int32_t support for ADD, EQUAL, and dynamic shape for WHILE tcgenerate(YUV_TO_RGB_000) -tcgenerate(YUV_TO_RGB_U8_000) tcgenerate(ZerosLike_000) ## CircleRecipes tcgenerate(BCQFullyConnected_000) tcgenerate(BCQFullyConnected_001) 
tcgenerate(BCQGather_000) -tcgenerate(CircleBatchMatMul_000) tcgenerate(InstanceNorm_000) tcgenerate(InstanceNorm_001) diff --git a/compiler/common-artifacts/options.lst b/compiler/common-artifacts/options.lst new file mode 100644 index 000000000..5e0ff9da5 --- /dev/null +++ b/compiler/common-artifacts/options.lst @@ -0,0 +1,6 @@ +## Additional Options for test recipe + +#[[ tcgenerate_option : add additional option(s) for generation ]] + +# make valid 'indices' input value +tcgenerate_option(Gather_001 --input_range indices 0 3) diff --git a/compiler/common-artifacts/requires.cmake b/compiler/common-artifacts/requires.cmake index d7bed21fe..cc07e17f6 100644 --- a/compiler/common-artifacts/requires.cmake +++ b/compiler/common-artifacts/requires.cmake @@ -4,6 +4,6 @@ require("circlechef") require("foder") require("luci") require("luci-interpreter") -require("mio-circle") require("safemain") require("tflchef") +require("tflite2circle") diff --git a/compiler/common-artifacts/src/TestDataGenerator.cpp b/compiler/common-artifacts/src/TestDataGenerator.cpp index b00e93e88..33cecbbe2 100644 --- a/compiler/common-artifacts/src/TestDataGenerator.cpp +++ b/compiler/common-artifacts/src/TestDataGenerator.cpp @@ -18,7 +18,6 @@ #include <foder/FileLoader.h> #include <luci/Importer.h> #include <luci_interpreter/Interpreter.h> -#include <mio/circle/schema_generated.h> #include <H5Cpp.h> @@ -27,6 +26,9 @@ #include <memory> #include <random> #include <string> +#include <vector> +#include <cassert> +#include <cstdlib> namespace { @@ -43,6 +45,8 @@ H5::PredType hdf5_dtype_cast(const loco::DataType loco_dtype) { case loco::DataType::U8: return H5::PredType::NATIVE_UINT8; + case loco::DataType::S16: + return H5::PredType::NATIVE_INT16; case loco::DataType::S32: return H5::PredType::NATIVE_INT32; case loco::DataType::S64: @@ -56,7 +60,7 @@ H5::PredType hdf5_dtype_cast(const loco::DataType loco_dtype) } } -template <typename T> void geneate_random_data(std::mt19937 &gen, void *data, 
uint32_t size) +template <typename T> void generate_random_data(std::mt19937 &gen, void *data, uint32_t size) { std::normal_distribution<float> distrib(0, 2); // mean(0), stddev(2) for (uint32_t i = 0; i < size; i++) @@ -65,7 +69,7 @@ template <typename T> void geneate_random_data(std::mt19937 &gen, void *data, ui } } -template <> void geneate_random_data<bool>(std::mt19937 &gen, void *data, uint32_t size) +template <> void generate_random_data<bool>(std::mt19937 &gen, void *data, uint32_t size) { std::normal_distribution<float> distrib(0, 2); // mean(0), stddev(2) for (uint32_t i = 0; i < size; i++) @@ -74,6 +78,20 @@ template <> void geneate_random_data<bool>(std::mt19937 &gen, void *data, uint32 } } +template <typename T> +void generate_random_range(void *data, uint32_t size, int32_t range_min, int32_t range_max) +{ + assert(range_min <= range_max); + + for (uint32_t i = 0; i < size; i++) + { + // +1 will make value of [range_min, range_max] + int32_t range = range_max - range_min + 1; + int32_t value = (rand() % range) + range_min; + static_cast<T *>(data)[i] = static_cast<T>(value); + } +} + void fill_random_data(void *data, uint32_t size, loco::DataType dtype, uint32_t seed) { std::mt19937 gen(seed); // standard mersenne_twister_engine seeded with rd() @@ -81,19 +99,38 @@ void fill_random_data(void *data, uint32_t size, loco::DataType dtype, uint32_t switch (dtype) { case loco::DataType::U8: - geneate_random_data<uint8_t>(gen, data, size); + generate_random_data<uint8_t>(gen, data, size); + break; + case loco::DataType::S16: + generate_random_data<int16_t>(gen, data, size); break; case loco::DataType::S32: - geneate_random_data<int32_t>(gen, data, size); + generate_random_data<int32_t>(gen, data, size); break; case loco::DataType::S64: - geneate_random_data<int64_t>(gen, data, size); + generate_random_data<int64_t>(gen, data, size); break; case loco::DataType::FLOAT32: - geneate_random_data<float>(gen, data, size); + generate_random_data<float>(gen, data, 
size); break; case loco::DataType::BOOL: - geneate_random_data<bool>(gen, data, size); + generate_random_data<bool>(gen, data, size); + break; + default: + throw std::runtime_error("NYI data type."); + } +} + +void fill_random_range(void *data, uint32_t size, loco::DataType dtype, int32_t range_min, + int32_t range_max) +{ + switch (dtype) + { + case loco::DataType::S32: + generate_random_range<int32_t>(data, size, range_min, range_max); + break; + case loco::DataType::S64: + generate_random_range<int64_t>(data, size, range_min, range_max); break; default: throw std::runtime_error("NYI data type."); @@ -120,6 +157,11 @@ int entry(int argc, char **argv) .required(false) .nargs(0) .help("Put a fixed seed into the random number generator"); + arser.add_argument("--input_range") + .required(false) + .nargs(3) + .type(arser::DataType::STR_VEC) + .help("Set random number range [min max] for the input as 'name min max'"); try { @@ -176,6 +218,24 @@ int entry(int argc, char **argv) std::unique_ptr<H5::Group> output_value_group = std::make_unique<H5::Group>(output_file.createGroup("value")); + std::string range_name; + int32_t range_min = 0; + int32_t range_max = 0; + bool range_check = false; + bool range_input_found = false; + if (arser["--input_range"]) + { + // NOTE limitation: we can only set one input range + // TODO expand this for multiple inputs + std::vector<std::string> values = arser.get<std::vector<std::string>>("--input_range"); + assert(values.size() == 3); + range_name = values.at(0); + // TODO add check for valid numbers + range_min = std::atoi(values.at(1).c_str()); + range_max = std::atoi(values.at(2).c_str()); + range_check = true; + } + std::random_device rd; // used to obtain a seed for the random number engine uint32_t input_index = 0; // TODO remove indentation @@ -187,6 +247,7 @@ int entry(int argc, char **argv) { const auto *input_node = dynamic_cast<const luci::CircleInput *>(node); std::string name = input_node->name(); + assert(not 
name.empty()); if (name.find(":") == std::string::npos) name += ":0"; @@ -217,7 +278,12 @@ int entry(int argc, char **argv) std::vector<int8_t> data(byte_size); // generate random data - if (arser["--fixed_seed"]) + if (range_name == input_node->name()) + { + fill_random_range(data.data(), data_size, input_node->dtype(), range_min, range_max); + range_input_found = true; + } + else if (arser["--fixed_seed"]) fill_random_data(data.data(), data_size, input_node->dtype(), 0); else fill_random_data(data.data(), data_size, input_node->dtype(), rd()); @@ -230,6 +296,12 @@ int entry(int argc, char **argv) } } + if (range_check && not range_input_found) + { + std::cerr << "ERROR: input_range for input [" << range_name << "] not found." << std::endl; + return EXIT_FAILURE; + } + interpreter.interpret(); // dump output data into hdf5 file diff --git a/compiler/dio-hdf5/CMakeLists.txt b/compiler/dio-hdf5/CMakeLists.txt new file mode 100644 index 000000000..199c0d59d --- /dev/null +++ b/compiler/dio-hdf5/CMakeLists.txt @@ -0,0 +1,30 @@ +nnas_find_package(HDF5 COMPONENTS STATIC QUIET) + +if(NOT HDF5_FOUND) + message(STATUS "Build dio_hdf5: FAILED (missing HDF5)") + return() +endif(NOT HDF5_FOUND) + +file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) + +add_library(dio_hdf5 SHARED ${SOURCES}) +target_include_directories(dio_hdf5 PUBLIC include) +target_include_directories(dio_hdf5 PUBLIC ${HDF5_INCLUDE_DIRS}) +target_link_libraries(dio_hdf5 PUBLIC ${HDF5_CXX_LIBRARIES}) +target_link_libraries(dio_hdf5 PUBLIC loco) + +install(TARGETS dio_hdf5 DESTINATION lib) +install(DIRECTORY include/ DESTINATION include + FILES_MATCHING PATTERN "*.h") + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(dio_hdf5_test ${TESTS}) +target_include_directories(dio_hdf5_test PRIVATE include) +target_link_libraries(dio_hdf5_test dio_hdf5) diff --git 
a/compiler/dio-hdf5/README.md b/compiler/dio-hdf5/README.md new file mode 100644 index 000000000..aa2398ce8 --- /dev/null +++ b/compiler/dio-hdf5/README.md @@ -0,0 +1,29 @@ +# dio-hdf5 + +_dio-hdf5_ is a library to help loading hdf5 files (_dio_ indicates data I/O). + +The hdf5 file should have the following structure. + +``` +Group "/" + > Group <group_name> + > Group <data_idx> + > Dataset <input_idx> +``` + +## Example + +```cpp +dio_hdf5::HDF5Importer h5{input_path}; + +h5.importGroup("value"); + +// Prepare buffer +const uint32_t input_byte_size = 16; +std::vector<char> buffer(input_byte_size); + +// Write the first input of the first data to buffer +readTensor(0, 0, buffer.data()); + +DO_SOMETHING_WITH(buffer); +``` diff --git a/compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h b/compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h new file mode 100644 index 000000000..aafcfbbf3 --- /dev/null +++ b/compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DIO_HDF5_H__ +#define __DIO_HDF5_H__ + +#include <H5Cpp.h> + +#include <loco.h> + +#include <string> +#include <vector> + +namespace dio +{ +namespace hdf5 +{ + +// HDF5Importer reads an input data saved in the hdf5 file in the given path +// The hierarchy of the hdf5 file is as follows. 
+// Group "/" +// > Group <group_name> +// > Group <data_idx> +// > Dataset <input_idx> +// data_idx : index of the data (dataset file can contain multiple data) +// input_idx : index of the input (DNN model can have multiple inputs) +// Ex: the j'th input of the i'th data of group 'value' can be accessed by "/value/i/j" +class HDF5Importer final +{ +public: + explicit HDF5Importer(const std::string &path); + +public: + /** + * @note importGroup has to be called before readTensor is called + * Otherwise, readTensor will throw an exception + */ + void importGroup(const std::string &group) { _group = _file.openGroup(group); } + + /** + * @brief Read tensor data from file and store it into buffer + * @details A tensor in the file can be retrieved with (data_idx, input_idx) + * @param data_idx : index of the data + * @param input_idx : index of the input + * @param dtype : pointer to write the tensor's data type + * @param shape : pointer to write the tensor's shape + * @param buffer : pointer to write the tensor's data + */ + void readTensor(int32_t data_idx, int32_t input_idx, loco::DataType *dtype, + std::vector<loco::Dimension> *shape, void *buffer); + + // Read a raw tensor (no type/shape is specified) + void readTensor(int32_t data_idx, int32_t input_idx, void *buffer); + + bool isRawData() { return _group.attrExists("rawData"); } + + int32_t numData() { return _group.getNumObjs(); } + + int32_t numInputs(int32_t data_idx); + +private: + H5::H5File _file; + H5::Group _group; +}; + +} // namespace hdf5 +} // namespace dio + +#endif // __DIO_HDF5_H__ diff --git a/compiler/dio-hdf5/requires.cmake b/compiler/dio-hdf5/requires.cmake new file mode 100644 index 000000000..44f6870da --- /dev/null +++ b/compiler/dio-hdf5/requires.cmake @@ -0,0 +1 @@ +require("loco") diff --git a/compiler/record-minmax/src/HDF5Importer.cpp b/compiler/dio-hdf5/src/HDF5Importer.cpp index cfb270ce0..9ae556b77 100644 --- a/compiler/record-minmax/src/HDF5Importer.cpp +++ 
b/compiler/dio-hdf5/src/HDF5Importer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,16 +14,17 @@ * limitations under the License. */ -#include "HDF5Importer.h" +#include "dio_hdf5/HDF5Importer.h" #include <H5Cpp.h> #include <string> +#include <vector> #include <cassert> #include <stdexcept> -using Shape = luci_interpreter::Shape; -using DataType = luci_interpreter::DataType; +using Shape = std::vector<loco::Dimension>; +using DataType = loco::DataType; namespace { @@ -36,10 +37,10 @@ Shape toInternalShape(const H5::DataSpace &dataspace) dims.resize(rank, 0); dataspace.getSimpleExtentDims(dims.data()); - Shape res(rank); + Shape res; for (int axis = 0; axis < rank; ++axis) { - res.dim(axis) = dims[axis]; + res.emplace_back(dims[axis]); } return res; @@ -108,18 +109,28 @@ void readTensorData(H5::DataSet &tensor, int64_t *buffer) } // namespace -namespace record_minmax +namespace dio { +namespace hdf5 +{ + +HDF5Importer::HDF5Importer(const std::string &path) +{ + if (_file.isHdf5(path) == false) + throw std::runtime_error("Given data file is not HDF5"); + + _file = H5::H5File(path, H5F_ACC_RDONLY); +} int32_t HDF5Importer::numInputs(int32_t record_idx) { - auto records = _value_grp.openGroup(std::to_string(record_idx)); + auto records = _group.openGroup(std::to_string(record_idx)); return records.getNumObjs(); } void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, void *buffer) { - auto record = _value_grp.openGroup(std::to_string(record_idx)); + auto record = _group.openGroup(std::to_string(record_idx)); auto tensor = record.openDataSet(std::to_string(input_idx)); readTensorData(tensor, static_cast<uint8_t *>(buffer)); @@ -128,7 +139,7 @@ void HDF5Importer::readTensor(int32_t record_idx, 
int32_t input_idx, void *buffe void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, DataType *dtype, Shape *shape, void *buffer) { - auto record = _value_grp.openGroup(std::to_string(record_idx)); + auto record = _group.openGroup(std::to_string(record_idx)); auto tensor = record.openDataSet(std::to_string(input_idx)); auto tensor_dtype = tensor.getDataType(); @@ -156,4 +167,5 @@ void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, DataType *d } } -} // namespace record_minmax +} // namespace hdf5 +} // namespace dio diff --git a/compiler/dio-hdf5/src/HDF5Importer.test.cpp b/compiler/dio-hdf5/src/HDF5Importer.test.cpp new file mode 100644 index 000000000..61a027fc5 --- /dev/null +++ b/compiler/dio-hdf5/src/HDF5Importer.test.cpp @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dio_hdf5/HDF5Importer.h" + +#include <loco.h> + +#include <H5Cpp.h> + +#include <cstdio> + +#include <gtest/gtest.h> + +using HDF5Importer = dio::hdf5::HDF5Importer; +using Shape = std::vector<loco::Dimension>; +using DataType = loco::DataType; + +namespace +{ + +const std::string file_name("dio_hdf5_test.h5"); + +void createFile() +{ + // File already exists. Remove it. 
+ if (auto f = fopen(file_name.c_str(), "r")) + { + fclose(f); + if (remove(file_name.c_str()) != 0) + throw std::runtime_error("Error deleting file."); + } + + const auto rank = 3; + hsize_t dim[3] = {1, 2, 3}; + H5::DataSpace space(rank, dim); + + float data[] = {0, 1, 2, 3, 4, 5}; + + // Create test file in the current directory + H5::H5File file(file_name, H5F_ACC_TRUNC); + { + file.createGroup("/value"); + file.createGroup("/value/0"); + H5::DataSet dataset(file.createDataSet("/value/0/0", H5::PredType::IEEE_F32BE, space)); + dataset.write(data, H5::PredType::IEEE_F32LE); + } +} + +} // namespace + +TEST(dio_hdf5_test, read_with_type_shape) +{ + createFile(); + + HDF5Importer h5(::file_name); + + h5.importGroup("value"); + + std::vector<float> buffer(6); + + DataType dtype; + Shape shape; + h5.readTensor(0, 0, &dtype, &shape, buffer.data()); + + for (uint32_t i = 0; i < 6; i++) + EXPECT_EQ(i, buffer[i]); + + EXPECT_EQ(DataType::FLOAT32, dtype); + EXPECT_EQ(3, shape.size()); + EXPECT_EQ(1, shape[0]); + EXPECT_EQ(2, shape[1]); + EXPECT_EQ(3, shape[2]); +} + +TEST(dio_hdf5_test, wrong_path_NEG) +{ + const std::string wrong_path = "not_existing_file_for_dio_hdf5_test"; + + EXPECT_ANY_THROW(HDF5Importer h5(wrong_path)); +} + +TEST(dio_hdf5_test, wrong_group_name_NEG) +{ + createFile(); + + HDF5Importer h5(::file_name); + + EXPECT_ANY_THROW(h5.importGroup("wrong")); +} + +TEST(dio_hdf5_test, data_out_of_index_NEG) +{ + createFile(); + + HDF5Importer h5(::file_name); + + h5.importGroup("value"); + + std::vector<float> buffer(6); + + DataType dtype; + Shape shape; + // Read non-existing data (data_idx = 1) + EXPECT_ANY_THROW(h5.readTensor(1, 0, &dtype, &shape, buffer.data())); +} + +TEST(dio_hdf5_test, input_out_of_index_NEG) +{ + createFile(); + + HDF5Importer h5(::file_name); + + h5.importGroup("value"); + + std::vector<float> buffer(6); + + DataType dtype; + Shape shape; + // Read non-existing input (input_idx = 1) + EXPECT_ANY_THROW(h5.readTensor(0, 1, &dtype, 
&shape, buffer.data())); +} diff --git a/compiler/dredd-rule-lib/rule-lib.sh b/compiler/dredd-rule-lib/rule-lib.sh index 9254cc9a7..c25dc5fb4 100755 --- a/compiler/dredd-rule-lib/rule-lib.sh +++ b/compiler/dredd-rule-lib/rule-lib.sh @@ -217,4 +217,21 @@ op_version() echo ${ACTUAL} } +tensor_dtype() +{ + argc_check $# 1 + file_path_check ${COMPILED_FILE} + file_path_check ${INSPECT_PROG_PATH} + + set -o pipefail + + ACTUAL=`init_error_log ; \ + ${INSPECT_PROG_PATH} --tensor_dtype ${COMPILED_FILE} | \ + awk -v tensor_name="$1" '{ if ($1 == tensor_name) print $2}'` + + check_success_exit_code $? 0 + + echo ${ACTUAL} +} + # TODO define more qullity test function diff --git a/compiler/embedded-import-value-test/.gitignore b/compiler/embedded-import-value-test/.gitignore new file mode 100644 index 000000000..8dbfa9012 --- /dev/null +++ b/compiler/embedded-import-value-test/.gitignore @@ -0,0 +1 @@ +/test.local.lst diff --git a/compiler/embedded-import-value-test/CMakeLists.txt b/compiler/embedded-import-value-test/CMakeLists.txt new file mode 100644 index 000000000..785edfc7d --- /dev/null +++ b/compiler/embedded-import-value-test/CMakeLists.txt @@ -0,0 +1,34 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +set(SRCS_TEST_DRIVER src/TestDriver.cpp) + +# create driver +add_executable(test_driver ${SRCS_TEST_DRIVER}) +target_link_libraries(test_driver PRIVATE luci_interpreter_import) +target_link_libraries(test_driver PRIVATE luci_interpreter) +target_link_libraries(test_driver PRIVATE safemain) + +unset(EMBEDDED_IMPORT_VALUE_TESTS) + +macro(addeval NAME) + list(APPEND EMBEDDED_IMPORT_VALUE_TESTS ${NAME}) +endmacro(addeval) + +# Read "test.lst" +include("test.lst") +# Read "test.local.lst" if exists +include("test.local.lst" OPTIONAL) + +# Generate dependencies +add_custom_target(embedded_import_testfiles ALL DEPENDS ${TESTFILES}) + +get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR) + +add_test(NAME embedded_import_value_test + COMMAND 
"${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh" + "${CMAKE_CURRENT_BINARY_DIR}" + "${ARTIFACTS_BIN_PATH}" + ${EMBEDDED_IMPORT_VALUE_TESTS} +) diff --git a/compiler/embedded-import-value-test/README.md b/compiler/embedded-import-value-test/README.md new file mode 100644 index 000000000..71a95486f --- /dev/null +++ b/compiler/embedded-import-value-test/README.md @@ -0,0 +1,13 @@ +# embedded-import-value-test + +`embedded-import-value-test` checks models imported with and without constant copying produces same output values. + +The test proceeds as follows: + +1. Generate random input for provided circle model. + +2. Import circle model to luci in 2 modes: + - With constant copying (default mode). + - Without constant copying (experimental feature) + +3. Compare the execution result of both modes. The result must be the same. diff --git a/compiler/embedded-import-value-test/evalverify.sh b/compiler/embedded-import-value-test/evalverify.sh new file mode 100755 index 000000000..a99e76f3e --- /dev/null +++ b/compiler/embedded-import-value-test/evalverify.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# This script verifies that imported without constants copying models executes well in luci_interpreter +# +# HOW TO USE +# +# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <TEST 1> <TEST 2> ... 
+# bin_dir : build directory of embedded-import-value-test (ex: build/compiler/embedded-import-value-test) +# work_dir : artifacts directory where test materials exist + +BINDIR="$1"; shift +WORKDIR="$1"; shift +TEST_DRIVER_PATH="${BINDIR}/test_driver" +TEST_RESULT_DIR="${BINDIR}/result" + +TESTED=() +PASSED=() +FAILED=() + +mkdir -p "${TEST_RESULT_DIR}" +for TESTCASE in "$@"; do + TESTED+=("${TESTCASE}") + + TESTCASE_FILE="${WORKDIR}/${TESTCASE}" + TEST_RESULT_FILE="${TEST_RESULT_DIR}/${TESTCASE}" + + PASSED_TAG="${TEST_RESULT_FILE}.passed" + rm -f "${PASSED_TAG}" + + cat > "${TEST_RESULT_FILE}.log" <( + exec 2>&1 + set -ex + + "${TEST_DRIVER_PATH}" --model "${TESTCASE_FILE}.circle" + + if [[ $? -eq 0 ]]; then + touch "${PASSED_TAG}" + fi + ) + + if [[ -f "${PASSED_TAG}" ]]; then + PASSED+=("${TESTCASE}") + else + FAILED+=("${TESTCASE}") + fi +done + +if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then + echo "FAILED" + for TEST in "${FAILED[@]}" + do + echo "- ${TEST}" + done + exit 255 +fi + +echo "PASSED" +exit 0 diff --git a/compiler/embedded-import-value-test/requires.cmake b/compiler/embedded-import-value-test/requires.cmake new file mode 100644 index 000000000..f8af5f27e --- /dev/null +++ b/compiler/embedded-import-value-test/requires.cmake @@ -0,0 +1,6 @@ +require("common-artifacts") +require("luci") +require("luci-interpreter") +require("safemain") +require("oops") +require("loco") diff --git a/compiler/embedded-import-value-test/src/TestDriver.cpp b/compiler/embedded-import-value-test/src/TestDriver.cpp new file mode 100644 index 000000000..63fd745eb --- /dev/null +++ b/compiler/embedded-import-value-test/src/TestDriver.cpp @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci_interpreter/GraphBuilderRegistry.h> +#include <luci_interpreter/Interpreter.h> + +#include <luci/Importer.h> + +#include <cstdlib> +#include <fstream> +#include <iostream> +#include <vector> +#include <string> +#include <random> + +namespace +{ + +uint32_t tensor_size_of(const luci::CircleNode *node) +{ + uint32_t tensor_size = loco::size(node->dtype()); + for (uint32_t i = 0; i < node->rank(); ++i) + tensor_size *= node->dim(i).value(); + return tensor_size; +} + +std::vector<uint8_t> random_data_for(const luci::CircleInput *node) +{ + // allocate data buffer + std::vector<uint8_t> inputs_data(tensor_size_of(node)); + auto *buffer = inputs_data.data(); + + // define size of buffer in elements + const auto dtype = node->dtype(); + assert(inputs_data.size() % loco::size(dtype) == 0); // FIX ME UNLESS + const auto element_count = inputs_data.size() / loco::size(dtype); + + // random generator engine + std::random_device device; + std::mt19937 engine{device()}; + + // fill buffer with random data + switch (node->dtype()) + { + case loco::DataType::FLOAT32: + { + auto element_buffer = reinterpret_cast<float *>(buffer); + + std::uniform_real_distribution<float> distrib(-3, 3); + const auto generator = [&distrib, &engine]() { return distrib(engine); }; + std::generate(element_buffer, element_buffer + element_count, generator); + + break; + } + case loco::DataType::U8: + { + auto element_buffer = buffer; + + std::uniform_int_distribution<uint8_t> distrib(100, 200); + const auto generator = [&distrib, &engine]() { return 
distrib(engine); }; + std::generate(element_buffer, element_buffer + element_count, generator); + + break; + } + case loco::DataType::S16: + { + auto element_buffer = reinterpret_cast<int16_t *>(buffer); + + std::uniform_int_distribution<int16_t> distrib(0, 100); + const auto generator = [&distrib, &engine]() { return distrib(engine); }; + std::generate(element_buffer, element_buffer + element_count, generator); + + break; + } + case loco::DataType::S32: + { + auto element_buffer = reinterpret_cast<int32_t *>(buffer); + + std::uniform_int_distribution<int32_t> distrib(0, 100); + const auto generator = [&distrib, &engine]() { return distrib(engine); }; + std::generate(element_buffer, element_buffer + element_count, generator); + + break; + } + case loco::DataType::BOOL: + { + // num of bool data type is equivalent to uint8_t num in [0, 1] range + auto element_buffer = buffer; + + std::uniform_int_distribution<uint8_t> distrib(0, 1); + const auto generator = [&distrib, &engine]() { return distrib(engine); }; + std::generate(element_buffer, element_buffer + element_count, generator); + + break; + } + default: + // TODO Support other dtypes + throw std::runtime_error("Unsupported data type, yet!"); + } + + return inputs_data; +} + +} // namespace + +int entry(int argc, char **argv) +{ + // check arguments + if (argc != 3 || std::string(argv[1]) != "--model") + { + std::cerr << "Usage: " << argv[0] << " --model <path/to/model>" << std::endl; + return EXIT_FAILURE; + } + + // open file with model + const auto model_file = std::string(argv[2]); + std::ifstream fs(model_file, std::ifstream::binary); + if (fs.fail()) + { + std::cerr << "Cannot open model file \"" << model_file << "\"." 
<< std::endl; + return EXIT_FAILURE; + } + + // create constant circle model + const std::vector<char> model_buffer((std::istreambuf_iterator<char>(fs)), + std::istreambuf_iterator<char>()); + const auto circle_model = circle::GetModel(model_buffer.data()); + + // create random model's inputs + std::vector<std::vector<uint8_t>> inputs_data; + { + // model inputs + auto model = luci::Importer(nullptr).importModule(circle_model); + const auto inputs = loco::input_nodes(model->graph()); + + // create random data for each input + for (const auto *input : inputs) + { + const auto input_node = loco::must_cast<const luci::CircleInput *>(input); + inputs_data.emplace_back(random_data_for(input_node)); + } + } + + // interpret given module + const auto interpret_module_and_compute_output = + [&](const std::unique_ptr<luci::Module> &module) { + // create interpreter + luci_interpreter::Interpreter interpreter(module.get()); + + // model's input and output nodes + const auto input_nodes = loco::input_nodes(module->graph()); + const auto output_nodes = loco::output_nodes(module->graph()); + + // set inputs + for (uint32_t i = 0; i < input_nodes.size(); ++i) + { + const auto input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]); + const auto &data = inputs_data.at(i); + interpreter.writeInputTensor(input_node, data.data(), data.size()); + } + + // do inference + interpreter.interpret(); + + // read outputs + std::vector<std::vector<uint8_t>> outputs_data; + for (const auto *node : output_nodes) + { + const auto output_node = loco::must_cast<const luci::CircleOutput *>(node); + + // allocate output buffer + outputs_data.emplace_back(tensor_size_of(output_node)); + + auto &data = outputs_data.back(); + interpreter.readOutputTensor(output_node, data.data(), data.size()); + } + + return outputs_data; + }; + + // import with copying, execute and save + std::vector<std::vector<uint8_t>> outputs_data_1; + { + const auto default_source = 
&luci::GraphBuilderRegistry::get(); + const auto module = luci::Importer(default_source).importModule(circle_model); + if (not module) + { + std::cerr << "Fail to import model with constant copying." << std::endl; + return EXIT_FAILURE; + } + + outputs_data_1 = interpret_module_and_compute_output(module); + } + + // import without copying, execute and save + std::vector<std::vector<uint8_t>> outputs_data_2; + { + const auto optimized_source = luci_interpreter::source_without_constant_copying(); + const auto module = luci::Importer(optimized_source.get()).importModule(circle_model); + if (not module) + { + std::cerr << "Fail to import model without constant copying." << std::endl; + return EXIT_FAILURE; + } + + outputs_data_2 = interpret_module_and_compute_output(module); + } + + // check all tensors are equal + assert(outputs_data_1.size() == outputs_data_2.size()); + for (uint32_t n = 0; n < outputs_data_1.size(); ++n) + { + const auto &output_1 = outputs_data_1.at(n); + const auto &output_2 = outputs_data_2.at(n); + assert(output_1.size() == output_2.size()); + + for (uint32_t o = 0; o < output_1.size(); ++o) + { + if (output_1[o] != output_2[o]) + { + std::cerr << "Values mismatch in model's output number " << n << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "[TEST PASSED]" << std::endl; + return EXIT_SUCCESS; +} diff --git a/compiler/embedded-import-value-test/test.lst b/compiler/embedded-import-value-test/test.lst new file mode 100644 index 000000000..924a60dcc --- /dev/null +++ b/compiler/embedded-import-value-test/test.lst @@ -0,0 +1,192 @@ +#addeval(Abs_000) +addeval(Add_000) +#addeval(Add_001) +addeval(Add_U8_000) +#addeval(AddN_000) +addeval(ArgMax_000) +addeval(ArgMax_001) +addeval(ArgMax_002) +addeval(ArgMax_003) +addeval(ArgMax_U8_000) +addeval(ArgMax_U8_001) +addeval(ArgMax_U8_002) +addeval(ArgMax_U8_003) +#addeval(ArgMin_000) +#addeval(ArgMin_001) +#addeval(ArgMin_002) +#addeval(ArgMin_003) +#addeval(ArgMin_U8_000) 
+#addeval(ArgMin_U8_001) +#addeval(ArgMin_U8_002) +#addeval(ArgMin_U8_003) +addeval(AveragePool2D_000) +#addeval(BatchMatMul_000) +#addeval(BatchMatMulV2_000) +#addeval(BatchMatMulV2_001) +#addeval(BatchToSpaceND_000) +addeval(Cast_000) +addeval(Cast_001) +#addeval(Ceil_000) +addeval(Concatenation_000) +addeval(Concatenation_U8_000) +addeval(Conv2D_000) +addeval(Conv2D_001) +addeval(Conv2D_002) +addeval(Conv2D_003) +addeval(Conv2D_U8_000) +addeval(Conv2D_U8_001) +#addeval(Cos_000) +addeval(DepthToSpace_000) +addeval(DepthwiseConv2D_000) +addeval(DepthwiseConv2D_U8_000) +#addeval(DepthwiseConv2D_U8_001) +addeval(DepthwiseConv2D_001) +addeval(Div_000) +addeval(ELU_000) +addeval(Equal_000) +addeval(Exp_000) +#addeval(ExpandDims_000) +#addeval(ExpandDims_001) +#addeval(ExpandDims_002) +#addeval(ExpandDims_003) +#addeval(Fill_000) +#addeval(Fill_001) +addeval(Floor_000) +#addeval(FloorDiv_000) +#addeval(FloorDiv_001) +#addeval(FloorMod_000) +#addeval(FloorMod_001) +addeval(FullyConnected_000) +addeval(FullyConnected_001) +addeval(FullyConnected_002) +#addeval(FullyConnected_U8_000) +addeval(Gather_000) +#addeval(GatherNd_000) +#addeval(Greater_000) +#addeval(GreaterEqual_000) +addeval(If_000) +addeval(If_001) +addeval(L2Normalize_000) +addeval(L2Pool2D_000) +#addeval(L2Pool2D_U8_000) +addeval(LeakyRelu_000) +addeval(Less_000) +addeval(LessEqual_000) +addeval(LocalResponseNormalization_000) +#addeval(Log_000) +addeval(LogicalAnd_000) +addeval(LogicalNot_000) +addeval(LogicalOr_000) +addeval(Logistic_000) +addeval(LogSoftmax_000) +#addeval(MatMul_000) +#addeval(MatrixDiag_000) +#addeval(MatrixSetDiag_000) +addeval(Maximum_000) +addeval(MaxPool2D_000) +addeval(MaxPool2D_U8_000) +addeval(Mean_000) +addeval(Mean_001) +#addeval(Mean_U8_000) +#addeval(Minimum_000) +#addeval(MirrorPad_000) +addeval(Mul_000) +#addeval(Mul_U8_000) +addeval(Neg_000) +addeval(NotEqual_000) +addeval(OneHot_000) +addeval(OneHot_001) +addeval(OneHot_002) +#addeval(OneHot_003) +addeval(Pack_000) 
+addeval(Pack_U8_000) +addeval(Pad_000) +addeval(Pad_U8_000) +addeval(Pow_000) +addeval(PRelu_000) +#addeval(Range_000) +#addeval(Rank_000) +#addeval(ReduceAny_000) +#addeval(ReduceAny_001) +#addeval(ReduceAny_002) +#addeval(ReduceAny_003) +#addeval(ReduceMax_000) +#addeval(ReduceMin_000) +#addeval(ReduceProd_000) +#addeval(ReduceProd_001) +#addeval(ReduceProd_002) +#addeval(ReduceProd_003) +addeval(ReLU_000) +addeval(ReLU6_000) +#addeval(ReLUN1To1_000) +addeval(Reshape_000) +addeval(Reshape_001) +addeval(Reshape_002) +#addeval(Reshape_003) +addeval(Reshape_U8_000) +addeval(ResizeBilinear_000) +addeval(ResizeNearestNeighbor_000) +#addeval(ReverseSequence_000) +#addeval(ReverseV2_000) +#addeval(Round_000) +addeval(Rsqrt_000) +#addeval(ScatterNd_000) +#addeval(SegmentSum_000) +#addeval(Select_000) +#addeval(Select_001) +#addeval(Select_002) +#addeval(SelectV2_000) +#addeval(SelectV2_001) +#addeval(SelectV2_002) +#addeval(Shape_000) +addeval(SignatureDef_MultiOut_000) +addeval(SignatureDef_MultiOut_001) +#addeval(Sin_000) +addeval(Slice_000) +addeval(Softmax_000) +addeval(Softmax_U8_000) +addeval(SpaceToBatchND_000) +addeval(SpaceToBatchND_001) +addeval(SpaceToBatchND_002) +addeval(SpaceToBatchND_003) +addeval(SpaceToDepth_000) +#addeval(SparseToDense_000) +addeval(Split_000) +addeval(SplitV_000) +addeval(Sqrt_000) +addeval(Square_000) +addeval(SquaredDifference_000) +addeval(Squeeze_000) +addeval(Squeeze_001) +addeval(StridedSlice_000) +addeval(StridedSlice_001) +addeval(StridedSlice_002) +addeval(Sub_000) +addeval(Sub_U8_000) +#addeval(Sum_000) +#addeval(Sum_001) +addeval(SVDF_000) +addeval(SVDF_001) +addeval(Tanh_000) +#addeval(Tile_000) +#addeval(Tile_U8_000) +#addeval(TopKV2_000) +#addeval(TopKV2_001) +addeval(Transpose_000) +addeval(TransposeConv_000) +addeval(Unpack_000) +addeval(Unpack_001) +addeval(Unpack_002) +addeval(Unpack_003) +#addeval(Where_000) +#addeval(Where_001) +#addeval(While_000) +#addeval(While_001) +#addeval(While_002) +#addeval(While_003) 
+addeval(YUV_TO_RGB_U8_000) +#addeval(ZerosLike_000) + +# Simple Network test +addeval(Part_While_000) +addeval(Part_While_001) diff --git a/compiler/enco/CMakeLists.txt b/compiler/enco/CMakeLists.txt index 17300e25e..3702f9501 100644 --- a/compiler/enco/CMakeLists.txt +++ b/compiler/enco/CMakeLists.txt @@ -1,4 +1,9 @@ add_subdirectory(core) add_subdirectory(frontend) add_subdirectory(cli) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + add_subdirectory(test) diff --git a/compiler/enco/core/CMakeLists.txt b/compiler/enco/core/CMakeLists.txt index 25dad2bc6..19a64231a 100644 --- a/compiler/enco/core/CMakeLists.txt +++ b/compiler/enco/core/CMakeLists.txt @@ -20,11 +20,11 @@ target_link_libraries(enco_core PRIVATE morph) # Let's use nncc project-wide build options target_link_libraries(enco_core PRIVATE nncc_common) -nnas_find_package(GTest QUIET) - -if(NOT GTest_FOUND) +if(NOT ENABLE_TEST) return() -endif(NOT GTest_FOUND) +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest QUIET) add_executable(enco_core_test ${TESTS}) target_include_directories(enco_core_test PRIVATE src) diff --git a/compiler/enco/frontend/caffe/CMakeLists.txt b/compiler/enco/frontend/caffe/CMakeLists.txt index 9722392a1..baf7f7bd6 100644 --- a/compiler/enco/frontend/caffe/CMakeLists.txt +++ b/compiler/enco/frontend/caffe/CMakeLists.txt @@ -17,11 +17,11 @@ target_link_libraries(enco_caffe_frontend enco_intf_cmdline) target_link_libraries(enco_caffe_frontend morph) target_link_libraries(enco_caffe_frontend caffeproto) -nnas_find_package(GTest QUIET) - -if(NOT GTest_FOUND) +if(NOT ENABLE_TEST) return() -endif(NOT GTest_FOUND) +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest QUIET) nnas_find_package(Caffe QUIET) diff --git a/compiler/enco/frontend/tflite/CMakeLists.txt b/compiler/enco/frontend/tflite/CMakeLists.txt index b2de2b34b..995e66f81 100644 --- a/compiler/enco/frontend/tflite/CMakeLists.txt +++ b/compiler/enco/frontend/tflite/CMakeLists.txt @@ -1,4 +1,4 @@ 
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET) +nnas_find_package(FlatBuffers EXACT 2.0 QUIET) if(NOT FlatBuffers_FOUND) return() @@ -17,16 +17,15 @@ add_library(enco_tflite_frontend SHARED ${SOURCES}) target_include_directories(enco_tflite_frontend PRIVATE src) target_link_libraries(enco_tflite_frontend enco_intf_frontend) target_link_libraries(enco_tflite_frontend enco_intf_cmdline) -target_link_libraries(enco_tflite_frontend flatbuffers-1.10) target_link_libraries(enco_tflite_frontend enco_tflite_schema) target_link_libraries(enco_tflite_frontend morph) target_link_libraries(enco_tflite_frontend cwrap) -nnas_find_package(GTest QUIET) - -if(NOT GTest_FOUND) +if(NOT ENABLE_TEST) return() -endif(NOT GTest_FOUND) +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest QUIET) add_executable(enco_tflite_frontend_test ${TESTS}) target_include_directories(enco_tflite_frontend_test PRIVATE src) diff --git a/compiler/exo/CMakeLists.txt b/compiler/exo/CMakeLists.txt index 9d02f7cba..645db714c 100644 --- a/compiler/exo/CMakeLists.txt +++ b/compiler/exo/CMakeLists.txt @@ -1,4 +1,4 @@ -nnas_find_package(FlatBuffers EXACT 1.10 QUIET) +nnas_find_package(FlatBuffers EXACT 2.0 QUIET) if(NOT FlatBuffers_FOUND) message(STATUS "Build exo: FALSE (missing FlatBuffers)") @@ -15,7 +15,7 @@ endif(NOT TensorFlowSource_FOUND) message(STATUS "Build exo: TRUE") set(TFLITE_SCHEMA_DIR "${TensorFlowSource_DIR}/tensorflow/lite/schema") -set(CIRCLE_SCHEMA_DIR "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema") +set(CIRCLE_SCHEMA_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.3") FlatBuffers_Target(exo_tflite_fbs OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen" diff --git a/compiler/hermes-std/CMakeLists.txt b/compiler/hermes-std/CMakeLists.txt index 8fce31953..673d7056c 100644 --- a/compiler/hermes-std/CMakeLists.txt +++ b/compiler/hermes-std/CMakeLists.txt @@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) add_library(hermes_std STATIC ${SOURCES}) 
-set_target_properties(hermes_std PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(hermes_std PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) target_include_directories(hermes_std PUBLIC include) target_link_libraries(hermes_std PUBLIC hermes) target_link_libraries(hermes_std PRIVATE pepper_strcast) diff --git a/compiler/hermes-std/include/hermes/ConsoleReporter.h b/compiler/hermes-std/include/hermes/ConsoleReporter.h index e09dd5785..c55e46a17 100644 --- a/compiler/hermes-std/include/hermes/ConsoleReporter.h +++ b/compiler/hermes-std/include/hermes/ConsoleReporter.h @@ -28,6 +28,10 @@ namespace hermes struct ConsoleReporter final : public hermes::Sink { void notify(const Message *m) final; + void set_colored_mode(bool is_colored) { _is_colored = is_colored; } + +private: + bool _is_colored = false; }; } // namespace hermes diff --git a/compiler/hermes-std/src/ConsoleReporter.cpp b/compiler/hermes-std/src/ConsoleReporter.cpp index 3cc9f09ed..524ed59d8 100644 --- a/compiler/hermes-std/src/ConsoleReporter.cpp +++ b/compiler/hermes-std/src/ConsoleReporter.cpp @@ -17,16 +17,68 @@ #include "hermes/ConsoleReporter.h" #include <iostream> +#include <cstdlib> +#include <string> namespace hermes { +static constexpr const char *kTermColorRedTextCode = "\033[0;31m"; +static constexpr const char *kTermColorGreenTextCode = "\033[0;32m"; +static constexpr const char *kTermColorOrangeTextCode = "\033[0;33m"; +static constexpr const char *kTermColorBlueTextCode = "\033[0;34m"; +static constexpr const char *kTermColorMagentaTextCode = "\033[0;35m"; +static constexpr const char *kTermColorCyanTextCode = "\033[0;36m"; +static constexpr const char *kTermColorWhiteTextCode = "\033[0;37m"; + +static constexpr const char *kTermBoldTextCode = "\033[1m"; +static constexpr const char *kTermUnderlineTextCode = "\033[4m"; +static constexpr const char *kTermInverseTextCode = "\033[7m"; +static constexpr const char 
*kTermBoldOffTextCode = "\033[21m"; +static constexpr const char *kTermUnderlineOffTextCode = "\033[24m"; +static constexpr const char *kTermInverseOffTextCode = "\033[27m"; + +static constexpr const char *kTermColorResetAllCode = "\033[0m"; + void ConsoleReporter::notify(const hermes::Message *m) { + const char *env_color_p = std::getenv("ONE_HERMES_COLOR"); + if (env_color_p) + { + auto env_color_str = std::string(env_color_p); + if ((env_color_str == "1") or (env_color_str == "ON")) + _is_colored = true; + } + + if (_is_colored) + { + switch (m->get_severity()) + { + case FATAL: + std::cout << kTermColorRedTextCode << kTermBoldTextCode << kTermUnderlineTextCode; + break; + case ERROR: + std::cout << kTermColorRedTextCode; + break; + case WARN: + std::cout << kTermColorOrangeTextCode; + break; + case INFO: + std::cout << kTermColorGreenTextCode; + break; + case VERBOSE: + std::cout << kTermColorResetAllCode; + break; + }; + } for (uint32_t n = 0; n < m->text()->lines(); ++n) { std::cout << m->text()->line(n) << std::endl; } + if (_is_colored) + { + std::cout << kTermColorResetAllCode; + } } } // namespace hermes diff --git a/compiler/hermes-std/src/ConsoleReporter.test.cpp b/compiler/hermes-std/src/ConsoleReporter.test.cpp index a65585a6a..d959ff3d9 100644 --- a/compiler/hermes-std/src/ConsoleReporter.test.cpp +++ b/compiler/hermes-std/src/ConsoleReporter.test.cpp @@ -43,3 +43,168 @@ TEST(ConsoleReporterTest, notify) ASSERT_NO_THROW(r.notify(&m)); } + +TEST(ConsoleReporterTest, notify_fatal) +{ + hermes::Message m; + { + std::stringstream ss; + + ss << "This message is colored as FATAL" << std::endl; + + m.text(std::make_unique<hermes::MessageText>(ss), hermes::FATAL); + } + + hermes::ConsoleReporter r; + + r.set_colored_mode(true); + ASSERT_NO_THROW(r.notify(&m)); +} + +TEST(ConsoleReporterTest, notify_error) +{ + hermes::Message m; + { + std::stringstream ss; + + ss << "This message is colored as ERROR" << std::endl; + + 
m.text(std::make_unique<hermes::MessageText>(ss), hermes::ERROR); + } + + hermes::ConsoleReporter r; + + r.set_colored_mode(true); + ASSERT_NO_THROW(r.notify(&m)); +} + +TEST(ConsoleReporterTest, notify_warn) +{ + hermes::Message m; + { + std::stringstream ss; + + ss << "This message is colored as WARN" << std::endl; + + m.text(std::make_unique<hermes::MessageText>(ss), hermes::WARN); + } + + hermes::ConsoleReporter r; + + r.set_colored_mode(true); + ASSERT_NO_THROW(r.notify(&m)); +} + +TEST(ConsoleReporterTest, notify_info) +{ + hermes::Message m; + { + std::stringstream ss; + + ss << "This message is colored as INFO" << std::endl; + + m.text(std::make_unique<hermes::MessageText>(ss), hermes::INFO); + } + + hermes::ConsoleReporter r; + + r.set_colored_mode(true); + ASSERT_NO_THROW(r.notify(&m)); +} + +TEST(ConsoleReporterTest, notify_verbose) +{ + hermes::Message m; + { + std::stringstream ss; + + ss << "This message is colored as VERBOSE" << std::endl; + + m.text(std::make_unique<hermes::MessageText>(ss), hermes::VERBOSE); + } + + hermes::ConsoleReporter r; + + r.set_colored_mode(true); + ASSERT_NO_THROW(r.notify(&m)); +} + +TEST(ConsoleReporterTest, notify_fatal_NEG) +{ + hermes::Message m; + { + std::stringstream ss; + + ss << "This message is not colored as FATAL" << std::endl; + + m.text(std::make_unique<hermes::MessageText>(ss), hermes::FATAL); + } + + hermes::ConsoleReporter r; + + ASSERT_NO_THROW(r.notify(&m)); +} + +TEST(ConsoleReporterTest, notify_error_NEG) +{ + hermes::Message m; + { + std::stringstream ss; + + ss << "This message is not colored as ERROR" << std::endl; + + m.text(std::make_unique<hermes::MessageText>(ss), hermes::ERROR); + } + + hermes::ConsoleReporter r; + + ASSERT_NO_THROW(r.notify(&m)); +} + +TEST(ConsoleReporterTest, notify_warn_NEG) +{ + hermes::Message m; + { + std::stringstream ss; + + ss << "This message is not colored as WARN" << std::endl; + + m.text(std::make_unique<hermes::MessageText>(ss), hermes::WARN); + } + + 
hermes::ConsoleReporter r; + + ASSERT_NO_THROW(r.notify(&m)); +} + +TEST(ConsoleReporterTest, notify_info_NEG) +{ + hermes::Message m; + { + std::stringstream ss; + + ss << "This message is not colored as INFO" << std::endl; + + m.text(std::make_unique<hermes::MessageText>(ss), hermes::INFO); + } + + hermes::ConsoleReporter r; + + ASSERT_NO_THROW(r.notify(&m)); +} + +TEST(ConsoleReporterTest, notify_verbose_NEG) +{ + hermes::Message m; + { + std::stringstream ss; + + ss << "This message is not colored as VERBOSE" << std::endl; + + m.text(std::make_unique<hermes::MessageText>(ss), hermes::VERBOSE); + } + + hermes::ConsoleReporter r; + + ASSERT_NO_THROW(r.notify(&m)); +} diff --git a/compiler/hermes/CMakeLists.txt b/compiler/hermes/CMakeLists.txt index e1a71c2b4..d33e2d735 100644 --- a/compiler/hermes/CMakeLists.txt +++ b/compiler/hermes/CMakeLists.txt @@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) add_library(hermes STATIC ${SOURCES}) -set_target_properties(hermes PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(hermes PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) target_include_directories(hermes PUBLIC include) # Let's apply nncc common compile options # diff --git a/compiler/hermes/include/hermes/core/Message.h b/compiler/hermes/include/hermes/core/Message.h index 460163f64..d76f0eb6f 100644 --- a/compiler/hermes/include/hermes/core/Message.h +++ b/compiler/hermes/include/hermes/core/Message.h @@ -17,6 +17,8 @@ #ifndef __HERMES_MESSAGE_H__ #define __HERMES_MESSAGE_H__ +#include "Severity.h" + #include <memory> #include <sstream> #include <string> @@ -48,7 +50,6 @@ private: * @brief Message with metadata * * TODO Add "Timestamp" field - * TODO Add "Severity" field * TODO Support extensible "attribute" annotation */ class Message final @@ -58,10 +59,17 @@ public: public: void text(std::unique_ptr<MessageText> &&text) { _text = std::move(text); } + 
void text(std::unique_ptr<MessageText> &&text, SeverityCategory severity) + { + _text = std::move(text); + _severity = severity; + } const MessageText *text(void) const { return _text.get(); } + SeverityCategory get_severity(void) const { return _severity; } private: std::unique_ptr<MessageText> _text; + SeverityCategory _severity = SeverityCategory::INFO; }; } // namespace hermes diff --git a/compiler/hermes/include/hermes/core/MessageBuffer.h b/compiler/hermes/include/hermes/core/MessageBuffer.h index a2f1de74d..1e2e9b9dc 100644 --- a/compiler/hermes/include/hermes/core/MessageBuffer.h +++ b/compiler/hermes/include/hermes/core/MessageBuffer.h @@ -18,6 +18,7 @@ #define __HERMES_MESSAGE_BUFFER_H__ #include "hermes/core/MessageBus.h" +#include "hermes/core/Severity.h" #include <ostream> #include <sstream> @@ -34,6 +35,7 @@ class MessageBuffer final { public: MessageBuffer(MessageBus *); + MessageBuffer(MessageBus *bus, SeverityCategory severity); ~MessageBuffer(); public: @@ -41,6 +43,7 @@ public: private: MessageBus *_bus; + SeverityCategory _severity = SeverityCategory::INFO; /// @brief Content buffer std::stringstream _ss; diff --git a/compiler/hermes/src/core/MessageBuffer.cpp b/compiler/hermes/src/core/MessageBuffer.cpp index a4ff4eeff..ce1f176d9 100644 --- a/compiler/hermes/src/core/MessageBuffer.cpp +++ b/compiler/hermes/src/core/MessageBuffer.cpp @@ -26,13 +26,19 @@ MessageBuffer::MessageBuffer(MessageBus *bus) : _bus{bus} // DO NOTHING } +MessageBuffer::MessageBuffer(MessageBus *bus, SeverityCategory severity) + : _bus{bus}, _severity{severity} +{ + // DO NOTHING +} + MessageBuffer::~MessageBuffer() { // NOTE The current implementation is unsafe as it may throw an excpetion. // TODO Find a better safe implementation. 
auto msg = std::make_unique<Message>(); - msg->text(std::make_unique<MessageText>(_ss)); + msg->text(std::make_unique<MessageText>(_ss), _severity); _bus->post(std::move(msg)); } diff --git a/compiler/hermes/src/core/Source.cpp b/compiler/hermes/src/core/Source.cpp index d124f4430..cb60d9a31 100644 --- a/compiler/hermes/src/core/Source.cpp +++ b/compiler/hermes/src/core/Source.cpp @@ -60,10 +60,9 @@ void Source::deactivate(void) void Source::reload(const Config *c) { c->configure(this, _setting); } -std::unique_ptr<MessageBuffer> Source::buffer(const Severity &) const +std::unique_ptr<MessageBuffer> Source::buffer(const Severity &severity) const { - // TODO Pass Severity - return std::make_unique<MessageBuffer>(_bus); + return std::make_unique<MessageBuffer>(_bus, severity.category()); } } // namespace hermes diff --git a/compiler/locomotiv/CMakeLists.txt b/compiler/locomotiv/CMakeLists.txt index 308f48619..34835e483 100644 --- a/compiler/locomotiv/CMakeLists.txt +++ b/compiler/locomotiv/CMakeLists.txt @@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) add_library(locomotiv STATIC ${SOURCES}) -set_target_properties(locomotiv PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(locomotiv PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif (NOT NNCC_LIBRARY_NO_PIC) target_include_directories(locomotiv PUBLIC include) target_include_directories(locomotiv PRIVATE src) target_link_libraries(locomotiv PUBLIC loco) diff --git a/compiler/locop/CMakeLists.txt b/compiler/locop/CMakeLists.txt index f02fb1a72..43ec41af4 100644 --- a/compiler/locop/CMakeLists.txt +++ b/compiler/locop/CMakeLists.txt @@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) add_library(locop STATIC ${SOURCES}) -set_target_properties(locop PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(locop PROPERTIES POSITION_INDEPENDENT_CODE ON) 
+endif(NOT NNCC_LIBRARY_NO_PIC) target_include_directories(locop PUBLIC include) target_link_libraries(locop PUBLIC loco) # Let's apply nncc common compile options diff --git a/compiler/logo-core/CMakeLists.txt b/compiler/logo-core/CMakeLists.txt index 3bc71dbd0..374794f90 100644 --- a/compiler/logo-core/CMakeLists.txt +++ b/compiler/logo-core/CMakeLists.txt @@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) add_library(logo_core STATIC ${SOURCES}) -set_target_properties(logo_core PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(logo_core PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) target_include_directories(logo_core PRIVATE src) target_include_directories(logo_core PUBLIC include) target_link_libraries(logo_core PUBLIC loco) diff --git a/compiler/logo-ex/CMakeLists.txt b/compiler/logo-ex/CMakeLists.txt new file mode 100644 index 000000000..31d76025e --- /dev/null +++ b/compiler/logo-ex/CMakeLists.txt @@ -0,0 +1,23 @@ +file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) + +add_library(logo_ex STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(logo_ex PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) +target_include_directories(logo_ex PRIVATE src) +target_include_directories(logo_ex PUBLIC include) +target_link_libraries(logo_ex PUBLIC loco) +target_link_libraries(logo_ex PUBLIC logo_core) +target_link_libraries(logo_ex PRIVATE locomotiv) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(logo_ex_test ${TESTS}) +target_include_directories(logo_ex_test PRIVATE src) +target_link_libraries(logo_ex_test logo_ex) diff --git a/compiler/logo-ex/README.md b/compiler/logo-ex/README.md new file mode 100644 index 000000000..8ea55a202 --- /dev/null +++ b/compiler/logo-ex/README.md @@ -0,0 
+1,6 @@ +# logo-ex + +_logo-ex_ provides _loco_ Extended Graph Passes for Transformation and Optimization +that gets help from _locomotiv_ + +NOTE: f2e7c38dcc601cb290c380d8314a3ae627923f58 is where this came from diff --git a/compiler/logo/include/logo/ConstantFoldingPass.h b/compiler/logo-ex/include/logo/ConstantFoldingPass.h index 99ccdc315..9143ae49b 100644 --- a/compiler/logo/include/logo/ConstantFoldingPass.h +++ b/compiler/logo-ex/include/logo/ConstantFoldingPass.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __LOGO_CONSTANT_FOLDING_PASS_H__ -#define __LOGO_CONSTANT_FOLDING_PASS_H__ +#ifndef __LOGO_EX_CONSTANT_FOLDING_PASS_H__ +#define __LOGO_EX_CONSTANT_FOLDING_PASS_H__ #include <logo/Pass.h> @@ -38,4 +38,4 @@ public: } // namespace logo -#endif // __LOGO_CONSTANT_FOLDING_PASS_H__ +#endif // __LOGO_EX_CONSTANT_FOLDING_PASS_H__ diff --git a/compiler/logo-ex/include/logo/PassesEx.h b/compiler/logo-ex/include/logo/PassesEx.h new file mode 100644 index 000000000..8bdf93bd9 --- /dev/null +++ b/compiler/logo-ex/include/logo/PassesEx.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LOGO_PASSES_EX_H__ +#define __LOGO_PASSES_EX_H__ + +// Please keep this in alphabetical order + +#include <logo/ConstantFoldingPass.h> + +#endif // __LOGO_PASSES_EX_H__ diff --git a/compiler/logo-ex/requires.cmake b/compiler/logo-ex/requires.cmake new file mode 100644 index 000000000..c76183353 --- /dev/null +++ b/compiler/logo-ex/requires.cmake @@ -0,0 +1,3 @@ +require("loco") +require("logo-core") +require("locomotiv") diff --git a/compiler/logo/src/Passes/ConstantFoldingPass.cpp b/compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp index 2bd4759ca..97d75458b 100644 --- a/compiler/logo/src/Passes/ConstantFoldingPass.cpp +++ b/compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp b/compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp index 5d222eb00..ba571a7f6 100644 --- a/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp +++ b/compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/compiler/logo-ex/src/TestHelper.h b/compiler/logo-ex/src/TestHelper.h new file mode 100644 index 000000000..07e3b20aa --- /dev/null +++ b/compiler/logo-ex/src/TestHelper.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __TEST_HELPER_H__ +#define __TEST_HELPER_H__ + +#include <loco.h> + +namespace logo +{ +namespace test +{ + +template <typename T> T *find_first_node_by_type(loco::Graph *g) +{ + T *first_node = nullptr; + + for (auto node : loco::postorder_traversal(loco::output_nodes(g))) + { + first_node = dynamic_cast<T *>(node); + if (first_node != nullptr) + break; + } + + return first_node; +} + +} // namespace test +} // namespace logo + +#endif // __TEST_HELPER_H__ diff --git a/compiler/logo/CMakeLists.txt b/compiler/logo/CMakeLists.txt index a8efd9b03..e6a6f907f 100644 --- a/compiler/logo/CMakeLists.txt +++ b/compiler/logo/CMakeLists.txt @@ -3,12 +3,13 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) add_library(logo STATIC ${SOURCES}) -set_target_properties(logo PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(logo PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) target_include_directories(logo PRIVATE src) target_include_directories(logo PUBLIC include) target_link_libraries(logo PUBLIC loco) target_link_libraries(logo PUBLIC logo_core) -target_link_libraries(logo PRIVATE locomotiv) if(NOT ENABLE_TEST) return() diff --git a/compiler/logo/include/logo/Passes.h b/compiler/logo/include/logo/Passes.h index 636251e45..06fd3212b 100644 --- a/compiler/logo/include/logo/Passes.h +++ 
b/compiler/logo/include/logo/Passes.h @@ -19,7 +19,6 @@ // Please keep this in alphabetical order -#include <logo/ConstantFoldingPass.h> #include <logo/RemoveDeadNodePass.h> #include <logo/RemoveForwardNodePass.h> #include <logo/ReorderDecodePass.h> diff --git a/compiler/logo/requires.cmake b/compiler/logo/requires.cmake index c76183353..3e4d227cd 100644 --- a/compiler/logo/requires.cmake +++ b/compiler/logo/requires.cmake @@ -1,3 +1,2 @@ require("loco") require("logo-core") -require("locomotiv") diff --git a/compiler/luci-interpreter/README.md b/compiler/luci-interpreter/README.md index 4a9a34e6d..77ec5c81c 100644 --- a/compiler/luci-interpreter/README.md +++ b/compiler/luci-interpreter/README.md @@ -111,7 +111,7 @@ Note that one memory manager could be shared between multiple interpreter instan List of predefined memory managers: - `SimpleMemoryManager` This is a simple wrapper around new/delete, default one. -- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager desctuctor, used in kernel unit tests. +- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager destructor, used in kernel unit tests. - `BuddyMemoryManager` Implements Buddy algorithm, uses external buffer for tensor data allocations, does not need new/delete. - `StaticMemoryManger` Uses precomputed memory allocation plan. Requires preparation with MemoryPlanner, but could reduce memory consumption in restricted environments (like MCUs). diff --git a/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h b/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h new file mode 100644 index 000000000..375b1ae20 --- /dev/null +++ b/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__ +#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__ + +#include <luci/Import/GraphBuilderRegistry.h> + +namespace luci_interpreter +{ + +/** + * @brief Creates and returns GraphBuilderSource, which allows to not copy constant buffers from + * model's file. + * + * @warning Use this source only in case when model's buffer alive longer than Interpreter. + */ +std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying(); + +} // namespace luci_interpreter + +#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__ diff --git a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h index 7dee8a7f2..8e2f457a5 100644 --- a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h +++ b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h @@ -50,7 +50,9 @@ public: class Interpreter { public: - explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager = nullptr); + explicit Interpreter(const luci::Module *module); + + explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager); ~Interpreter(); @@ -69,7 +71,6 @@ private: // the order of deletion in the destructor std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr; std::unique_ptr<class RuntimeModule> _runtime_module; - 
IMemoryManager *_memory_manager = nullptr; // Observer functionality support. std::unique_ptr<struct RuntimeToIR> _runtime_to_ir; diff --git a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst index 771974afe..d134a6b95 100644 --- a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst +++ b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst @@ -7,9 +7,11 @@ REGISTER_KERNEL(Concatenation) REGISTER_KERNEL(Conv2D) REGISTER_KERNEL(DepthToSpace) REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Dequantize) REGISTER_KERNEL(Div) REGISTER_KERNEL(Elu) REGISTER_KERNEL(Exp) +REGISTER_KERNEL(ExpandDims) REGISTER_KERNEL(Floor) REGISTER_KERNEL(FloorDiv) REGISTER_KERNEL(Equal) @@ -37,6 +39,7 @@ REGISTER_KERNEL(NotEqual) REGISTER_KERNEL(Pad) REGISTER_KERNEL(PadV2) REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Quantize) REGISTER_KERNEL(Reshape) REGISTER_KERNEL(ResizeBilinear) REGISTER_KERNEL(ResizeNearestNeighbor) @@ -50,6 +53,7 @@ REGISTER_KERNEL(Square) REGISTER_KERNEL(SquaredDifference) REGISTER_KERNEL(Squeeze) REGISTER_KERNEL(Sub) +REGISTER_KERNEL(SVDF) REGISTER_KERNEL(Tanh) REGISTER_KERNEL(Transpose) REGISTER_KERNEL(TransposeConv) diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h new file mode 100644 index 000000000..a274afb7e --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H +#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H + +#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h> +#include <tensorflow/lite/kernels/internal/reference/pooling.h> +#include <arm_nn_types.h> +#include <arm_nnfunctions.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void AveragePool(const tflite::PoolParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data, + const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "AveragePool NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void AveragePool<int8_t>(const tflite::PoolParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, + int8_t *scratchpad_data) +{ + assert(input_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + assert(scratchpad_data != nullptr); + + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + assert(batches == 1); + + const int depth = tflite::MatchingDim(input_shape, 3, output_shape, 3); + + cmsis_nn_dims input_dims; + input_dims.n = 1; + input_dims.h = input_shape.Dims(1); + 
input_dims.w = input_shape.Dims(2); + input_dims.c = depth; + + cmsis_nn_dims output_dims; + output_dims.n = 1; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = depth; + + cmsis_nn_pool_params pool_params; + pool_params.stride.h = params.stride_height; + pool_params.stride.w = params.stride_width; + pool_params.padding.h = params.padding_values.height; + pool_params.padding.w = params.padding_values.width; + pool_params.activation.min = params.quantized_activation_min; + pool_params.activation.max = params.quantized_activation_max; + + cmsis_nn_dims filter_dims; + filter_dims.n = 1; + filter_dims.h = params.filter_height; + filter_dims.w = params.filter_width; + filter_dims.c = 1; + + cmsis_nn_context ctx; + ctx.buf = scratchpad_data; + ctx.size = scratchpad_shape.Dims(0); + auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims, + output_data); + assert(res == ARM_MATH_SUCCESS); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &output_shape) + +{ + if (input_data_type == luci_interpreter::DataType::S8) + { + assert(input_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + + const int32_t output_width = output_shape.Dims(2); + const int32_t depth = tflite::MatchingDim(input_shape, 3, output_shape, 3); + + const int32_t buf_size = arm_avgpool_s8_get_buffer_size(output_width, depth); + auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type)); + + luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H diff --git 
a/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h index 0a8ae4e48..cfb84ea60 100644 --- a/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h +++ b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h @@ -19,6 +19,8 @@ #include <tensorflow/lite/kernels/internal/reference/conv.h> #include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h> +#include <arm_nn_types.h> +#include <arm_nnfunctions.h> namespace luci_interpreter_pal { @@ -26,11 +28,11 @@ static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeS const float *input_data, const tflite::RuntimeShape &filter_shape, const float *filter_data, const tflite::RuntimeShape &bias_shape, const float *bias_data, const tflite::RuntimeShape &output_shape, - float *output_data, const tflite::RuntimeShape &im2col_shape, - float *im2col_data) + float *output_data, const tflite::RuntimeShape &scratchpad_shape, + float *scratchpad_data) { - (void)im2col_shape; - (void)im2col_data; + (void)scratchpad_shape; + (void)scratchpad_data; tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data, tflite::RuntimeShape(), nullptr); @@ -40,14 +42,14 @@ static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeS const uint8 *input_data, const tflite::RuntimeShape &filter_shape, const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, const int32 *bias_data, const tflite::RuntimeShape &output_shape, - uint8 *output_data, const tflite::RuntimeShape &im2col_shape, - uint8 *im2col_data) + uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + uint8 *scratchpad_data) { - (void)im2col_shape; - (void)im2col_data; + (void)scratchpad_shape; + (void)scratchpad_data; tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, - bias_shape, bias_data, output_shape, output_data, im2col_shape, - im2col_data, nullptr); + 
bias_shape, bias_data, output_shape, output_data, scratchpad_shape, + scratchpad_data, nullptr); } static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_t *mult, @@ -55,14 +57,141 @@ static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_ const int8 *input_data, const tflite::RuntimeShape &filter_shape, const int8 *filter_data, const tflite::RuntimeShape &bias_shape, const int32 *bias_data, const tflite::RuntimeShape &output_shape, - int8 *output_data, const tflite::RuntimeShape &im2col_shape, - int8 *im2col_data) + int8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + int8 *scratchpad_data) { - (void)im2col_shape; - (void)im2col_data; - tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, - filter_shape, filter_data, bias_shape, bias_data, - output_shape, output_data); + if (scratchpad_data) + { + cmsis_nn_conv_params conv_params; + conv_params.dilation.h = params.dilation_height_factor; + conv_params.dilation.w = params.dilation_width_factor; + + assert(conv_params.dilation.h == 1); + assert(conv_params.dilation.w == 1); + + conv_params.input_offset = params.input_offset; + conv_params.output_offset = params.output_offset; + conv_params.stride.h = params.stride_height; + conv_params.stride.w = params.stride_width; + conv_params.padding.h = params.padding_values.height; + conv_params.padding.w = params.padding_values.width; + conv_params.activation.min = params.quantized_activation_min; + conv_params.activation.max = params.quantized_activation_max; + + cmsis_nn_per_channel_quant_params quant_params; + quant_params.multiplier = const_cast<int32_t *>(mult); + quant_params.shift = const_cast<int32_t *>(shifts); + + assert(conv_params.activation.min <= conv_params.activation.max); + assert(input_shape.DimensionsCount() == 4); + assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + const int batch_size = 
tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) + { + assert(bias_shape.FlatSize() == output_depth); + } + + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_depth; + + cmsis_nn_dims filter_dims; + filter_dims.n = output_depth; + filter_dims.h = filter_shape.Dims(1); + filter_dims.w = filter_shape.Dims(2); + filter_dims.c = input_depth; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = output_depth; + + cmsis_nn_context ctx; + ctx.buf = scratchpad_data; + ctx.size = scratchpad_shape.Dims(0); + + auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data, + &filter_dims, filter_data, &bias_dims, bias_data, + &output_dims, output_data); + assert(res == ARM_MATH_SUCCESS); + } + else + { + tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, + filter_shape, filter_data, bias_shape, bias_data, + output_shape, output_data); + } +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::ConvParams ¶ms, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + cmsis_nn_conv_params conv_params; + conv_params.dilation.h = params.dilation_height_factor; + conv_params.dilation.w = params.dilation_width_factor; + + if (input_data_type == loco::DataType::S8 && conv_params.dilation.h == 1 && + conv_params.dilation.w == 1) + { + 
const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3); + const int32_t filter_height = filter_shape.Dims(1); + const int32_t filter_width = filter_shape.Dims(2); + const int32_t output_height = output_shape.Dims(1); + const int32_t output_width = output_shape.Dims(2); + + conv_params.input_offset = params.input_offset; + conv_params.output_offset = params.output_offset; + conv_params.stride.h = params.stride_height; + conv_params.stride.w = params.stride_width; + conv_params.padding.h = params.padding_values.height; + conv_params.padding.w = params.padding_values.width; + + cmsis_nn_dims input_dims; + input_dims.n = batches; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_depth; + + cmsis_nn_dims filter_dims; + filter_dims.n = output_depth; + filter_dims.h = filter_height; + filter_dims.w = filter_width; + filter_dims.c = input_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batches; + output_dims.h = output_height; + output_dims.w = output_width; + output_dims.c = output_depth; + + const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims, + &filter_dims, &output_dims); + + luci_interpreter::Shape scratchpad_shape{buf_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } } } // namespace luci_interpreter_pal diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h new file mode 100644 index 000000000..120dcd803 --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H +#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H + +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h> +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h> +#include <arm_nnfunctions.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +DepthwiseConvPerChannel(const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &filter_shape, + const T *filter_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, const tflite::RuntimeShape &output_shape, + T *output_data, const tflite::RuntimeShape &scratchpad_shape, + T *scratchpad_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "DepthwiseConvPerChannel NYI"); + (void)params; + (void)output_multiplier; + (void)output_shift; + (void)input_shape; + (void)output_data; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void DepthwiseConvPerChannel<int8_t>( + const 
tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data) +{ + if (scratchpad_data) + { + cmsis_nn_dw_conv_params dw_conv_params; + dw_conv_params.dilation.h = params.dilation_height_factor; + dw_conv_params.dilation.w = params.dilation_width_factor; + assert(dw_conv_params.dilation.h == 1); + assert(dw_conv_params.dilation.w == 1); + + dw_conv_params.input_offset = params.input_offset; + dw_conv_params.output_offset = params.output_offset; + dw_conv_params.stride.h = params.stride_height; + dw_conv_params.stride.w = params.stride_width; + dw_conv_params.padding.h = params.padding_values.height; + dw_conv_params.padding.w = params.padding_values.width; + + dw_conv_params.activation.min = params.quantized_activation_min; + dw_conv_params.activation.max = params.quantized_activation_max; + dw_conv_params.ch_mult = params.depth_multiplier; + + cmsis_nn_per_channel_quant_params quant_params; + int32_t output_multiplier = params.output_multiplier; + int32_t output_shift = params.output_shift; + + quant_params.multiplier = &output_multiplier; + quant_params.shift = &output_shift; + + assert(dw_conv_params.activation.min <= dw_conv_params.activation.max); + const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3); + if (bias_data) + { + assert(bias_shape.FlatSize() == output_depth); + } + + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_shape.Dims(3); + + cmsis_nn_dims filter_dims; + 
filter_dims.n = filter_shape.Dims(0); + filter_dims.h = filter_shape.Dims(1); + filter_dims.w = filter_shape.Dims(2); + filter_dims.c = output_depth; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = output_depth; + + cmsis_nn_context ctx; + ctx.buf = scratchpad_data; + ctx.size = scratchpad_shape.Dims(0); + + auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims, + input_data, &filter_dims, filter_data, &bias_dims, + bias_data, &output_dims, output_data); + assert(res == ARM_MATH_SUCCESS); + } + else + { + tflite::reference_integer_ops::DepthwiseConvPerChannel( + params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data); + } +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const tflite::DepthwiseParams ¶ms, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + cmsis_nn_dw_conv_params dw_conv_params; + dw_conv_params.dilation.h = params.dilation_height_factor; + dw_conv_params.dilation.w = params.dilation_width_factor; + + if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 && + dw_conv_params.dilation.w == 1) + { + const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3); + + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_shape.Dims(3); + + cmsis_nn_dims filter_dims; + filter_dims.n = filter_shape.Dims(0); + filter_dims.h = 
filter_shape.Dims(1); + filter_dims.w = filter_shape.Dims(2); + filter_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = output_depth; + + const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size( + &dw_conv_params, &input_dims, &filter_dims, &output_dims); + + auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type)); + + luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h new file mode 100644 index 000000000..15ff0327b --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H +#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h" +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ + +template <typename T> +static inline void Dequantize(tflite::DequantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape, + output_data); +} + +static inline void Dequantize(tflite::DequantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const uint8_t *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h b/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h new file mode 100644 index 000000000..32e905761 --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H +#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H + +#include <tensorflow/lite/kernels/internal/reference/fully_connected.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h> +#include <arm_nnfunctions.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void FullyConnected(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &filter_shape, const T *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + { + // MARK: At this moment this operation doesn't support + assert(false && "FullyConnected NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + } +} + +template <> +inline void +FullyConnected<int8_t>(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + assert(output_shape.DimensionsCount() == 2); + + const int batches = output_shape.Dims(0); + const int output_depth = output_shape.Dims(1); + + const int filter_dim_count = filter_shape.DimensionsCount(); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); + + cmsis_nn_fc_params fc_params; + fc_params.input_offset = params.input_offset; + fc_params.output_offset = params.output_offset; + fc_params.filter_offset = params.weights_offset; + fc_params.activation.min = params.quantized_activation_min; + fc_params.activation.max = params.quantized_activation_max; + + cmsis_nn_per_tensor_quant_params quant_params; + 
quant_params.multiplier = params.output_multiplier; + quant_params.shift = params.output_shift; + + cmsis_nn_dims input_dims; + input_dims.n = batches; + input_dims.h = 1; + input_dims.w = 1; + input_dims.c = accum_depth; + + cmsis_nn_dims filter_dims; + filter_dims.n = accum_depth; + filter_dims.h = 1; + filter_dims.w = 1; + filter_dims.c = output_depth; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batches; + output_dims.h = 1; + output_dims.w = 1; + output_dims.c = output_depth; + + int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims); + auto buffer = std::make_unique<int8_t[]>(buf_size); + assert(buffer != nullptr); + + cmsis_nn_context ctx; + ctx.buf = buffer.get(); + ctx.size = buf_size; + + auto res = + arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims, + filter_data, &bias_dims, bias_data, &output_dims, output_data); + assert(res == ARM_MATH_SUCCESS); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h index 2b46b100c..347a97a83 100644 --- a/compiler/luci-interpreter/pal/cmsisnn/PALMul.h +++ b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h @@ -21,21 +21,21 @@ namespace luci_interpreter_pal { +template <typename T> static inline void Mul(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, - const float *input1_data, const tflite::RuntimeShape &input2_shape, - const float *input2_data, const tflite::RuntimeShape &output_shape, - float *output_data) + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, + T *output_data) { tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, 
output_data); } -static inline void BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, - const tflite::RuntimeShape &input1_shape, - const float *input1_data, - const tflite::RuntimeShape &input2_shape, - const float *input2_data, - const tflite::RuntimeShape &output_shape, float *output_data) +template <typename T> +static inline void +BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data) { tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data); diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h new file mode 100644 index 000000000..6046789ae --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H +#define LUCI_INTERPRETER_PAL_QUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Quantize(tflite::QuantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data); +} + +template <typename Input, typename Output> +static inline void Requantize(const Input *input_data, int32_t size, + int32_t effective_scale_multiplier, int32_t effective_scale_shift, + int32_t input_zero_point, int32_t output_zero_point, + Output *output_data) +{ + tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier, + effective_scale_shift, input_zero_point, output_zero_point, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h b/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h new file mode 100644 index 000000000..a4a5b2a78 --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SVDF_H +#define LUCI_INTERPRETER_PAL_SVDF_H + +#include <arm_nn_types.h> +#include <arm_nnfunctions.h> + +namespace luci_interpreter_pal +{ +static inline void +IntegerSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape, + const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, int16_t *activation_state_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data, + int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a, + int scale_2_b, int32_t input_zp, int32_t output_zp) +{ + const int32_t rank = params.rank; + const int32_t batch_size = input_shape.Dims(0); + const int32_t num_filters = weight_feature_shape.Dims(0); + const int32_t memory_size = weight_time_shape.Dims(1); + + cmsis_nn_dims input_dims; + input_dims.n = input_shape.Dims(0); + input_dims.h = input_shape.Dims(1); + + cmsis_nn_dims weights_feature_dims; + weights_feature_dims.n = weight_feature_shape.Dims(0); + weights_feature_dims.h = weight_feature_shape.Dims(1); + + cmsis_nn_dims weights_time_dims; + weights_time_dims.n = weight_time_shape.Dims(0); + weights_time_dims.h = weight_time_shape.Dims(1); + + cmsis_nn_dims bias_dims; + bias_dims.n = bias_shape.Dims(0); + + cmsis_nn_dims state_dims; + state_dims.n = batch_size; + state_dims.h = memory_size * num_filters; + + cmsis_nn_dims output_dims; + output_dims.n = output_shape.Dims(0); + output_dims.h = output_shape.Dims(1); + + cmsis_nn_svdf_params svdf_params; + svdf_params.rank = params.rank; + svdf_params.input_offset = input_zp; + svdf_params.output_offset = output_zp; + + svdf_params.input_activation.min = INT16_MIN; + svdf_params.input_activation.max = INT16_MAX; + + svdf_params.output_activation.min = INT8_MIN; + 
svdf_params.output_activation.max = INT8_MAX; + + cmsis_nn_per_tensor_quant_params in_quant_params; + in_quant_params.multiplier = scale_1_a; + in_quant_params.shift = scale_1_b; + + cmsis_nn_per_tensor_quant_params out_quant_params; + out_quant_params.multiplier = scale_2_a; + out_quant_params.shift = scale_2_b; + + cmsis_nn_context scratch_ctx; + scratch_ctx.buf = scratchpad_data; + + cmsis_nn_context scratch_output_ctx; + scratch_output_ctx.buf = output_temp_data; + + arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params, + &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims, + weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data, + &output_dims, output_data); +} +static inline void +FloatSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &weight_feature_shape, + const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const float *weight_time_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, float *scratchpad_data, float *activation_state_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + const int32_t rank = params.rank; + const int32_t batch_size = input_shape.Dims(0); + const int32_t input_size = input_shape.Dims(1); + const int32_t num_filters = weight_feature_shape.Dims(0); + const int32_t num_units = num_filters / rank; + const int32_t memory_size = weight_time_shape.Dims(1); + + // Left shift the activation_state. + { + float *new_state_start = activation_state_data; + const float *old_state_start = activation_state_data + 1; + const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size; + while (old_state_start != old_state_end) + { + *new_state_start++ = *old_state_start++; + } + } + + // Note: no need to clear the latest activation, matmul is not accumulative. 
+ + // Compute conv1d(inputs, weights_feature). + // The activation_state's rightmost column is used to save current cycle + // activation. This is achieved by starting at state_ptr[memory_size - 1] and + // having the stride equal to memory_size. + + // Perform batched matrix vector multiply operation: + { + const float *matrix = weight_feature_data; + const float *vector = input_data; + float *result = &activation_state_data[memory_size - 1]; + float *result_in_batch = result; + for (int i = 0; i < batch_size; ++i) + { + const float *matrix_ptr = matrix; + for (int j = 0; j < num_filters; ++j) + { + float dot_prod = 0.0f; + const float *vector_in_batch = vector + i * input_size; + for (int k = 0; k < input_size; ++k) + { + dot_prod += *matrix_ptr++ * *vector_in_batch++; + } + *result_in_batch = dot_prod; + result_in_batch += memory_size; + } + } + } + + tflite::reference_ops::ApplyTimeWeightsBiasAndActivation( + batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data, + params.activation, activation_state_data, scratchpad_data, output_data); +} + +static inline void SetupScratchpadTensor( + const luci_interpreter::DataType &input_data_type, + const luci_interpreter::DataType &weight_feature_data_type, + luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2, + luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4, + luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6, + const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape, + const int32_t batch_size, const int32_t num_filters, const int32_t num_units) +{ + if (input_data_type == loco::DataType::FLOAT32 && + (weight_feature_data_type == loco::DataType::S8 || + weight_feature_data_type == loco::DataType::U8)) + { + (void)input_shape; + (void)weight_time_shape; + (void)scratchpad_3; + (void)scratchpad_4; + (void)scratchpad_5; + (void)scratchpad_6; + + throw 
std::runtime_error("Hybrid type is not supported for cmsisnn"); + } + + // Resize scratchpad_1 tensor + scratchpad_1->resize({batch_size, num_filters}); + + if (input_data_type == loco::DataType::S8) + { + // Resize scratchpad_2 for full_integer op + scratchpad_2->resize({batch_size, num_units}); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SVDF_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake index 9a25a3c5d..a68b363d9 100644 --- a/compiler/luci-interpreter/pal/cmsisnn/pal.cmake +++ b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake @@ -42,9 +42,12 @@ macro(add_pal_to_target TGT) "${TensorFlowSource_DIR}") target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR}) - set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) + file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c") + list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc) add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES}) - set_target_properties(luci_interpreter_cmsisnn_pal PROPERTIES POSITION_INDEPENDENT_CODE ON) + set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON) target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE "${TensorFlowRuySource_DIR}" "${TensorFlowGEMMLowpSource_DIR}" @@ -53,7 +56,7 @@ macro(add_pal_to_target TGT) ) add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN) - target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE + target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC "${CMSISSource_DIR}/CMSIS/NN/Include" "${CMSISSource_DIR}/CMSIS/DSP/Include" "${CMSISSource_DIR}/CMSIS/Core/Include") diff --git 
a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst index 9d541276c..428b15ee0 100644 --- a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst +++ b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst @@ -1,19 +1,23 @@ REGISTER_KERNEL(Add) REGISTER_KERNEL(ArgMax) REGISTER_KERNEL(AveragePool2D) +REGISTER_KERNEL(BatchMatMul) REGISTER_KERNEL(BatchToSpaceND) REGISTER_KERNEL(Cast) REGISTER_KERNEL(Concatenation) REGISTER_KERNEL(Conv2D) REGISTER_KERNEL(DepthToSpace) REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Dequantize) REGISTER_KERNEL(Div) REGISTER_KERNEL(Elu) REGISTER_KERNEL(Exp) +REGISTER_KERNEL(ExpandDims) REGISTER_KERNEL(Floor) REGISTER_KERNEL(FloorDiv) REGISTER_KERNEL(Equal) REGISTER_KERNEL(FullyConnected) +REGISTER_KERNEL(Gather) REGISTER_KERNEL(Greater) REGISTER_KERNEL(GreaterEqual) REGISTER_KERNEL(If) @@ -37,11 +41,13 @@ REGISTER_KERNEL(MirrorPad) REGISTER_KERNEL(Mul) REGISTER_KERNEL(Neg) REGISTER_KERNEL(NotEqual) +REGISTER_KERNEL(OneHot) REGISTER_KERNEL(Pack) REGISTER_KERNEL(Pad) REGISTER_KERNEL(PadV2) REGISTER_KERNEL(Pow) REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Quantize) REGISTER_KERNEL(Relu) REGISTER_KERNEL(Relu6) REGISTER_KERNEL(Reshape) @@ -61,6 +67,7 @@ REGISTER_KERNEL(Square) REGISTER_KERNEL(SquaredDifference) REGISTER_KERNEL(Squeeze) REGISTER_KERNEL(Sub) +REGISTER_KERNEL(SVDF) REGISTER_KERNEL(Tanh) REGISTER_KERNEL(Transpose) REGISTER_KERNEL(TransposeConv) diff --git a/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h b/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h new file mode 100644 index 000000000..cce30601f --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H +#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H + +#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h> +#include <tensorflow/lite/kernels/internal/reference/pooling.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void AveragePool(const tflite::PoolParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data, + const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data) +{ + { + // MARK: At this moment this operation doesn't support + assert(false && "AveragePool NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void AveragePool<int8_t>(const tflite::PoolParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, + int8_t *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + + tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape, + output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &output_shape) + +{ + (void)input_data_type; + (void)input_shape; + (void)output_shape; + + 
scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H diff --git a/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h b/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h new file mode 100644 index 000000000..3894f2d92 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHMATMUL_H +#define LUCI_INTERPRETER_PAL_BATCHMATMUL_H + +#include <tensorflow/lite/kernels/internal/reference/batch_matmul.h> + +namespace luci_interpreter_pal +{ +inline void BatchMatMul(const tflite::RuntimeShape &lhs_shape, const float *lhs_data, + const tflite::RuntimeShape &rhs_shape, const float *rhs_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::BatchMatMul(lhs_shape, lhs_data, rhs_shape, rhs_data, output_shape, + output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *lhs_scratchpad, + luci_interpreter::Tensor *rhs_scratchpad, + const tflite::RuntimeShape &lhs_shape, + const tflite::RuntimeShape &rhs_shape) +{ + // Scratchpad for transposed LHS + { + auto lhs_rank = lhs_shape.DimensionsCount(); + luci_interpreter::Shape scratchpad_size(lhs_rank); + for (int i = 0; i < lhs_rank - 2; ++i) + { + scratchpad_size.dim(i) = lhs_shape.Dims(i); + } + scratchpad_size.dim(lhs_rank - 2) = lhs_shape.Dims(lhs_rank - 1); + scratchpad_size.dim(lhs_rank - 1) = lhs_shape.Dims(lhs_rank - 2); + + lhs_scratchpad->resize(scratchpad_size); + } + // Scratchpad for transposed RHS + { + auto rhs_rank = rhs_shape.DimensionsCount(); + luci_interpreter::Shape scratchpad_size(rhs_rank); + for (int i = 0; i < rhs_rank - 2; ++i) + { + scratchpad_size.dim(i) = rhs_shape.Dims(i); + } + scratchpad_size.dim(rhs_rank - 2) = rhs_shape.Dims(rhs_rank - 1); + scratchpad_size.dim(rhs_rank - 1) = rhs_shape.Dims(rhs_rank - 2); + + rhs_scratchpad->resize(scratchpad_size); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHMATMUL_H diff --git a/compiler/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-interpreter/pal/linux/PALConv2d.h index 2550dd5d7..985a15f39 100644 --- a/compiler/luci-interpreter/pal/linux/PALConv2d.h +++ b/compiler/luci-interpreter/pal/linux/PALConv2d.h @@ -26,14 +26,24 @@ static inline void Conv(const tflite::ConvParams 
¶ms, const tflite::RuntimeS const float *input_data, const tflite::RuntimeShape &filter_shape, const float *filter_data, const tflite::RuntimeShape &bias_shape, const float *bias_data, const tflite::RuntimeShape &output_shape, - float *output_data, const tflite::RuntimeShape &im2col_shape, - float *im2col_data) + float *output_data, const tflite::RuntimeShape &scratchpad_shape, + float *scratchpad_data) { - if (im2col_data) + (void)scratchpad_shape; + if (scratchpad_data) { + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int32_t output_height = output_shape.Dims(1); + const int32_t output_width = output_shape.Dims(2); + const int32_t filter_height = filter_shape.Dims(1); + const int32_t filter_width = filter_shape.Dims(2); + tflite::RuntimeShape im2col_shape{batches, output_height, output_width, + input_depth * filter_height * filter_width}; + tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data, im2col_shape, - im2col_data); + scratchpad_data); } else tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, @@ -45,8 +55,8 @@ static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeS const uint8 *input_data, const tflite::RuntimeShape &filter_shape, const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, const int32 *bias_data, const tflite::RuntimeShape &output_shape, - uint8 *output_data, const tflite::RuntimeShape &im2col_shape, - uint8 *im2col_data) + uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + uint8 *scratchpad_data) { // TODO This should only be done once (although it takes only a few microseconds). // Also, the user should be able to adjust the number of threads. 
@@ -54,8 +64,8 @@ static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeS gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency())); tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, - bias_shape, bias_data, output_shape, output_data, im2col_shape, - im2col_data, gemmlowp_context.get()); + bias_shape, bias_data, output_shape, output_data, scratchpad_shape, + scratchpad_data, gemmlowp_context.get()); } static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_t *mult, @@ -63,17 +73,55 @@ static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_ const int8 *input_data, const tflite::RuntimeShape &filter_shape, const int8 *filter_data, const tflite::RuntimeShape &bias_shape, const int32 *bias_data, const tflite::RuntimeShape &output_shape, - int8 *output_data, const tflite::RuntimeShape &im2col_shape, - int8 *im2col_data) + int8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + int8 *scratchpad_data) { - (void)im2col_shape; - (void)im2col_data; + (void)scratchpad_shape; + (void)scratchpad_data; // TODO enable optimized version tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data); } +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::ConvParams ¶ms, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + const int32_t filter_height = filter_shape.Dims(1); + const int32_t filter_width = filter_shape.Dims(2); + + // Allocate tensor for scratchpad, if needed. + // The checks here should be aligned with the actual implementation. 
+ const bool need_dilated_scratchpad = + params.dilation_height_factor != 1 || params.dilation_width_factor != 1; + const bool need_non_dilated_scratchpad = params.stride_height != 1 || params.stride_width != 1 || + filter_height != 1 || filter_width != 1; + auto _need_scratchpad = input_data_type != luci_interpreter::DataType::S16 && + (need_dilated_scratchpad || need_non_dilated_scratchpad); + + if (_need_scratchpad) + { + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int32_t output_height = output_shape.Dims(1); + const int32_t output_width = output_shape.Dims(2); + + auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type)); + int32_t scratchpad_size = batches * output_width * output_height * input_depth * filter_height * + filter_width * data_type_size; + luci_interpreter::Shape scratchpad_shape{scratchpad_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } +} + } // namespace luci_interpreter_pal #endif // LUCI_INTERPRETER_PAL_CONV2D_H diff --git a/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h new file mode 100644 index 000000000..c9d1a2948 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H +#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H + +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h> +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +DepthwiseConvPerChannel(const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &filter_shape, + const T *filter_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, const tflite::RuntimeShape &output_shape, + T *output_data, const tflite::RuntimeShape &scratchpad_shape, + T *scratchpad_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "DepthwiseConvPerChannel NYI"); + (void)params; + (void)output_multiplier; + (void)output_shift; + (void)input_shape; + (void)output_data; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void DepthwiseConvPerChannel<int8_t>( + const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + 
tflite::reference_integer_ops::DepthwiseConvPerChannel( + params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const tflite::DepthwiseParams ¶ms, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) + +{ + (void)params; + (void)input_data_type; + (void)input_shape; + (void)filter_shape; + (void)output_shape; + + scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H diff --git a/compiler/luci-interpreter/pal/linux/PALDequantize.h b/compiler/luci-interpreter/pal/linux/PALDequantize.h new file mode 100644 index 000000000..3af6d0777 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALDequantize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H +#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Dequantize(tflite::DequantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Dequantize(params, input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H diff --git a/compiler/luci-interpreter/pal/linux/PALFullyConnected.h b/compiler/luci-interpreter/pal/linux/PALFullyConnected.h new file mode 100644 index 000000000..62970dbf7 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALFullyConnected.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H +#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H + +#include <tensorflow/lite/kernels/internal/reference/fully_connected.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void FullyConnected(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &filter_shape, const T *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + { + // MARK: At this moment this operation doesn't support + assert(false && "FullyConnected NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + } +} + +template <> +inline void +FullyConnected<int8_t>(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape, + filter_data, bias_shape, bias_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H diff --git a/compiler/luci-interpreter/pal/linux/PALGather.h b/compiler/luci-interpreter/pal/linux/PALGather.h new file mode 100644 index 000000000..49ac35f93 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALGather.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_GATHER_H +#define LUCI_INTERPRETER_PAL_GATHER_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T, typename CoordsT = int32> +static inline void Gather(const tflite::GatherParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &coords_shape, const CoordsT *coords_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::Gather(op_params, input_shape, input_data, coords_shape, coords_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_GATHER_H diff --git a/compiler/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-interpreter/pal/linux/PALMul.h index cfaec1b58..a8a9d4abc 100644 --- a/compiler/luci-interpreter/pal/linux/PALMul.h +++ b/compiler/luci-interpreter/pal/linux/PALMul.h @@ -21,21 +21,31 @@ namespace luci_interpreter_pal { +template <typename T> static inline void Mul(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, - const float *input1_data, const tflite::RuntimeShape &input2_shape, - const float *input2_data, const tflite::RuntimeShape &output_shape, - float *output_data) + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape 
&output_shape, + T *output_data) { tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data); } -static inline void BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, - const tflite::RuntimeShape &input1_shape, - const float *input1_data, - const tflite::RuntimeShape &input2_shape, - const float *input2_data, - const tflite::RuntimeShape &output_shape, float *output_data) +template <> +inline void Mul(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const int64_t *input1_data, const tflite::RuntimeShape &input2_shape, + const int64_t *input2_data, const tflite::RuntimeShape &output_shape, + int64_t *output_data) +{ + tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} + +template <typename T> +static inline void +BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data) { tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data); diff --git a/compiler/luci-interpreter/pal/linux/PALQuantize.h b/compiler/luci-interpreter/pal/linux/PALQuantize.h new file mode 100644 index 000000000..bf1d7954e --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALQuantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H +#define LUCI_INTERPRETER_PAL_QUANTIZE_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Quantize(tflite::QuantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data); +} + +template <typename Input, typename Output> +static inline void Requantize(const Input *input_data, int32_t size, + int32_t effective_scale_multiplier, int32_t effective_scale_shift, + int32_t input_zero_point, int32_t output_zero_point, + Output *output_data) +{ + tflite::optimized_ops::Requantize(input_data, size, effective_scale_multiplier, + effective_scale_shift, input_zero_point, output_zero_point, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H diff --git a/compiler/luci-interpreter/pal/linux/PALSVDF.h b/compiler/luci-interpreter/pal/linux/PALSVDF.h new file mode 100644 index 000000000..0ffba14f0 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALSVDF.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SVDF_H +#define LUCI_INTERPRETER_PAL_SVDF_H + +#include <tensorflow/lite/kernels/internal/reference/svdf.h> + +namespace luci_interpreter_pal +{ +static inline void +IntegerSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape, + const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, int16_t *activation_state_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data, + int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a, + int scale_2_b, int32_t input_zp, int32_t output_zp) +{ + tflite::reference_ops::EvalIntegerSVDF(¶ms, input_shape, input_data, weight_feature_shape, + weight_feature_data, weight_time_shape, weight_time_data, + bias_shape, bias_data, activation_state_data, output_shape, + output_data, scratchpad_data, output_temp_data, scale_1_a, + scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp); +} +static inline void +FloatSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &weight_feature_shape, + const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const float *weight_time_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, float *scratchpad_data, float *activation_state_data, + const 
tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::EvalFloatSVDF(¶ms, input_shape, input_data, weight_feature_shape, + weight_feature_data, weight_time_shape, weight_time_data, + bias_shape, bias_data, scratchpad_data, + activation_state_data, output_shape, output_data); +} + +static inline void SetupScratchpadTensor( + const luci_interpreter::DataType &input_data_type, + const luci_interpreter::DataType &weight_feature_data_type, + luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2, + luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4, + luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6, + const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape, + const int32_t batch_size, const int32_t num_filters, const int32_t num_units) +{ + + if (input_data_type == loco::DataType::FLOAT32 && + (weight_feature_data_type == loco::DataType::S8 || + weight_feature_data_type == loco::DataType::U8)) + { + (void)input_shape; + (void)weight_time_shape; + (void)scratchpad_3; + (void)scratchpad_4; + (void)scratchpad_5; + (void)scratchpad_6; + + throw std::runtime_error("Hybrid type is not currently supported for linux platform"); + } + + // Resize scratchpad_1 tensor + scratchpad_1->resize({batch_size, num_filters}); + + if (input_data_type == loco::DataType::S8) + { + // Resize scratchpad_2 for full_integer op + scratchpad_2->resize({batch_size, num_units}); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SVDF_H diff --git a/compiler/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-interpreter/pal/linux/pal.cmake index 84349e0bf..185700cf9 100644 --- a/compiler/luci-interpreter/pal/linux/pal.cmake +++ b/compiler/luci-interpreter/pal/linux/pal.cmake @@ -40,7 +40,35 @@ macro(add_pal_to_target TGT) # TODO put it back, I changed my mind. 
# instead add sources with visitors in this library - set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) + set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) + + if(BUILD_ARM32_NEON) + # NOTE may need to revise this list for version upgrade + set(PAL_SOURCES ${PAL_SOURCES} + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc + ${TensorFlowRuySource_DIR}/ruy/allocator.cc + ${TensorFlowRuySource_DIR}/ruy/block_map.cc + ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc + ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc + ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc + ${TensorFlowRuySource_DIR}/ruy/ctx.cc + ${TensorFlowRuySource_DIR}/ruy/denormal.cc + ${TensorFlowRuySource_DIR}/ruy/frontend.cc + ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc + ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc + ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc + ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc + ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc + ${TensorFlowRuySource_DIR}/ruy/trmul.cc + ${TensorFlowRuySource_DIR}/ruy/tune.cc + ${TensorFlowRuySource_DIR}/ruy/wait.cc + ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc + ) + endif(BUILD_ARM32_NEON) + add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES}) set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE diff --git a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst index 771974afe..d134a6b95 100644 --- a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst +++ 
b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst @@ -7,9 +7,11 @@ REGISTER_KERNEL(Concatenation) REGISTER_KERNEL(Conv2D) REGISTER_KERNEL(DepthToSpace) REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Dequantize) REGISTER_KERNEL(Div) REGISTER_KERNEL(Elu) REGISTER_KERNEL(Exp) +REGISTER_KERNEL(ExpandDims) REGISTER_KERNEL(Floor) REGISTER_KERNEL(FloorDiv) REGISTER_KERNEL(Equal) @@ -37,6 +39,7 @@ REGISTER_KERNEL(NotEqual) REGISTER_KERNEL(Pad) REGISTER_KERNEL(PadV2) REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Quantize) REGISTER_KERNEL(Reshape) REGISTER_KERNEL(ResizeBilinear) REGISTER_KERNEL(ResizeNearestNeighbor) @@ -50,6 +53,7 @@ REGISTER_KERNEL(Square) REGISTER_KERNEL(SquaredDifference) REGISTER_KERNEL(Squeeze) REGISTER_KERNEL(Sub) +REGISTER_KERNEL(SVDF) REGISTER_KERNEL(Tanh) REGISTER_KERNEL(Transpose) REGISTER_KERNEL(TransposeConv) diff --git a/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h b/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h new file mode 100644 index 000000000..cce30601f --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H +#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H + +#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h> +#include <tensorflow/lite/kernels/internal/reference/pooling.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void AveragePool(const tflite::PoolParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data, + const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data) +{ + { + // MARK: At this moment this operation doesn't support + assert(false && "AveragePool NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void AveragePool<int8_t>(const tflite::PoolParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, + int8_t *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + + tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape, + output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &output_shape) + +{ + (void)input_data_type; + (void)input_shape; + (void)output_shape; + + scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H diff --git a/compiler/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-interpreter/pal/mcu/PALConv2d.h index 0a8ae4e48..13976877a 100644 --- a/compiler/luci-interpreter/pal/mcu/PALConv2d.h +++ b/compiler/luci-interpreter/pal/mcu/PALConv2d.h @@ -26,11 +26,11 @@ static inline void Conv(const 
tflite::ConvParams ¶ms, const tflite::RuntimeS const float *input_data, const tflite::RuntimeShape &filter_shape, const float *filter_data, const tflite::RuntimeShape &bias_shape, const float *bias_data, const tflite::RuntimeShape &output_shape, - float *output_data, const tflite::RuntimeShape &im2col_shape, - float *im2col_data) + float *output_data, const tflite::RuntimeShape &scratchpad_shape, + float *scratchpad_data) { - (void)im2col_shape; - (void)im2col_data; + (void)scratchpad_shape; + (void)scratchpad_data; tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data, tflite::RuntimeShape(), nullptr); @@ -40,14 +40,14 @@ static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeS const uint8 *input_data, const tflite::RuntimeShape &filter_shape, const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, const int32 *bias_data, const tflite::RuntimeShape &output_shape, - uint8 *output_data, const tflite::RuntimeShape &im2col_shape, - uint8 *im2col_data) + uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + uint8 *scratchpad_data) { - (void)im2col_shape; - (void)im2col_data; + (void)scratchpad_shape; + (void)scratchpad_data; tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, - bias_shape, bias_data, output_shape, output_data, im2col_shape, - im2col_data, nullptr); + bias_shape, bias_data, output_shape, output_data, scratchpad_shape, + scratchpad_data, nullptr); } static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_t *mult, @@ -55,16 +55,31 @@ static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_ const int8 *input_data, const tflite::RuntimeShape &filter_shape, const int8 *filter_data, const tflite::RuntimeShape &bias_shape, const int32 *bias_data, const tflite::RuntimeShape &output_shape, - int8 *output_data, const tflite::RuntimeShape &im2col_shape, - int8 
*im2col_data) + int8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + int8 *scratchpad_data) { - (void)im2col_shape; - (void)im2col_data; + (void)scratchpad_shape; + (void)scratchpad_data; tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data); } +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::ConvParams ¶ms, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + (void)input_data_type; + (void)params; + (void)input_shape; + (void)filter_shape; + (void)output_shape; + scratchpad->set_allocatable(false); +} + } // namespace luci_interpreter_pal #endif // LUCI_INTERPRETER_PAL_CONV2D_H diff --git a/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h new file mode 100644 index 000000000..c9d1a2948 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H +#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H + +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h> +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +DepthwiseConvPerChannel(const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &filter_shape, + const T *filter_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, const tflite::RuntimeShape &output_shape, + T *output_data, const tflite::RuntimeShape &scratchpad_shape, + T *scratchpad_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "DepthwiseConvPerChannel NYI"); + (void)params; + (void)output_multiplier; + (void)output_shift; + (void)input_shape; + (void)output_data; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void DepthwiseConvPerChannel<int8_t>( + const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_integer_ops::DepthwiseConvPerChannel( + params, output_multiplier, output_shift, input_shape, input_data, filter_shape, 
filter_data, + bias_shape, bias_data, output_shape, output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const tflite::DepthwiseParams ¶ms, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) + +{ + (void)params; + (void)input_data_type; + (void)input_shape; + (void)filter_shape; + (void)output_shape; + + scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H diff --git a/compiler/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-interpreter/pal/mcu/PALDequantize.h new file mode 100644 index 000000000..15ff0327b --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALDequantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H +#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h" +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ + +template <typename T> +static inline void Dequantize(tflite::DequantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape, + output_data); +} + +static inline void Dequantize(tflite::DequantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const uint8_t *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H diff --git a/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h b/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h new file mode 100644 index 000000000..048624d74 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H +#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H + +#include <tensorflow/lite/kernels/internal/reference/fully_connected.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void FullyConnected(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &filter_shape, const T *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "FullyConnected NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + } +} + +template <> +inline void +FullyConnected<int8_t>(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape, + filter_data, bias_shape, bias_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H diff --git a/compiler/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-interpreter/pal/mcu/PALMul.h index 2b46b100c..347a97a83 100644 --- a/compiler/luci-interpreter/pal/mcu/PALMul.h +++ b/compiler/luci-interpreter/pal/mcu/PALMul.h @@ -21,21 +21,21 @@ namespace luci_interpreter_pal { +template <typename T> static inline void Mul(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, - const float 
*input1_data, const tflite::RuntimeShape &input2_shape, - const float *input2_data, const tflite::RuntimeShape &output_shape, - float *output_data) + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, + T *output_data) { tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data); } -static inline void BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, - const tflite::RuntimeShape &input1_shape, - const float *input1_data, - const tflite::RuntimeShape &input2_shape, - const float *input2_data, - const tflite::RuntimeShape &output_shape, float *output_data) +template <typename T> +static inline void +BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data) { tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data); diff --git a/compiler/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-interpreter/pal/mcu/PALQuantize.h new file mode 100644 index 000000000..6046789ae --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALQuantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H +#define LUCI_INTERPRETER_PAL_QUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Quantize(tflite::QuantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data); +} + +template <typename Input, typename Output> +static inline void Requantize(const Input *input_data, int32_t size, + int32_t effective_scale_multiplier, int32_t effective_scale_shift, + int32_t input_zero_point, int32_t output_zero_point, + Output *output_data) +{ + tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier, + effective_scale_shift, input_zero_point, output_zero_point, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H diff --git a/compiler/luci-interpreter/pal/mcu/PALSVDF.h b/compiler/luci-interpreter/pal/mcu/PALSVDF.h new file mode 100644 index 000000000..3bba668fb --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALSVDF.h @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SVDF_H +#define LUCI_INTERPRETER_PAL_SVDF_H + +#include <tensorflow/lite/kernels/internal/reference/svdf.h> + +namespace luci_interpreter_pal +{ +static inline void +IntegerSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape, + const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, int16_t *activation_state_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data, + int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a, + int scale_2_b, int32_t input_zp, int32_t output_zp) +{ + const int n_rank = params.rank; + const int n_batch = input_shape.Dims(0); + const int n_input = input_shape.Dims(1); + const int n_filter = weight_feature_shape.Dims(0); + const int n_unit = n_filter / n_rank; + const int n_memory = weight_time_shape.Dims(1); + + // Left shift the activation_state. + { + int16_t *new_state_start = activation_state_data; + const int16_t *old_state_start = activation_state_data + 1; + const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory; + while (old_state_start != old_state_end) + { + *new_state_start++ = *old_state_start++; + } + } + + // Note: no need to clear the latest activation, matmul is not accumulative. + + // Feature matmul. 
+ { + const int32_t output_max = std::numeric_limits<int16_t>::max(); + const int32_t output_min = std::numeric_limits<int16_t>::min(); + int16_t *result_in_batch = activation_state_data + (n_memory - 1); + for (int b = 0; b < n_batch; b++) + { + const int8_t *matrix_ptr = weight_feature_data; + for (int r = 0; r < n_filter; r++) + { + int32_t dot_prod = 0; + const int8_t *vector_in_batch = input_data + b * n_input; + for (int c = 0; c < n_input; c++) + { + dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp); + } + dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b); + dot_prod = std::min(std::max(output_min, dot_prod), output_max); + // This assumes state is symmetrically quantized. Otherwise last bit of + // state should be initialized to its zero point and accumulate the + // dot_prod. + // Equivalent as the following: + // result_in_batch = zero point, which happens to be zero. + // result_in_batch += dot_prod_56. + *result_in_batch = dot_prod; + result_in_batch += n_memory; + } + } + } + + // Time. + { + for (int b = 0; b < n_batch; ++b) + { + int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter; + + // Perform batched vector dot product: + const int16_t *vector1_ptr = weight_time_data; + const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter; + + for (int i = 0; i < n_filter; i++) + { + *scratch_ptr_batch = 0; + for (int j = 0; j < n_memory; j++) + { + *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++; + } + scratch_ptr_batch++; + } + } + } + + // Reduce, add bias, rescale, activation. + { + // Add bias. 
+ if (bias_data) + { + // Vector batch assign: + for (int i = 0; i < n_batch; ++i) + { + int32_t *output_ptr = output_temp_data + i * n_unit; + const int32_t *bias_ptr = bias_data; + for (int j = 0; j < n_unit; ++j) + { + *output_ptr++ = *bias_ptr++; + } + } + } + else + { + int32_t *output_ptr = output_temp_data; + for (int i = 0; i < n_batch * n_unit; ++i) + { + *output_ptr++ = 0; + } + } + + // Reduce. + for (int b = 0; b < n_batch; ++b) + { + int32_t *output_temp_ptr = output_temp_data + b * n_unit; + int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter; + + // Reduction sum vector + for (int i = 0; i < n_unit; ++i) + { + for (int j = 0; j < n_rank; ++j) + { + output_temp_ptr[i] += *scratch_ptr_batch++; + } + } + } + + // Rescale. + const int32_t output_max = std::numeric_limits<int8_t>::max(); + const int32_t output_min = std::numeric_limits<int8_t>::min(); + for (int i = 0; i < n_batch * n_unit; ++i) + { + int32_t x1 = output_temp_data[i]; + int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b); + int32_t x3 = x2 + output_zp; + int32_t x4 = std::min(std::max(output_min, x3), output_max); + output_data[i] = static_cast<int8_t>(x4); + } + } +} +static inline void +FloatSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &weight_feature_shape, + const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const float *weight_time_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, float *scratchpad_data, float *activation_state_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + const int32_t rank = params.rank; + const int32_t batch_size = input_shape.Dims(0); + const int32_t input_size = input_shape.Dims(1); + const int32_t num_filters = weight_feature_shape.Dims(0); + const int32_t num_units = num_filters / rank; + const int32_t memory_size = weight_time_shape.Dims(1); + + // Left shift the 
activation_state. + { + float *new_state_start = activation_state_data; + const float *old_state_start = activation_state_data + 1; + const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size; + while (old_state_start != old_state_end) + { + *new_state_start++ = *old_state_start++; + } + } + + // Note: no need to clear the latest activation, matmul is not accumulative. + + // Compute conv1d(inputs, weights_feature). + // The activation_state's rightmost column is used to save current cycle + // activation. This is achieved by starting at state_ptr[memory_size - 1] and + // having the stride equal to memory_size. + + // Perform batched matrix vector multiply operation: + { + const float *matrix = weight_feature_data; + const float *vector = input_data; + float *result = &activation_state_data[memory_size - 1]; + float *result_in_batch = result; + for (int i = 0; i < batch_size; ++i) + { + const float *matrix_ptr = matrix; + for (int j = 0; j < num_filters; ++j) + { + float dot_prod = 0.0f; + const float *vector_in_batch = vector + i * input_size; + for (int k = 0; k < input_size; ++k) + { + dot_prod += *matrix_ptr++ * *vector_in_batch++; + } + *result_in_batch = dot_prod; + result_in_batch += memory_size; + } + } + } + + tflite::reference_ops::ApplyTimeWeightsBiasAndActivation( + batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data, + params.activation, activation_state_data, scratchpad_data, output_data); +} + +static inline void SetupScratchpadTensor( + const luci_interpreter::DataType &input_data_type, + const luci_interpreter::DataType &weight_feature_data_type, + luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2, + luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4, + luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6, + const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape, + 
const int32_t batch_size, const int32_t num_filters, const int32_t num_units) +{ + + if (input_data_type == loco::DataType::FLOAT32 && + (weight_feature_data_type == loco::DataType::S8 || + weight_feature_data_type == loco::DataType::U8)) + { + (void)input_shape; + (void)weight_time_shape; + (void)scratchpad_3; + (void)scratchpad_4; + (void)scratchpad_5; + (void)scratchpad_6; + + throw std::runtime_error("Hybrid type is not currently supported for mcu platform"); + } + + // Resize scratchpad_1 tensor + scratchpad_1->resize({batch_size, num_filters}); + + if (input_data_type == loco::DataType::S8) + { + // Resize scratchpad_2 for full_integer op + scratchpad_2->resize({batch_size, num_units}); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SVDF_H diff --git a/compiler/luci-interpreter/pal/mcu/pal.cmake b/compiler/luci-interpreter/pal/mcu/pal.cmake index a479d407b..907d51de6 100644 --- a/compiler/luci-interpreter/pal/mcu/pal.cmake +++ b/compiler/luci-interpreter/pal/mcu/pal.cmake @@ -39,7 +39,9 @@ macro(add_pal_to_target TGT) # TODO put it back, I changed my mind. 
# instead add sources with visitors in this library - set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) + set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc) add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES}) set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(luci_interpreter_mcu_pal PRIVATE diff --git a/compiler/luci-interpreter/src/CMakeLists.txt b/compiler/luci-interpreter/src/CMakeLists.txt index e37150336..997b75a84 100644 --- a/compiler/luci-interpreter/src/CMakeLists.txt +++ b/compiler/luci-interpreter/src/CMakeLists.txt @@ -13,6 +13,7 @@ set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}") set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}") set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}") set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}") add_subdirectory(core) message(STATUS "LUCI INTERPRETER CORE") @@ -20,6 +21,8 @@ add_subdirectory(kernels) message(STATUS "LUCI INTERPRETER KERNELS") add_subdirectory(loader) message(STATUS "LUCI INTERPRETER LOADER") +add_subdirectory(import) +message(STATUS "LUCI INTERPRETER IMPORT") message(STATUS "LUCI INTERPTER INITALIZED") diff --git a/compiler/luci-interpreter/src/Interpreter.cpp b/compiler/luci-interpreter/src/Interpreter.cpp index 1b8792a6c..8cf272efd 100644 --- a/compiler/luci-interpreter/src/Interpreter.cpp +++ b/compiler/luci-interpreter/src/Interpreter.cpp @@ -70,25 +70,30 @@ private: } // namespace +Interpreter::Interpreter(const luci::Module *module) +{ + _runtime_to_ir = 
std::make_unique<RuntimeToIR>(); + _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers); + _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get()); + + _default_memory_manager = std::make_unique<SimpleMemoryManager>(); + + ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor, + _default_memory_manager.get()); + loader.load(); +} + Interpreter::Interpreter(const luci::Module *module, luci_interpreter::IMemoryManager *memory_manager) { + assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead"); + _runtime_to_ir = std::make_unique<RuntimeToIR>(); _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers); _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get()); - if (memory_manager == nullptr) - { - _default_memory_manager = std::make_unique<SimpleMemoryManager>(); - _memory_manager = _default_memory_manager.get(); - } - else - { - _memory_manager = memory_manager; - } - ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor, - _memory_manager); + memory_manager); loader.load(); } diff --git a/compiler/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-interpreter/src/core/CMakeLists.txt index 4430cba11..c2471e01c 100644 --- a/compiler/luci-interpreter/src/core/CMakeLists.txt +++ b/compiler/luci-interpreter/src/core/CMakeLists.txt @@ -10,7 +10,9 @@ set(SOURCES Tensor.cpp) add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES}) -set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") 
target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang) diff --git a/compiler/luci-interpreter/src/core/KernelParams.h b/compiler/luci-interpreter/src/core/KernelParams.h index ee0390fcc..958fd4b74 100644 --- a/compiler/luci-interpreter/src/core/KernelParams.h +++ b/compiler/luci-interpreter/src/core/KernelParams.h @@ -43,6 +43,12 @@ struct ArgMaxParams DataType output_type; }; +struct BatchMatMulParams +{ + bool adj_x; + bool adj_y; +}; + struct ConcatenationParams { int axis; @@ -83,6 +89,13 @@ struct DivParams struct FullyConnectedParams { Activation activation; + bool keep_num_dims = false; +}; + +struct GatherParams +{ + int32_t axis; + int32_t batch_dims; }; struct InstanceNormParams @@ -119,6 +132,11 @@ struct MulParams Activation activation; }; +struct OneHotParams +{ + int32_t axis; +}; + struct PackParams { int32_t values_count; @@ -157,6 +175,13 @@ struct SubParams Activation activation; }; +struct SVDFParams +{ + bool asymmetric_quantize_inputs; + int32_t svdf_rank; + Activation activation; +}; + struct SpaceToDepthParams { int block_size; diff --git a/compiler/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-interpreter/src/import/CMakeLists.txt new file mode 100644 index 000000000..dd9733f92 --- /dev/null +++ b/compiler/luci-interpreter/src/import/CMakeLists.txt @@ -0,0 +1,15 @@ +set(SOURCES + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h" + GraphBuilderRegistry.cpp) + +# include specific builders +file(GLOB_RECURSE NODES "Nodes/*") +list(APPEND SOURCES ${NODES}) + +add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) + +target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") +target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import) diff --git 
a/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp new file mode 100644 index 000000000..a33bca6a4 --- /dev/null +++ b/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "luci_interpreter/GraphBuilderRegistry.h" +#include "Nodes/CircleReferencingConst.h" + +namespace luci_interpreter +{ + +std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying() +{ + auto builder = std::make_unique<luci::GraphBuilderRegistry>(); + { + // redefine NodeBuilder of BUFFER type + builder->add(std::make_unique<CircleReferencingConstNodeBuilder>()); + } + + return builder; +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp new file mode 100644 index 000000000..14e90f240 --- /dev/null +++ b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "CircleReferencingConst.h" + +#include <vector> + +namespace +{ + +// helper struct which describes data loaded to custom_options of CircleReferencingConst node +struct ConstDataReference +{ + const uint8_t *data = nullptr; + uint32_t size = 0; +}; + +} // namespace + +namespace luci_interpreter +{ +using namespace luci; + +CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index, + GraphBuilderContext *context) const +{ + assert(tensor_index >= 0); + + const auto graph = context->graph(); + const auto reader = context->reader(); + const auto tensors = reader->tensors(); + auto const const_tensor = tensors[tensor_index]; + assert(const_tensor != nullptr); + if (const_tensor->is_variable()) + { + // Create CircleVariable for variable + return nullptr; + } + + auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data()); + auto const const_dims = wrap(const_tensor->shape()); // in NHWC + if (const_dims.empty() && buffer.empty()) + { + // unknown shape tensor and scalar tensor + return nullptr; + } + + // if tensor_index is used as output to some other operator, this is not a constant + auto tensoroutputs = context->tensoroutputs(); + if (tensoroutputs->find(tensor_index)) + { + // other operator output tensor + return nullptr; + } + + uint32_t num_elements = 1; + for (uint32_t r = 0; r < const_dims.size(); ++r) + { + num_elements = num_elements * const_dims[r]; + } + + if (buffer.empty() && num_elements > 0) + { + // normal empty tensor + return nullptr; + } + + // create CircleReferencingConst + 
auto custom_node = graph->nodes()->create<CircleCustom>(0, 1); + { + custom_node->custom_code("CircleReferencingConst"); + + copy_tensor_attributes(const_tensor, custom_node); + custom_node->shape_status(luci::ShapeStatus::VALID); + + // custom options stores size of buffer and pointer's value to buffer's data + { + std::vector<uint8_t> custom_options(sizeof(ConstDataReference)); + { + auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data()); + const_data_ref = {buffer.data(), buffer.size()}; + } + custom_node->custom_options(custom_options); + } + } + + // Output of CircleCustom node presented with CircleConstNode + auto out_node = graph->nodes()->create<CircleCustomOut>(); + { + out_node->index(0); + out_node->input(custom_node); + + copy_tensor_attributes(const_tensor, out_node); + out_node->shape_status(luci::ShapeStatus::VALID); + } + + return out_node; +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h new file mode 100644 index 000000000..ed8f95124 --- /dev/null +++ b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__ +#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__ + +#include <luci/Import/NodeBuilder.h> + +#include <luci/IR/Nodes/CircleConst.h> + +namespace luci_interpreter +{ +using namespace luci; + +/** + * @brief Builder creates CircleCustom node with pointer to constants data from Tensor with buffer. + */ +class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER> +{ +public: + CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final; +}; + +} // namespace luci_interpreter + +#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__ diff --git a/compiler/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-interpreter/src/kernels/Add.cpp index 7381c3849..d7bf3084f 100644 --- a/compiler/luci-interpreter/src/kernels/Add.cpp +++ b/compiler/luci-interpreter/src/kernels/Add.cpp @@ -38,8 +38,11 @@ Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddPa void Add::configure() { LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); if (input1()->element_type() == DataType::S16) { + LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 && + input2()->zero_points().size() == 1); LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 && output()->zero_point() == 0); } @@ -54,6 +57,12 @@ void Add::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; case DataType::U8: evalQuantized(); break; @@ -67,13 +76,8 @@ void Add::execute() const void Add::evalFloat() const { - float activation_min{}; - float activation_max{}; - calculateActivationRange(_params.activation, &activation_min, &activation_max); - tflite::ArithmeticParams params{}; - 
params.float_activation_min = activation_min; - params.float_activation_max = activation_max; + fillArithmeticActivationRange<float>(params, _params.activation); const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( getTensorShape(input1()), getTensorShape(input2()), ¶ms); @@ -92,6 +96,28 @@ void Add::evalFloat() const } } +template <typename T> void Add::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<T>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastAdd4DSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + void Add::evalQuantized() const { const auto input1_scale = static_cast<double>(input1()->scale()); diff --git a/compiler/luci-interpreter/src/kernels/Add.h b/compiler/luci-interpreter/src/kernels/Add.h index 79518845d..91d95b6af 100644 --- a/compiler/luci-interpreter/src/kernels/Add.h +++ b/compiler/luci-interpreter/src/kernels/Add.h @@ -39,6 +39,7 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; void evalQuantized() const; void evalQuantizedS16() const; }; diff --git a/compiler/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-interpreter/src/kernels/Add.test.cpp index 847b65667..b8b1c3089 100644 --- a/compiler/luci-interpreter/src/kernels/Add.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Add.test.cpp @@ -166,6 +166,69 @@ TEST_F(AddTest, Float) } } +template <loco::DataType DType> void 
CheckInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<dtype>> test_outputs = { + {3, 3, 0, 1, 0, 8, 5, 1, 0, 0, 2, 6, 8, 0, 1, 0, 5, 1, + 5, 4, 0, 2, 2, 9, 11, 0, 4, 0, 8, 5, 11, 2, 4, 0, 8, 7}, + {3, 3, 0, 0, 5, 1, 5, 4, 4, 0, 8, 7}, + {3, 6, 0, 3, 0, 0, 5, 4, 2, 1, 0, 0, 8, 0, 5, 0, 1, 0, + 0, 2, 2, 4, 7, 9, 6, 0, 8, 0, 13, 5, 6, 0, 8, 2, 13, 7}, + {3, 6, 2, 1, 1, 0, 0, 2, 8, 0, 13, 7}}; + std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1}; + std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } + // Re-run with exchanged inputs. 
+ for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +}; + +TEST_F(AddTest, SInt32) +{ + CheckInteger<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(AddTest, SInt64) +{ + CheckInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + TEST_F(AddTest, SInt16) { Shape base_shape = {2, 3, 1, 2}; @@ -248,11 +311,24 @@ TEST_F(AddTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST_F(AddTest, Invalid_Input_Type_NEG) +TEST_F(AddTest, Invalid_Output_Type_NEG) { Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); - Tensor output_tensor = makeOutputTensor(DataType::S64); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(AddTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); AddParams params{}; params.activation = Activation::RELU; @@ -263,6 +339,19 @@ TEST_F(AddTest, Invalid_Input_Type_NEG) EXPECT_ANY_THROW(kernel.execute()); 
} +TEST_F(AddTest, Invalid_Quantization_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + AddParams params{}; + params.activation = Activation::NONE; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp index 119c69ccf..474f4b321 100644 --- a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp +++ b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp @@ -57,7 +57,7 @@ template <typename T> class ArgMaxTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(ArgMaxTest, DataTypes); +TYPED_TEST_SUITE(ArgMaxTest, DataTypes); TYPED_TEST(ArgMaxTest, Simple) { diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp index 5545fb4d4..d3bade9e4 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp @@ -18,8 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h> -#include <tensorflow/lite/kernels/internal/reference/pooling.h> +#include "PALAveragePool2d.h" #include <stdexcept> @@ -29,8 +28,9 @@ namespace luci_interpreter namespace kernels { -AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms) - : KernelWithParams<Pool2DParams>({input}, {output}, params) +AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad, + const Pool2DParams ¶ms) + : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params) { } 
@@ -76,6 +76,10 @@ void AveragePool2D::configure() LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); } output()->resize({batches, output_height, output_width, depth}); + + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), + getTensorShape(input()), getTensorShape(output())); } void AveragePool2D::execute() const @@ -155,9 +159,14 @@ void AveragePool2D::evalSInt8() const params.quantized_activation_min = activation_min; params.quantized_activation_max = activation_max; - tflite::reference_integer_ops::AveragePool( + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<int8_t>(); + + luci_interpreter_pal::AveragePool<int8_t>( params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()), - getTensorData<int8_t>(output())); + getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data); } void AveragePool2D::evalSInt16() const diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-interpreter/src/kernels/AveragePool2D.h index b98367f31..2c8fe16e7 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.h +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.h @@ -28,7 +28,8 @@ namespace kernels class AveragePool2D : public KernelWithParams<Pool2DParams> { public: - AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms); + AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad, + const Pool2DParams ¶ms); const Tensor *input() const { return _inputs[0]; } Tensor *output() const { return _outputs[0]; } diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp index 7ed421129..478bfa68e 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp +++ 
b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp @@ -46,6 +46,7 @@ TEST_F(AveragePool2DTest, Float) Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -55,8 +56,9 @@ TEST_F(AveragePool2DTest, Float) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); kernel.configure(); + _memory_manager->allocate_memory(scratchpad); _memory_manager->allocate_memory(output_tensor); kernel.execute(); @@ -78,6 +80,7 @@ TEST_F(AveragePool2DTest, Uint8_0) Tensor input_tensor = makeInputTensor<DataType::U8>( {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -87,8 +90,9 @@ TEST_F(AveragePool2DTest, Uint8_0) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); kernel.configure(); + _memory_manager->allocate_memory(scratchpad); _memory_manager->allocate_memory(output_tensor); kernel.execute(); @@ -107,6 +111,7 @@ TEST_F(AveragePool2DTest, Uint8_1) Tensor input_tensor = makeInputTensor<DataType::U8>( {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -116,9 +121,10 @@ 
TEST_F(AveragePool2DTest, Uint8_1) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); kernel.configure(); _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0})); @@ -141,6 +147,7 @@ TEST_F(AveragePool2DTest, SInt16) Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + Tensor scratchpad(DataType::S16, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -150,8 +157,9 @@ TEST_F(AveragePool2DTest, SInt16) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); kernel.configure(); + _memory_manager->allocate_memory(scratchpad); _memory_manager->allocate_memory(output_tensor); kernel.execute(); @@ -174,6 +182,7 @@ TEST_F(AveragePool2DTest, SInt8) Tensor input_tensor = makeInputTensor<DataType::S8>( input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second); + Tensor scratchpad(DataType::S8, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -183,8 +192,9 @@ TEST_F(AveragePool2DTest, SInt8) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); kernel.configure(); + _memory_manager->allocate_memory(scratchpad); _memory_manager->allocate_memory(output_tensor); kernel.execute(); @@ -203,6 +213,7 @@ 
TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG) Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -212,7 +223,7 @@ TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); EXPECT_ANY_THROW(kernel.configure()); } @@ -227,6 +238,7 @@ TEST_F(AveragePool2DTest, In_Out_Type_NEG) Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -236,7 +248,7 @@ TEST_F(AveragePool2DTest, In_Out_Type_NEG) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); EXPECT_ANY_THROW(kernel.configure()); } @@ -252,6 +264,7 @@ TEST_F(AveragePool2DTest, Quant_Param_NEG) Tensor input_tensor = makeInputTensor<DataType::U8>( {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -261,7 +274,7 @@ TEST_F(AveragePool2DTest, Quant_Param_NEG) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); 
EXPECT_ANY_THROW(kernel.configure()); } diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp new file mode 100644 index 000000000..24ca22996 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/BatchMatMul.h" +#include "kernels/Utils.h" + +#include "PALBatchMatMul.h" + +#include <tensorflow/lite/kernels/internal/reference/transpose.h> + +#include <stdexcept> + +namespace +{ + +tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape) +{ + tflite::RuntimeShape swapped_shape(shape); + const int32_t dims = shape.DimensionsCount(); + swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1)); + swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2)); + return swapped_shape; +} + +} // namespace + +namespace luci_interpreter +{ +namespace kernels +{ + +BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, + Tensor *y_tmp, const BatchMatMulParams ¶ms) + : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params) +{ +} + +void BatchMatMul::configure() +{ + auto lhs = x(); + auto rhs = y(); + auto adj_x = params().adj_x; + auto adj_y = params().adj_y; + + // TODO Support non-float types + if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32) + throw std::runtime_error("Unsupported type."); + + LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type()); + + auto lhs_rank = lhs->shape().num_dims(); + auto rhs_rank = rhs->shape().num_dims(); + LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4); + LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4); + + auto lhs_scratchpad = temp_lhs(); + auto rhs_scratchpad = temp_rhs(); + luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs), + getTensorShape(rhs)); + + auto output_rank = std::max(lhs_rank, rhs_rank); + + auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs)); + auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs)); + + // Ensure any batch dimensions obey broacasting rules. 
+ for (int i = 0; i < output_rank - 2; ++i) + { + const int lhs_dim = extended_lhs_shape.Dims(i); + const int rhs_dim = extended_rhs_shape.Dims(i); + if (lhs_dim != rhs_dim) + { + if (lhs_dim != 1) + { + LUCI_INTERPRETER_CHECK(rhs_dim == 1); + } + } + } + + // Ensure other dimensions work for matrix multiplication. + int accum_dim_lhs = + adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1); + int accum_dim_rhs = + adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2); + LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs); + + Shape output_shape(output_rank); + // Fill in any broadcast dimensions. + for (int i = 0; i < output_rank - 2; ++i) + { + const int lhs_dim = extended_lhs_shape.Dims(i); + const int rhs_dim = extended_rhs_shape.Dims(i); + int broadcast_dim = lhs_dim; + if ((lhs_dim != rhs_dim) && (lhs_dim == 1)) + { + broadcast_dim = rhs_dim; + } + output_shape.dim(i) = broadcast_dim; + } + // Fill in the matmul dimensions. + int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2; + int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1; + + output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index); + output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index); + + output()->resize(output_shape); +} + +void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out) +{ + tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in)); + tflite::RuntimeShape shape(getTensorShape(tensor_in)); + tflite::TransposeParams params; + int rank = shape.DimensionsCount(); + params.perm_count = rank; + for (int i = 0; i < rank - 2; ++i) + { + params.perm[i] = i; + } + // Transpose the last two dimensions. 
+ params.perm[rank - 2] = rank - 1; + params.perm[rank - 1] = rank - 2; + transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2)); + transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1)); + switch (tensor_in->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in), + transposed_shape, getTensorData<float>(tensor_out)); + break; + default: + throw std::runtime_error("Only suppport fp32 BatchMatMul for now."); + } +} + +void BatchMatMul::execute() const +{ + auto lhs = x(); + auto rhs = y(); + + bool adj_x = params().adj_x; + bool adj_y = params().adj_y; + + auto orig_lhs_shape = getTensorShape(lhs); + auto orig_rhs_shape = getTensorShape(rhs); + + auto rhs_tensor = adj_y ? rhs : temp_rhs(); + auto lhs_tensor = adj_x ? temp_lhs() : lhs; + if (not adj_y) + { + TransposeRowsColumns(rhs, temp_rhs()); + } + if (adj_x) + { + TransposeRowsColumns(lhs, temp_lhs()); + } + tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape); + tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape); + + switch (x()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape, + getTensorData<float>(lhs_tensor), getTensorShape(output()), + getTensorData<float>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.h b/compiler/luci-interpreter/src/kernels/BatchMatMul.h new file mode 100644 index 000000000..744f49795 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H +#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class BatchMatMul : public KernelWithParams<BatchMatMulParams> +{ +public: + BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp, + const BatchMatMulParams ¶ms); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + Tensor *temp_lhs() const { return _outputs[1]; } + Tensor *temp_rhs() const { return _outputs[2]; } +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp new file mode 100644 index 000000000..edfa3a685 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/BatchMatMul.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class BatchMatMulTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(BatchMatMulTest, Float) +{ + std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6}; + std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4})); +} + 
+TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint) +{ + std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6}; + std::vector<float> rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18}; + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 4, 3}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = true; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4})); +} + +TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint) +{ + std::vector<float> lhs_data = {1, 4, 2, 5, 3, 6}; + std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 3, 2}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = true; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + 
_memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4})); +} + +TEST_F(BatchMatMulTest, Float_BatchSizeTwo) +{ + std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}; + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({2, 2, 3}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632., + 767., 800., 833., 866.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4})); +} + +TEST_F(BatchMatMulTest, Float_DiffBatch) +{ + std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}; + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({2, 1, 6}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + 
makeInputTensor<DataType::FLOAT32>({1, 6, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 1210.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4})); +} + +TEST_F(BatchMatMulTest, Invalid_Shape_NEG) +{ + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, Invalid_Batch_NEG) +{ + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get()); + Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, 
""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, Invalid_Rank_NEG) +{ + Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({4}, {1, 2, 3, 4}, _memory_manager.get()); + Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, Invalid_Rank2_NEG) +{ + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get()); + Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, TypeMisMatch_NEG) +{ + Tensor lhs_tensor = + makeInputTensor<DataType::U8>({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::U8, Shape({}), {}, ""); + Tensor 
rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp index f3a344974..52647a763 100644 --- a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp +++ b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp @@ -58,7 +58,7 @@ template <typename T> class BatchToSpaceNDTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(BatchToSpaceNDTest, DataTypes); +TYPED_TEST_SUITE(BatchToSpaceNDTest, DataTypes); TYPED_TEST(BatchToSpaceNDTest, Simple) { diff --git a/compiler/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-interpreter/src/kernels/CMakeLists.txt index 1b7d0f66a..9f4ba0e0b 100644 --- a/compiler/luci-interpreter/src/kernels/CMakeLists.txt +++ b/compiler/luci-interpreter/src/kernels/CMakeLists.txt @@ -15,7 +15,9 @@ endmacro(REGISTER_KERNEL) include(${KERNEL_REGISTER_FILE}) add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES}) -set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR}) target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE}) diff --git a/compiler/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-interpreter/src/kernels/Cast.test.cpp index 731260522..4713ad34c 100644 --- a/compiler/luci-interpreter/src/kernels/Cast.test.cpp +++ 
b/compiler/luci-interpreter/src/kernels/Cast.test.cpp @@ -79,7 +79,7 @@ template <typename T> class CastTest : public ::testing::Test using IntDataTypes = ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>; -TYPED_TEST_CASE(CastTest, IntDataTypes); +TYPED_TEST_SUITE(CastTest, IntDataTypes); TYPED_TEST(CastTest, FloatToInt) { diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.cpp index 7cfdf34b9..46ee5941e 100644 --- a/compiler/luci-interpreter/src/kernels/Concatenation.cpp +++ b/compiler/luci-interpreter/src/kernels/Concatenation.cpp @@ -69,11 +69,21 @@ void Concatenation::configure() Shape output_shape = t0->shape(); output_shape.dim(axis) = sum_axis; - // TODO S8 type needs more checking: quantization parameters of all input tensors and the output - // tensor should be the same. Note that there is no such requirement for U8 type. - if (t0->element_type() == DataType::S8) - throw std::runtime_error("Unsupported type."); + // If input tensors are INT8 type then quantization parameters of all input tensors and the output + // should be the same + for (auto current_tensor : _inputs) + { + if (current_tensor->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() == + output()->quantized_dimension()); + LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() == + current_tensor->scales().size()); + LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points()); + LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales()); + } + } output()->resize(output_shape); } diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp index e4b50611a..f893b38fd 100644 --- a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp @@ -183,12 +183,12 
@@ TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST_F(ConcatenationTest, Unsupported_Configure_Type_NEG) +TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG) { - std::vector<int8_t> input1_data{1, 2, 3, 4, 5, 6}; - std::vector<int8_t> input2_data{7, 8, 9, 10, 11, 12}; - Tensor input1_tensor = makeInputTensor<DataType::S8>({2, 3}, input1_data, _memory_manager.get()); - Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 3}, input2_data, _memory_manager.get()); + std::vector<uint8_t> input1_data{1, 2, 3, 4}; + std::vector<int8_t> input2_data{5, 6, 7, 8}; + Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S8); ConcatenationParams params{}; @@ -199,6 +199,51 @@ TEST_F(ConcatenationTest, Unsupported_Configure_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } +TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + int quantized_dimension = 3; + std::vector<float> scales{0.1, 0.2, 0.3}; + std::vector<int32_t> zero_points{1, -1, 1}; + + Tensor input1_tensor = makeInputTensor<DataType::S8>( + {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S8>( + {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0)); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, 
Int8_Mismatching_Zero_Point_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4}; + std::vector<float> input2_data{5, 6, 7, 8}; + float scale = 0.1; + int32_t zero_point_1 = 1; + int32_t zero_point_2 = -1; + + Tensor input1_tensor = + makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + // TODO: Remove this test when concat w/ fused_activation is supported TEST_F(ConcatenationTest, With_Fused_Activation_NEG) { diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp index 5647f4c44..234f95425 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp @@ -30,8 +30,8 @@ namespace kernels { Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, - Tensor *im2col, const Conv2DParams ¶ms) - : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, im2col}, params) + Tensor *scratchpad, const Conv2DParams ¶ms) + : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params) { } @@ -108,27 +108,18 @@ void Conv2D::configure() output()->resize({batches, output_height, output_width, output_depth}); - // Allocate tensor for Im2Col, if needed. - // The checks here should be aligned with the actual implementation. 
- const bool need_dilated_im2col = - _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1; - const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 || - filter_height != 1 || filter_width != 1; - _need_im2col = - input()->element_type() != DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col); - if (_need_im2col) - { - const int input_depth = input_shape.dim(3); - Shape im2col_shape{batches, output_height, output_width, - input_depth * filter_height * filter_width}; - auto im2col = getOutputTensors()[1]; - im2col->resize(im2col_shape); - } - else - { - auto im2col = getOutputTensors()[1]; - im2col->set_allocatable(false); - } + // Allocate tensor for scratchpad, if needed. + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params, + getTensorShape(input()), getTensorShape(filter()), + getTensorShape(output())); switch (_params.activation) { @@ -193,16 +184,16 @@ void Conv2D::evalFloat() const params.float_activation_min = activation_min; params.float_activation_max = activation_max; - float *im2col_data = nullptr; - auto im2col = getOutputTensors()[1]; - if (_need_im2col) - { - im2col_data = im2col->data<float>(); - } - luci_interpreter_pal::Conv( - params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()), - getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()), - getTensorShape(output()), getTensorData<float>(output()), getTensorShape(im2col), im2col_data); + auto scratchpad = getOutputTensors()[1]; + float 
*scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<float>(); + + luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(filter()), getTensorData<float>(filter()), + getTensorShape(bias()), getTensorData<float>(bias()), + getTensorShape(output()), getTensorData<float>(output()), + getTensorShape(scratchpad), scratchpad_data); } void Conv2D::evalQuantized() const @@ -236,12 +227,12 @@ void Conv2D::evalQuantized() const params.quantized_activation_min = activation_min; params.quantized_activation_max = activation_max; - auto im2col = getOutputTensors()[1]; + auto scratchpad = getOutputTensors()[1]; luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()), getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()), getTensorData<uint8_t>(output()), - getTensorShape(im2col), getTensorData<uint8_t>(im2col)); + getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad)); } void Conv2D::evalQuantizedPerChannel() const @@ -364,18 +355,16 @@ void Conv2D::evalQuantizedS8PerChannel() const std::back_inserter(multipliers), [](ChannelQuantMultipliers cm) { return cm.multiplier; }); - int8_t *im2col_data = nullptr; - auto im2col = getOutputTensors()[1]; - if (_need_im2col) - { - im2col_data = im2col->data<int8_t>(); - } + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<int8_t>(); luci_interpreter_pal::ConvPerChannel( params, multipliers.data(), shifts.data(), getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()), - getTensorData<int8_t>(output()), getTensorShape(im2col), im2col_data); + getTensorData<int8_t>(output()), 
getTensorShape(scratchpad), scratchpad_data); } void Conv2D::evalQuantizedS16() const diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h index 5f1317638..330bf3a2a 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.h +++ b/compiler/luci-interpreter/src/kernels/Conv2D.h @@ -31,7 +31,7 @@ class Conv2D : public KernelWithParams<Conv2DParams> { public: Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, - Tensor *im2col, const Conv2DParams ¶ms); + Tensor *scratchpad, const Conv2DParams ¶ms); const Tensor *input() const { return _inputs[0]; } const Tensor *filter() const { return _inputs[1]; } @@ -49,7 +49,6 @@ private: void evalQuantizedS16() const; private: - bool _need_im2col = false; int32_t _padding_height{}; int32_t _padding_width{}; }; diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp index 9b1c09ba9..88e6e07f1 100644 --- a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp @@ -32,7 +32,7 @@ template <typename T> class DepthToSpaceTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(DepthToSpaceTest, DataTypes); +TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes); TYPED_TEST(DepthToSpaceTest, SimpleCase) { diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp index f2dbf6c68..c554c309d 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp @@ -18,9 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h> -#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h> -#include 
<tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h> +#include "PALDepthwiseConv2d.h" #include <stdexcept> @@ -30,8 +28,9 @@ namespace kernels { DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output, const DepthwiseConv2DParams ¶ms) - : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params) + Tensor *output, Tensor *scratchpad, + const DepthwiseConv2DParams ¶ms) + : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params) { } @@ -109,6 +108,16 @@ void DepthwiseConv2D::configure() filter_width, output_width); output()->resize({batches, output_height, output_width, channels_out}); + + tflite::DepthwiseParams params{}; + + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(), + getTensorShape(input()), getTensorShape(filter()), + getTensorShape(output())); } void DepthwiseConv2D::execute() const @@ -337,11 +346,16 @@ void DepthwiseConv2D::evalQuantizedS8PerChannel() const std::back_inserter(multipliers), [](ChannelQuantMultipliers cm) { return cm.multiplier; }); - tflite::reference_integer_ops::DepthwiseConvPerChannel( + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<int8_t>(); + + luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>( params, multipliers.data(), shifts.data(), getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()), - getTensorData<int8_t>(output())); + getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data); } void 
DepthwiseConv2D::evalQuantizedS16() const diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h index 6cffd6583..3d1faf6c1 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h @@ -29,7 +29,7 @@ class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams> { public: DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, - const DepthwiseConv2DParams ¶ms); + Tensor *scratchpad, const DepthwiseConv2DParams ¶ms); const Tensor *input() const { return _inputs[0]; } const Tensor *filter() const { return _inputs[1]; } diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp index 74975899a..6b4673f3e 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp @@ -59,6 +59,7 @@ TEST_F(DepthwiseConv2DTest, Float) makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); DepthwiseConv2DParams params{}; @@ -70,8 +71,10 @@ TEST_F(DepthwiseConv2DTest, Float) params.dilation_width_factor = 1; params.activation = Activation::RELU; - DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); kernel.configure(); + _memory_manager->allocate_memory(scratchpad); _memory_manager->allocate_memory(output_tensor); kernel.execute(); @@ -111,6 +114,7 @@ TEST_F(DepthwiseConv2DTest, Uint8) {4}, input_quant_param.first * input_quant_param.first, 
0, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); DepthwiseConv2DParams params{}; params.padding = Padding::VALID; @@ -121,9 +125,11 @@ TEST_F(DepthwiseConv2DTest, Uint8) params.dilation_width_factor = 1; params.activation = Activation::NONE; - DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); kernel.configure(); _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); kernel.execute(); std::vector<float> ref_output_data{ @@ -166,6 +172,7 @@ TEST_F(DepthwiseConv2DTest, SInt16) Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + Tensor scratchpad(DataType::S64, Shape({}), {}, ""); DepthwiseConv2DParams params{}; params.padding = Padding::VALID; @@ -176,9 +183,11 @@ TEST_F(DepthwiseConv2DTest, SInt16) params.dilation_width_factor = 1; params.activation = Activation::RELU; - DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); kernel.configure(); _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); @@ -224,6 +233,7 @@ TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights) Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + Tensor scratchpad(DataType::S16, Shape({}), {}, 
""); DepthwiseConv2DParams params{}; params.padding = Padding::VALID; @@ -234,9 +244,11 @@ TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights) params.dilation_width_factor = 1; params.activation = Activation::RELU; - DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); kernel.configure(); _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); @@ -299,6 +311,7 @@ TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights) _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); DepthwiseConv2DParams params{}; params.padding = Padding::VALID; @@ -309,9 +322,11 @@ TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights) params.dilation_width_factor = 1; params.activation = Activation::NONE; - DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); kernel.configure(); _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); @@ -375,6 +390,7 @@ TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights) _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::S8, Shape({}), {}, ""); DepthwiseConv2DParams params{}; params.padding = Padding::VALID; @@ -385,9 +401,11 @@ TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights) params.dilation_width_factor = 1; params.activation = Activation::NONE; - 
DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); kernel.configure(); _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); @@ -419,6 +437,7 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG) makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); DepthwiseConv2DParams params{}; params.padding = Padding::VALID; @@ -429,7 +448,8 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); EXPECT_ANY_THROW(kernel.configure()); } @@ -458,6 +478,7 @@ TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG) Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); DepthwiseConv2DParams params{}; params.padding = Padding::VALID; @@ -468,7 +489,8 @@ TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); EXPECT_ANY_THROW(kernel.configure()); } @@ 
-497,6 +519,7 @@ TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG) Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); DepthwiseConv2DParams params{}; params.padding = Padding::VALID; @@ -507,7 +530,8 @@ TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); EXPECT_ANY_THROW(kernel.configure()); } @@ -536,6 +560,7 @@ TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG) Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); DepthwiseConv2DParams params{}; params.padding = Padding::VALID; @@ -546,7 +571,8 @@ TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); EXPECT_ANY_THROW(kernel.configure()); } @@ -575,6 +601,7 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG) Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); DepthwiseConv2DParams params{}; params.padding = Padding::VALID; @@ -585,7 +612,8 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - 
DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); EXPECT_ANY_THROW(kernel.configure()); } diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.cpp new file mode 100644 index 000000000..96399e5c7 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Dequantize.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Dequantize.h" +#include "kernels/Utils.h" +#include "PALDequantize.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Dequantize::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 || + input()->element_type() == loco::DataType::U8 || + input()->element_type() == loco::DataType::S16); + + LUCI_INTERPRETER_CHECK(input()->scales().size() == 1); + + if (input()->element_type() == loco::DataType::S16) + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0); + + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32); + + output()->resize(input()->shape()); +} + +void Dequantize::execute() const +{ + tflite::DequantizationParams op_params; + op_params.zero_point = input()->zero_point(); + op_params.scale = input()->scale(); + + switch (input()->element_type()) + { + case loco::DataType::U8: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + } + case loco::DataType::S8: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData<int8_t>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + } + case loco::DataType::S16: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData<int16_t>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.h b/compiler/luci-interpreter/src/kernels/Dequantize.h new file mode 100644 index 000000000..5565df0e4 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Dequantize.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H +#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Dequantize : public Kernel +{ +public: + Dequantize(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp new file mode 100644 index 000000000..0cab633d6 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Dequantize.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class DequantizeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(DequantizeTest, Uint8) +{ + std::vector<uint8_t> input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255}; + + std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, ""); + + _memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, Sint8) +{ + std::vector<int8_t> input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127}; + + std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, ""); + + 
_memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int8_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, Sint16) +{ + std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + std::vector<float> ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5}; + + Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, ""); + + _memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, InvalidInputType_NEG) +{ + std::vector<float> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DequantizeTest, InvalidOutputType_NEG) +{ + std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, ""); + + 
_memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t)); + + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::S16>({2, 5}, 0.5, -1, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-interpreter/src/kernels/Div.cpp index 0e52ba1f0..dd1532278 100644 --- a/compiler/luci-interpreter/src/kernels/Div.cpp +++ b/compiler/luci-interpreter/src/kernels/Div.cpp @@ -46,6 +46,12 @@ void Div::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; case DataType::U8: evalQuantized(); break; @@ -56,13 +62,9 @@ void Div::execute() const void Div::evalFloat() const { - float activation_min{}; - float activation_max{}; - calculateActivationRange(_params.activation, &activation_min, &activation_max); - tflite::ArithmeticParams params{}; - params.float_activation_min = activation_min; - params.float_activation_max = activation_max; + fillArithmeticActivationRange<float>(params, _params.activation); + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( getTensorShape(input1()), getTensorShape(input2()), ¶ms); @@ -80,6 +82,28 @@ void Div::evalFloat() const } } +template <typename T> void Div::evalInteger() const +{ + tflite::ArithmeticParams params{}; + 
fillArithmeticActivationRange<T>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + void Div::evalQuantized() const { const auto input1_scale = static_cast<double>(input1()->scale()); diff --git a/compiler/luci-interpreter/src/kernels/Div.h b/compiler/luci-interpreter/src/kernels/Div.h index 6040cdd02..c1bf3e10b 100644 --- a/compiler/luci-interpreter/src/kernels/Div.h +++ b/compiler/luci-interpreter/src/kernels/Div.h @@ -39,6 +39,7 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; void evalQuantized() const; }; diff --git a/compiler/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-interpreter/src/kernels/Div.test.cpp index 021d68d06..85cd8b90a 100644 --- a/compiler/luci-interpreter/src/kernels/Div.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Div.test.cpp @@ -134,6 +134,56 @@ TEST_F(DivTest, Uint8) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } +template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + + std::vector<std::vector<dtype>> test_outputs = {{5, 6, 2, 0, 10, 3, // + 10, 0, 4, 5, 20, 0, // + 0, 0, 0, 2, 0, 0, // + 2, 0, 1, 10, 5, 0, // + 2, 3, 1, 
0, 5, 1, // + 18, 20, 7, 0, 37, 10}, + {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10}, + {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0, + 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10}, + {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}}; + std::vector<dtype> input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100}; + std::vector<dtype> input2_data{4, 5, 10, -3, 2, 10}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +} + +TEST_F(DivTest, SInt64) +{ + checkInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(DivTest, SInt32) +{ + checkInteger<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + TEST_F(DivTest, Input_Output_Type_NEG) { Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); @@ -149,9 +199,9 @@ TEST_F(DivTest, Input_Output_Type_NEG) TEST_F(DivTest, Invalid_Input_Type_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); - Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); - Tensor output_tensor = makeOutputTensor(DataType::S64); + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); DivParams params{}; params.activation = Activation::RELU; @@ 
-162,6 +212,19 @@ TEST_F(DivTest, Invalid_Input_Type_NEG) EXPECT_ANY_THROW(kernel.execute()); } +TEST_F(DivTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-interpreter/src/kernels/Equal.cpp index f58de1250..a57e127b7 100644 --- a/compiler/luci-interpreter/src/kernels/Equal.cpp +++ b/compiler/luci-interpreter/src/kernels/Equal.cpp @@ -49,6 +49,12 @@ void Equal::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; case DataType::U8: evalQuantized(); break; @@ -79,6 +85,29 @@ void Equal::evalFloat() const } } +template <typename T> void Equal::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + void Equal::evalQuantized() const { const auto x_data = getTensorData<uint8_t>(x()); diff --git 
a/compiler/luci-interpreter/src/kernels/Equal.h b/compiler/luci-interpreter/src/kernels/Equal.h index 11f025eac..c9be32cc0 100644 --- a/compiler/luci-interpreter/src/kernels/Equal.h +++ b/compiler/luci-interpreter/src/kernels/Equal.h @@ -38,6 +38,7 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; void evalQuantized() const; private: diff --git a/compiler/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-interpreter/src/kernels/Equal.test.cpp index 46a0f97d8..5870e5460 100644 --- a/compiler/luci-interpreter/src/kernels/Equal.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Equal.test.cpp @@ -99,6 +99,82 @@ TEST_F(EqualTest, FloatBroardcast) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); } +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value, -2, max_value}; + + std::vector<bool> ref_output_data{true, false, true}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = 
std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -2, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value, -2, max_value, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, false, // Row 1 + false, false, true, // Row 2 + false, true, false, // Row 3 + true, true, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(EqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(EqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
const float F_MIN = -128.0 / 128.0; const float F_MAX = 127.0 / 128.0; @@ -195,6 +271,36 @@ TEST_F(EqualTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } +TEST_F(EqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.cpp new file mode 100644 index 000000000..ba35c99fa --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ExpandDims.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ExpandDims.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output) + : Kernel({input, axis}, {output}) +{ +} + +void ExpandDims::configure() +{ + int32_t axis_value; + + switch (axis()->element_type()) + { + case loco::DataType::S32: + axis_value = *getTensorData<int32_t>(axis()); + break; + case loco::DataType::S64: + axis_value = static_cast<int32_t>(*getTensorData<int64_t>(axis())); + break; + default: + throw std::runtime_error("Unsupported type."); + } + + const auto input_shape = input()->shape(); + + if (axis_value < 0) + { + axis_value += input_shape.num_dims() + 1; + } + + LUCI_INTERPRETER_CHECK(axis_value <= input_shape.num_dims() and axis_value >= 0); + + Shape output_shape(input_shape.num_dims() + 1); + for (int32_t i = 0; i < output_shape.num_dims(); ++i) + { + if (i < axis_value) + { + output_shape.dim(i) = input_shape.dim(i); + } + else if (i == axis_value) + { + output_shape.dim(i) = 1; + } + else + { + LUCI_INTERPRETER_CHECK(i >= 1); + output_shape.dim(i) = input_shape.dim(i - 1); + } + } + + output()->resize(output_shape); +} + +void ExpandDims::execute() const +{ + // Just copy input to output + const auto *input_data = input()->data<void>(); + auto *output_data = output()->data<void>(); + + const size_t element_size = getDataTypeSize(input()->element_type()); + const int32_t num_elements = input()->shape().num_elements(); + std::memcpy(output_data, input_data, num_elements * element_size); +} 
+ +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-interpreter/src/kernels/ExpandDims.h new file mode 100644 index 000000000..e510b1160 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ExpandDims.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H +#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ExpandDims : public Kernel +{ +public: + ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axis() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp new file mode 100644 index 000000000..df9eaccc0 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ExpandDims.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ExpandDimsTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ExpandDimsTest, PositiveAxis) +{ + std::vector<int32_t> input_data{-1, 1, -2, 2}; + std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<int32_t> axis_value = {0}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2})); +} + +TEST_F(ExpandDimsTest, NegAxis) +{ + std::vector<int32_t> input_data{-1, 1, -2, 2}; + 
std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<int32_t> axis_value = {-1}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1})); +} + +TEST_F(ExpandDimsTest, InvalidAxisType_NEG) +{ + std::vector<int32_t> input_data{-1, 1, -2, 2}; + std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<float> axis_value = {1.0}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ExpandDimsTest, InvalidAxisValue_NEG) +{ + std::vector<int32_t> input_data{-1, 1, -2, 2}; + std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<int32_t> axis_value = {3}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp index cfe8f8bf2..bd2bb2f35 100644 --- a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp +++ b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp @@ -18,8 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/fully_connected.h> -#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h> +#include "PALFullyConnected.h" #include <stdexcept> @@ -74,7 +73,18 @@ void FullyConnected::configure() if (bias()) LUCI_INTERPRETER_CHECK(bias()->shape().num_elements() == weights()->shape().dim(0)); - output()->resize({batch_size, num_units}); + if (params().keep_num_dims == false) + { + output()->resize({batch_size, num_units}); + } + else + { + luci_interpreter::Shape output_shape(input_shape.num_dims()); + for (int i = 0; i < input_shape.num_dims(); ++i) + output_shape.dim(i) = input_shape.dim(i); + output_shape.dim(input_shape.num_dims() - 1) = num_units; + output()->resize(output_shape); + } } void FullyConnected::execute() const @@ -172,7 +182,7 @@ void FullyConnected::evalQuantizedS8() const op_params.quantized_activation_max = output_activation_max; op_params.lhs_cacheable = false; op_params.rhs_cacheable = false; - tflite::reference_integer_ops::FullyConnected( + luci_interpreter_pal::FullyConnected<int8_t>( op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()), getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()), getTensorData<int8_t>(output())); diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp index b0eda0145..4474cc4fb 100644 --- a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp +++ b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp @@ -133,7 +133,7 @@ template 
<typename T> class FullyConnectedTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t, int8_t>; -TYPED_TEST_CASE(FullyConnectedTest, DataTypes); +TYPED_TEST_SUITE(FullyConnectedTest, DataTypes); TYPED_TEST(FullyConnectedTest, Simple) { diff --git a/compiler/luci-interpreter/src/kernels/Gather.cpp b/compiler/luci-interpreter/src/kernels/Gather.cpp new file mode 100644 index 000000000..f1256660f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Gather.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2021 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Gather.h" +#include "kernels/Utils.h" +#include "PALGather.h" + +#include <stdexcept> +#include <cassert> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output, + const GatherParams &gparams) + : KernelWithParams<GatherParams>({params, indices}, {output}, gparams) +{ +} + +void Gather::configure() +{ + if (params()->element_type() == DataType::FLOAT32) + { + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32); + } + else + { + throw std::runtime_error("Unsupported type."); + } + + LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 || + indices()->element_type() == DataType::S64); + + // refer tensorflow/lite/kernels/gather.cc + + const Shape ¶ms_shape = params()->shape(); + const Shape &indices_shape = indices()->shape(); + + int axis = _params.axis; + if (axis < 0) + { + axis += params_shape.num_dims(); + } + LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims()); + + int batch_dims = _params.batch_dims; + // batch_dims should be in range: [-rank(indices), rank(indices)]. + // Negative batch_dims is added with rank of positions. 
+ if (batch_dims < 0) + { + batch_dims += indices_shape.num_dims(); + } + LUCI_INTERPRETER_CHECK(batch_dims <= axis); + LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims()); + LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims()); + for (int i = 0; i < batch_dims; ++i) + { + LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i)); + } + + const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims; + + Shape output_shape(num_dimensions); + int output_index = 0; + for (int i = 0; i < axis; ++i) + { + output_shape.dim(output_index++) = params_shape.dim(i); + } + for (int i = batch_dims; i < indices_shape.num_dims(); ++i) + { + output_shape.dim(output_index++) = indices_shape.dim(i); + } + for (int i = axis + 1; i < params_shape.num_dims(); ++i) + { + output_shape.dim(output_index++) = params_shape.dim(i); + } + output()->resize(output_shape); +} + +void Gather::execute() const +{ + switch (params()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Gather::evalFloat() const +{ + assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64); + + const auto params_data = getTensorData<float>(params()); + auto output_data = getTensorData<float>(output()); + + tflite::GatherParams tparams; + tparams.axis = _params.axis; + tparams.batch_dims = _params.batch_dims; + + if (indices()->element_type() == DataType::S32) + { + const auto indices_data = getTensorData<int32_t>(indices()); + + luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data, + getTensorShape(indices()), indices_data, + getTensorShape(output()), output_data); + } + else + { + const auto indices_data = getTensorData<int64_t>(indices()); + + luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data, + getTensorShape(indices()), 
indices_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Gather.h b/compiler/luci-interpreter/src/kernels/Gather.h new file mode 100644 index 000000000..cc02d64fb --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Gather.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_GATHER_H +#define LUCI_INTERPRETER_KERNELS_GATHER_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Gather : public KernelWithParams<GatherParams> +{ +public: + Gather(const Tensor *params, const Tensor *indices, Tensor *output, const GatherParams &gparams); + + const Tensor *params() const { return _inputs[0]; } + const Tensor *indices() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_GATHER_H diff --git a/compiler/luci-interpreter/src/kernels/Gather.test.cpp b/compiler/luci-interpreter/src/kernels/Gather.test.cpp new file mode 100644 index 000000000..4b3dda708 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Gather.test.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Gather.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class GatherTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(GatherTest, Simple) +{ + std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f}; + std::vector<int32_t> indices_data{1, 0, 1, 5}; + std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f}; + + Tensor params_tensor = + makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 1; + gparams.batch_dims = 0; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4})); +} + +TEST_F(GatherTest, Simple_Batch) +{ + Shape params_shape = {3, 5}; + Shape indices_shape = {3, 2}; + std::vector<float> params_data{0., 0., 1., 0., 2., 3., 0., 0., 0., 4., 0., 5., 0., 6., 0.}; + std::vector<int32_t> indices_data{2, 4, 0, 4, 1, 3}; + std::vector<float> ref_output_data{1., 2., 3., 4., 5., 6.}; + + Tensor params_tensor = + makeInputTensor<DataType::FLOAT32>(params_shape, params_data, _memory_manager.get()); + Tensor indices_tensor = + makeInputTensor<DataType::S32>(indices_shape, indices_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 1; + gparams.batch_dims = 1; 
+ + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 2})); +} + +TEST_F(GatherTest, Simple_NEG) +{ + Tensor params_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GatherTest, Axis_NEG) +{ + Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 100; + gparams.batch_dims = 0; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GatherTest, Batch_NEG) +{ + std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f}; + std::vector<int32_t> indices_data{1, 0, 1, 5}; + std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f}; + + Tensor params_tensor = + makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 0; + gparams.batch_dims = 1; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter 
diff --git a/compiler/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-interpreter/src/kernels/Greater.cpp index f0dd2db36..5ccae3c38 100644 --- a/compiler/luci-interpreter/src/kernels/Greater.cpp +++ b/compiler/luci-interpreter/src/kernels/Greater.cpp @@ -49,6 +49,12 @@ void Greater::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; case DataType::U8: evalQuantized(); break; @@ -79,6 +85,29 @@ void Greater::evalFloat() const } } +template <typename T> void Greater::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + void Greater::evalQuantized() const { const auto x_data = getTensorData<uint8_t>(x()); diff --git a/compiler/luci-interpreter/src/kernels/Greater.h b/compiler/luci-interpreter/src/kernels/Greater.h index 877c139c9..065f76d7b 100644 --- a/compiler/luci-interpreter/src/kernels/Greater.h +++ b/compiler/luci-interpreter/src/kernels/Greater.h @@ -38,6 +38,7 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; void evalQuantized() const; private: diff --git a/compiler/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-interpreter/src/kernels/Greater.test.cpp index ba3925f17..a48080124 100644 --- a/compiler/luci-interpreter/src/kernels/Greater.test.cpp +++ 
b/compiler/luci-interpreter/src/kernels/Greater.test.cpp @@ -97,6 +97,82 @@ TEST_F(GreaterTest, FloatBroardcast) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); } +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{false, true, false}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, // Row 1 + true, true, true, // Row 2 + true, false, false, // Row 3 + false, false, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = 
makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(GreaterTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(GreaterTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. const float F_MIN = -128.0 / 128.0; const float F_MAX = 127.0 / 128.0; @@ -223,6 +299,36 @@ TEST_F(GreaterTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } +TEST_F(GreaterTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, 
_memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp index e7c1b4afe..27e42c971 100644 --- a/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp +++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp @@ -52,6 +52,12 @@ void GreaterEqual::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; case DataType::U8: evalQuantized(); break; @@ -82,6 +88,29 @@ void GreaterEqual::evalFloat() const } } +template <typename T> void GreaterEqual::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } + else + { + tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + void GreaterEqual::evalQuantized() const { const auto x_data = getTensorData<uint8_t>(x()); diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-interpreter/src/kernels/GreaterEqual.h index 4a0f48748..e333c30a6 100644 --- a/compiler/luci-interpreter/src/kernels/GreaterEqual.h +++ 
b/compiler/luci-interpreter/src/kernels/GreaterEqual.h @@ -38,6 +38,7 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; void evalQuantized() const; private: diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp index a9d172301..35bf88eab 100644 --- a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp +++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp @@ -96,6 +96,81 @@ TEST_F(GreaterEqualTest, FloatBroardcast) EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); } +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{false, true, true}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value 
= std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value - 1, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, // Row 1 + true, true, true, // Row 2 + true, false, false, // Row 3 + false, true, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(GreaterEqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(GreaterEqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
const float F_MIN = -128.0 / 128.0; @@ -223,6 +298,36 @@ TEST_F(GreaterEqualTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } +TEST_F(GreaterEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp index 1e565e358..6f960e8b4 100644 --- a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp @@ -81,7 +81,7 @@ template <typename T> class L2NormalizeTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(L2NormalizeTest, DataTypes); +TYPED_TEST_SUITE(L2NormalizeTest, DataTypes); TYPED_TEST(L2NormalizeTest, Simple) { diff 
--git a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp index 289742a50..7245456cb 100644 --- a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp @@ -206,7 +206,8 @@ TEST_F(L2Pool2DTest, FloatPaddingSameStride) kernel.execute(); std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0}; - EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + // NOTE with NEON+ruy, error is #1=-1.14441e-05, #6=-1.81198e-05 + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f)); // TODO make a Shape checking of output_tensor. } diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp index 6ec8a348a..0f6263b57 100644 --- a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp @@ -83,7 +83,7 @@ template <typename T> class LeakReluTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(LeakReluTest, DataTypes); +TYPED_TEST_SUITE(LeakReluTest, DataTypes); TYPED_TEST(LeakReluTest, Simple) { diff --git a/compiler/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-interpreter/src/kernels/Less.cpp index 041444926..8d26ff297 100644 --- a/compiler/luci-interpreter/src/kernels/Less.cpp +++ b/compiler/luci-interpreter/src/kernels/Less.cpp @@ -49,6 +49,12 @@ void Less::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; case DataType::U8: evalQuantized(); break; @@ -79,6 +85,29 @@ void Less::evalFloat() const } } +template <typename T> void Less::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = 
getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + void Less::evalQuantized() const { const auto x_data = getTensorData<uint8_t>(x()); diff --git a/compiler/luci-interpreter/src/kernels/Less.h b/compiler/luci-interpreter/src/kernels/Less.h index 293740e72..e27bb689c 100644 --- a/compiler/luci-interpreter/src/kernels/Less.h +++ b/compiler/luci-interpreter/src/kernels/Less.h @@ -38,6 +38,7 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; void evalQuantized() const; private: diff --git a/compiler/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-interpreter/src/kernels/Less.test.cpp index e9d09b288..8c5963363 100644 --- a/compiler/luci-interpreter/src/kernels/Less.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Less.test.cpp @@ -97,6 +97,82 @@ TEST_F(LessTest, FloatBroardcast) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); } +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{true, false, false}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = 
makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + false, false, false, // Row 2 + false, true, true, // Row 3 + true, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(LessTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(LessTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + 
checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. const float F_MIN = -128.0 / 128.0; const float F_MAX = 127.0 / 128.0; @@ -223,6 +299,36 @@ TEST_F(LessTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } +TEST_F(LessTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.cpp index 5f4c7f7aa..b474bc47a 100644 --- a/compiler/luci-interpreter/src/kernels/LessEqual.cpp +++ b/compiler/luci-interpreter/src/kernels/LessEqual.cpp @@ -49,6 +49,12 @@ void LessEqual::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; 
+ case DataType::S32: + evalInteger<int32_t>(); + break; case DataType::U8: evalQuantized(); break; @@ -79,6 +85,29 @@ void LessEqual::evalFloat() const } } +template <typename T> void LessEqual::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + void LessEqual::evalQuantized() const { const auto x_data = getTensorData<uint8_t>(x()); diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-interpreter/src/kernels/LessEqual.h index b6da1a2a8..f82ea90d4 100644 --- a/compiler/luci-interpreter/src/kernels/LessEqual.h +++ b/compiler/luci-interpreter/src/kernels/LessEqual.h @@ -38,6 +38,7 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; void evalQuantized() const; private: diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp index 0558003dd..b2e2fa7a1 100644 --- a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp @@ -97,6 +97,82 @@ TEST_F(LessEqualTest, FloatBroardcast) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); } +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = 
std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{true, false, true}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + false, false, false, // Row 2 + false, true, true, // Row 3 + true, true, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + 
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(LessEqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(LessEqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. const float F_MIN = -128.0 / 128.0; const float F_MAX = 127.0 / 128.0; @@ -223,6 +299,36 @@ TEST_F(LessEqualTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } +TEST_F(LessEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff 
--git a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp index 70227563f..5a1ea669c 100644 --- a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp @@ -76,7 +76,7 @@ template <typename T> class LogisticTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(LogisticTest, DataTypes); +TYPED_TEST_SUITE(LogisticTest, DataTypes); TYPED_TEST(LogisticTest, Simple) { diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp index 89049c96c..2fbeefce4 100644 --- a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp +++ b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +19,6 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/pad.h> - namespace luci_interpreter { namespace kernels @@ -59,44 +58,25 @@ void MirrorPad::configure() output()->resize(output_shape); } +template <typename T> +inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode, + Tensor &output); + void MirrorPad::execute() const { - const int num_dims = input()->shape().num_dims(); - - tflite::PadParams params{}; - params.left_padding_count = num_dims; - params.right_padding_count = num_dims; - - const auto *paddings_data = getTensorData<int32_t>(paddings()); - for (int i = num_dims - 1; i >= 0; --i) - { - params.left_padding[i] = paddings_data[i * 2]; - params.right_padding[i] = paddings_data[i * 2 + 1]; - } - switch (input()->element_type()) { case DataType::FLOAT32: { - const float pad_value = 0; - - // NOTE: this implementation only obtains min-max values for quantization - // TODO: calculate proper inference values - tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()), - &pad_value, getTensorShape(output()), - getTensorData<float>(output())); + MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output()); break; } case DataType::U8: { - // NOTE: this implementation only obtains min-max values for quantization - // TODO: calculate proper inference values assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min()); assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max()); - const auto pad_value = static_cast<uint8_t>(output()->zero_point()); - tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()), - &pad_value, getTensorShape(output()), - getTensorData<uint8_t>(output())); + + MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output()); break; } default: @@ -104,5 +84,87 @@ void MirrorPad::execute() const } } +template <typename T> +inline void MirrorPadImpl(const Tensor 
&input, const Tensor &paddings, MirrorPadMode mode, + Tensor &output) +{ + auto const input_dims = input.shape().num_dims(); + auto const input_data = input.data<T>(); + auto const paddings_data = paddings.data<int32_t>(); + auto const output_data = output.data<T>(); + + auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1; + auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1; + auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1; + auto const input_d = input.shape().dim(input_dims - 1); + + auto const input_h_offset = input_d * input_w; + auto const input_b_offset = input_h_offset * input_h; + + auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1; + auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1; + auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1; + auto const output_d = output.shape().dim(input_dims - 1); + + auto const left_b_pad = paddings_data[2 * (input_dims - 4)]; + auto const left_h_pad = paddings_data[2 * (input_dims - 3)]; + auto const left_w_pad = paddings_data[2 * (input_dims - 2)]; + auto const left_d_pad = paddings_data[2 * (input_dims - 1)]; + + auto const right_b_pad = paddings_data[2 * (input_dims - 4) + 1]; + auto const right_h_pad = paddings_data[2 * (input_dims - 3) + 1]; + auto const right_w_pad = paddings_data[2 * (input_dims - 2) + 1]; + auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1]; + + const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; }; + const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h, + auto b) { + return d + w * input_d + h * input_h_offset + b * input_b_offset; + }; + + const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) { + bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1; + return positive_mod(reflected ? 
input + left_pad - i - 1 : i - left_pad, input); + }; + + const T *in_ptr = input_data; + T *out_ptr = output_data; + + for (int32_t b = 0; b < output_b; ++b) + { + for (int32_t h = 0; h < output_h; ++h) + { + for (int32_t w = 0; w < output_w; ++w) + { + for (int32_t d = 0; d < output_d; ++d) + { + if (b < left_b_pad || b >= output_b - right_b_pad || // + h < left_h_pad || h >= output_h - right_h_pad || // + w < left_w_pad || w >= output_w - right_w_pad || // + d < left_d_pad || d >= output_d - right_d_pad) + { + if (mode == MirrorPadMode::REFLECT) + { + *out_ptr++ = input_data[offset_index( + positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w), + positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))]; + } + else + { + *out_ptr++ = input_data[offset_index( + symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w), + symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))]; + } + } + else + { + *out_ptr++ = *in_ptr++; + } + } + } + } + } +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp index de9da5051..740d8cb22 100644 --- a/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp +++ b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp @@ -14,4 +14,212 @@ * limitations under the License. 
*/ -// TODO: Add tests for MirrorPad +#include "kernels/MirrorPad.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MirrorPadTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode) + { + MirrorPadParams params{}; + params.mode = mode; + + MirrorPad kernel(&input, &padding, &output, params); + kernel.configure(); + _memory_manager->allocate_memory(output); + kernel.execute(); + } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MirrorPadTest, FloatReflect) +{ + Shape input_shape = {1, 2, 2, 1}; + Shape padding_shape = {4, 2}; + + std::vector<float> input_data{1.0f, 2.0f, // + 3.0f, 4.0f}; // + std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT); + + std::vector<float> ref_output_data{2.0f, 1.0f, 2.0f, 1.0f, 2.0f, // + 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, // + 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, // + 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, // + 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}; // + std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1}; + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, FloatSymmetric) +{ + Shape input_shape = {1, 2, 2, 1}; + Shape padding_shape = {4, 2}; + + std::vector<float> input_data{1.0f, 2.0f, // + 3.0f, 4.0f}; // 
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); + + std::vector<float> ref_output_data{3.0, 3.0, 4.0, 4.0, 3.0, // + 1.0, 1.0, 2.0, 2.0, 1.0, // + 1.0, 1.0, 2.0, 2.0, 1.0, // + 3.0, 3.0, 4.0, 4.0, 3.0, // + 3.0, 3.0, 4.0, 4.0, 3.0}; // + std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1}; + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, FloatSymmetric2Dim) +{ + Shape input_shape = {3, 1}; + Shape padding_shape = {2, 2}; + + std::vector<float> input_data{1.0f, 2.0f, 3.0f}; + std::vector<int> padding_data{1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); + + std::vector<float> ref_output_data{1.0, 1.0, 2.0, 3.0, 3.0, 2.0}; + std::initializer_list<int32_t> ref_output_shape{6, 1}; + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, Uint8Reflect) +{ + Shape input_shape = {1, 2, 3, 1}; + Shape padding_shape = {4, 2}; + + float quant_tolerance = getTolerance(0.0f, 6.0f, 255); + std::pair<float, int32_t> quant_param = 
quantizationParams<uint8_t>(0.0f, 6.0f); + + std::vector<float> input_data{1.0f, 2.0f, 3.0f, // + 4.0f, 5.0f, 6.0f}; // + std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0}; + + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT); + + std::vector<float> ref_output_data{ + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, // + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, // + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + }; + std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1}; + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, quant_tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, Uint8Symmetric) +{ + Shape input_shape = {1, 2, 3, 1}; + Shape padding_shape = {4, 2}; + + float quant_tolerance = getTolerance(0.0f, 6.0f, 255); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f); + + std::vector<float> input_data{1.0f, 2.0f, 3.0f, // + 4.0f, 5.0f, 6.0f}; // + std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0}; + + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); 
+ + std::vector<float> ref_output_data{ + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, // + 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, // + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + }; + std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1}; + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, quant_tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, UnsupportedDim_NEG) +{ + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 1}, {1.0f}, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT)); +} + +TEST_F(MirrorPadTest, InvalidInputType_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor padding_tensor = makeInputTensor<DataType::S32>({1, 2}, {0, 0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-interpreter/src/kernels/Mul.cpp index bc855de0f..531fb4fa1 100644 --- a/compiler/luci-interpreter/src/kernels/Mul.cpp +++ b/compiler/luci-interpreter/src/kernels/Mul.cpp @@ -42,6 +42,8 @@ void Mul::configure() LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type()); if (input1()->element_type() == DataType::S16) { + LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 && + input2()->zero_points().size() == 1) 
LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 && output()->zero_point() == 0); } @@ -56,6 +58,12 @@ void Mul::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; case DataType::S16: evalQuantizedS16(); break; @@ -66,13 +74,8 @@ void Mul::execute() const void Mul::evalFloat() const { - float activation_min{}; - float activation_max{}; - calculateActivationRange(_params.activation, &activation_min, &activation_max); - tflite::ArithmeticParams params{}; - params.float_activation_min = activation_min; - params.float_activation_max = activation_max; + fillArithmeticActivationRange<float>(params, _params.activation); const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( getTensorShape(input1()), getTensorShape(input2()), ¶ms); @@ -91,6 +94,28 @@ void Mul::evalFloat() const } } +template <typename T> void Mul::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<T>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + luci_interpreter_pal::BroadcastMul4DSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); + } + else + { + luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + void Mul::evalQuantizedS16() const { const auto input1_scale = static_cast<double>(input1()->scale()); diff --git a/compiler/luci-interpreter/src/kernels/Mul.h b/compiler/luci-interpreter/src/kernels/Mul.h index 2ccf60f3a..c0cf817df 100644 --- 
a/compiler/luci-interpreter/src/kernels/Mul.h +++ b/compiler/luci-interpreter/src/kernels/Mul.h @@ -42,6 +42,7 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; void evalQuantizedS16() const; }; diff --git a/compiler/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-interpreter/src/kernels/Mul.test.cpp index 471f6ac86..fc0e60614 100644 --- a/compiler/luci-interpreter/src/kernels/Mul.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Mul.test.cpp @@ -93,6 +93,78 @@ TEST_F(MulTest, Float) } } +template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + + dtype max_value = std::numeric_limits<dtype>::max(); + dtype res_max = max_value - max_value % 10; + + std::vector<std::vector<dtype>> test_outputs = { + {8, 0, 20, 0, 4, 30, // + 16, 0, 40, 3, 8, 0, // + 0, 0, 0, 6, 0, 0, // + 4, 0, 10, 9, 2, 0, // + 40, 0, 100, 0, 20, 150, // + 28, 0, 70, 0, 14, res_max}, + {8, 0, 40, 3, 0, 0, 4, 0, 100, 0, 14, res_max}, + {8, 12, 0, 0, 20, 30, 16, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 2, 0, 10, 0, 0, 0, 20, 30, 100, 150, 0, 0, 14, max_value / 10 * 2, + 70, res_max}, + {8, 12, 0, 0, 0, 0, 0, 9, 20, 30, 70, res_max}}; + std::vector<dtype> input1_data{2, 3, 4, -1, -3, -2, 1, -3, 10, 15, 7, max_value / 10}; + std::vector<dtype> input2_data{4, 0, 10, -3, 2, 10}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + 
memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } + // Re-run with exchanged inputs. + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +} + +TEST_F(MulTest, SInt64) +{ + checkInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(MulTest, SInt32) +{ + checkInteger<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + TEST_F(MulTest, SInt16) { Shape base_shape = {2, 3, 1, 2}; @@ -161,6 +233,60 @@ TEST_F(MulTest, SInt16) } } +TEST_F(MulTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(MulTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul 
kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(MulTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(MulTest, Invalid_Quantization_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + MulParams params{}; + params.activation = Activation::NONE; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.cpp index 99d5e0fa0..54e5eee34 100644 --- a/compiler/luci-interpreter/src/kernels/NotEqual.cpp +++ b/compiler/luci-interpreter/src/kernels/NotEqual.cpp @@ -49,6 +49,12 @@ void NotEqual::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; case DataType::U8: evalQuantized(); break; @@ -79,6 +85,29 @@ void NotEqual::evalFloat() const } } +template <typename T> void NotEqual::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + 
op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + void NotEqual::evalQuantized() const { const auto x_data = getTensorData<uint8_t>(x()); diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-interpreter/src/kernels/NotEqual.h index 247874df7..d2aafe893 100644 --- a/compiler/luci-interpreter/src/kernels/NotEqual.h +++ b/compiler/luci-interpreter/src/kernels/NotEqual.h @@ -38,6 +38,7 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; void evalQuantized() const; private: diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp index 763f86893..45bf4022a 100644 --- a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp +++ b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp @@ -99,6 +99,82 @@ TEST_F(NotEqualTest, FloatBroardcast) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); } +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value, -2, max_value}; + + std::vector<bool> ref_output_data{false, true, false}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = 
makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -2, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value, -2, max_value, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, true, // Row 1 + true, true, false, // Row 2 + true, false, true, // Row 3 + false, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(NotEqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(NotEqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + // Choose min / max in 
such a way that there are exactly 256 units to avoid rounding errors. const float F_MIN = -128.0 / 128.0; const float F_MAX = 127.0 / 128.0; @@ -195,6 +271,36 @@ TEST_F(NotEqualTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } +TEST_F(NotEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-interpreter/src/kernels/OneHot.cpp new file mode 100644 index 000000000..4d3e5f2ef --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/OneHot.cpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/OneHot.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +namespace +{ + +template <typename T> +void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor, + const Tensor *off_value_tensor, int32_t depth, int32_t axis, + Tensor *output_tensor) +{ + // define input shape and correct axis + auto const &input_shape = indices_tensor->shape(); + axis = axis == -1 ? input_shape.num_dims() : axis; + + // TODO support other integer input types + auto const *indices = getTensorData<int32_t>(indices_tensor); + auto const on_value = getTensorData<T>(on_value_tensor)[0]; + auto const off_value = getTensorData<T>(off_value_tensor)[0]; + auto *output = getTensorData<T>(output_tensor); + + // prefix_dim_size == # of elements before the axis + // depth == # of elements per axis + // suffix_dim_size == # of elements after the axis + auto prefix_dim_size = 1; + for (int32_t i = 0; i < axis; ++i) + { + prefix_dim_size *= input_shape.dim(i); + } + assert(prefix_dim_size > 0); + auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size; + + // View the indices as a matrix of size: + // prefix_dim_size x suffix_dim_size + // View the output as a matrix of size: + // prefix_dim_size x depth x suffix_dim_size + // Then the output is: + // output(i, j, k) == (indices(i, k) == j) ? 
on : off + for (int32_t i = 0; i < prefix_dim_size; ++i) + for (int32_t j = 0; j < depth; ++j) + for (int32_t k = 0; k < suffix_dim_size; ++k, ++output) + *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value; +} + +} // namespace + +OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value, + const Tensor *off_value, Tensor *output, const OneHotParams ¶ms) + : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params) +{ + // Do nothing +} + +void OneHot::configure() +{ + // check types + LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type()); + + // check shape dependent parameters + LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims()); + + // define parameters that affect the output shape + auto const depth_value = getTensorData<int32_t>(depth())[0]; + auto const &input_shape = indices()->shape(); + auto const input_dims = input_shape.num_dims(); + auto const axis = params().axis == -1 ? 
input_dims : params().axis; + + // define output shape + Shape output_shape(input_shape.num_dims() + 1); + { + for (int32_t d = 0; d < axis; ++d) + output_shape.dim(d) = input_shape.dim(d); + + output_shape.dim(axis) = depth_value; + + for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d) + output_shape.dim(d) = input_shape.dim(d - 1); + } + + // reshape output + output()->resize(output_shape); +} + +void OneHot::execute() const +{ + auto const depth_value = getTensorData<int32_t>(depth())[0]; + auto const axis = params().axis; + + switch (output()->element_type()) + { + case loco::DataType::FLOAT32: + OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + case loco::DataType::U8: + OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + case loco::DataType::S16: + OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + default: + // TODO Support other data types + throw std::runtime_error("Not supported, yet!"); + break; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/OneHot.h b/compiler/luci-interpreter/src/kernels/OneHot.h new file mode 100644 index 000000000..572f857ae --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/OneHot.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H
#define LUCI_INTERPRETER_KERNELS_ONEHOT_H

#include "core/Kernel.h"
#include "core/KernelParams.h"

namespace luci_interpreter
{
namespace kernels
{

// OneHot kernel: expands S32 indices into a one-hot tensor, inserting a new
// dimension of size `depth` at `params.axis` (-1 appends it innermost).
class OneHot : public KernelWithParams<OneHotParams>
{
public:
  OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
         const Tensor *off_value, Tensor *output, const OneHotParams &params);

  // Input accessors, in the order the constructor registers them.
  const Tensor *indices() const { return _inputs[0]; }
  const Tensor *depth() const { return _inputs[1]; }
  const Tensor *on_value() const { return _inputs[2]; }
  const Tensor *off_value() const { return _inputs[3]; }

  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;
};

} // namespace kernels
} // namespace luci_interpreter

#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.test.cpp b/compiler/luci-interpreter/src/kernels/OneHot.test.cpp
new file mode 100644
index 000000000..45b6968fa
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/OneHot.test.cpp
@@ -0,0 +1,192 @@
/*
 * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Unit tests for the OneHot kernel: positive patterns for axis 0/1/-1 over
// float/uint8/int16 outputs, plus negative (configure-should-throw) cases.
#include "kernels/OneHot.h"
#include "kernels/TestUtils.h"
#include "luci_interpreter/TestMemoryManager.h"

namespace luci_interpreter
{
namespace kernels
{
namespace
{

using namespace testing;

// Builds tensors from the given shapes/data, runs the kernel, and checks the
// resulting shape and contents against the expectations.
template <typename T1, typename T2>
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
           std::initializer_list<T1> input_data, std::initializer_list<int32_t> depth_data,
           std::initializer_list<T2> on_value_data, std::initializer_list<T2> off_value_data,
           int32_t axis, std::initializer_list<T2> output_data)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();

  constexpr auto input_type = getElementType<T1>();
  constexpr auto output_type = getElementType<T2>();

  Tensor input_tensor = makeInputTensor<input_type>(input_shape, input_data, memory_manager.get());
  Tensor depth_tensor = makeInputTensor<DataType::S32>({}, depth_data, memory_manager.get());
  Tensor on_value_tensor = makeInputTensor<output_type>({}, on_value_data, memory_manager.get());
  Tensor off_value_tensor = makeInputTensor<output_type>({}, off_value_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(output_type);

  OneHotParams params{};
  params.axis = axis;

  OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
                params);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
  EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
}

template <typename T> class OneHotTest : public ::testing::Test
{
};

using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
TYPED_TEST_SUITE(OneHotTest, DataTypes);

// Same 2x3 indices expanded along axis 0, 1 and -1 (innermost).
TYPED_TEST(OneHotTest, BasicPattern)
{
  // axis 0
  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3},
                            /*input_data=*/
                            {
                              0, 3, 5, //
                              7, 3, 0, //
                            },
                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
                            /*axis=*/0,
                            /*output_data=*/
                            {
                              1, 0, 0, //
                              0, 0, 1, //

                              0, 0, 0, //
                              0, 0, 0, //

                              0, 0, 0, //
                              0, 0, 0, //

                              0, 1, 0, //
                              0, 1, 0, //
                            });
  // axis 1
  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3},
                            /*input_data=*/
                            {
                              0, 3, 5, //
                              7, 3, 0, //
                            },
                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
                            /*axis=*/1,
                            /*output_data=*/
                            {
                              1, 0, 0, //
                              0, 0, 0, //
                              0, 0, 0, //
                              0, 1, 0, //

                              0, 0, 1, //
                              0, 0, 0, //
                              0, 0, 0, //
                              0, 1, 0, //
                            });
  // axis -1
  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4},
                            /*input_data=*/
                            {
                              0, 3, 5, //
                              7, 3, 0, //
                            },
                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
                            /*axis=*/-1,
                            /*output_data=*/
                            {
                              1, 0, 0, 0, //
                              0, 0, 0, 1, //
                              0, 0, 0, 0, //

                              0, 0, 0, 0, //
                              0, 0, 0, 1, //
                              1, 0, 0, 0, //
                            });
}

// Indices must be S32: a float index tensor must fail configure().
TEST(OneHotTest, UnsupportedInputType_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();

  // input type should be integer
  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {0}, memory_manager.get());

  Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
  Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
  Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  OneHotParams params = {-1};

  OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
                params);
  EXPECT_ANY_THROW(kernel.configure());
}

// Output element type must match on/off values: S16 output vs float values fails.
TEST(OneHotTest, OutputTypeMismatch_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();

  Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
  Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());

  // type of on_value, off_value and output_tensor should be same
  Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
  Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16);

  OneHotParams params = {-1};

  OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
                params);
  EXPECT_ANY_THROW(kernel.configure());
}

// Axis below -1 is rejected by configure().
TEST(OneHotTest, InvalidAxis_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();

  Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
  Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
  Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
  Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  // axis should be in [-1, input_shape.rank]
  OneHotParams params = {-2};

  OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
                params);
  EXPECT_ANY_THROW(kernel.configure());
}

} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
index 90a0f894e..2404e4303 100644
--- a/compiler/luci-interpreter/src/kernels/Pack.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
@@ -80,7 +80,7 @@ template <typename T> class PackTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<uint8_t, float>;
-TYPED_TEST_CASE(PackTest, DataTypes);
+TYPED_TEST_SUITE(PackTest, DataTypes);
 
 TYPED_TEST(PackTest,
ThreeInputs) { diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp index 700448e7a..fe172884b 100644 --- a/compiler/luci-interpreter/src/kernels/Pad.cpp +++ b/compiler/luci-interpreter/src/kernels/Pad.cpp @@ -93,6 +93,16 @@ void Pad::execute() const getTensorData<uint8_t>(output())); break; } + case DataType::S8: + { + assert(output()->zero_point() >= std::numeric_limits<int8_t>::min()); + assert(output()->zero_point() <= std::numeric_limits<int8_t>::max()); + const auto pad_value = static_cast<int8_t>(output()->zero_point()); + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()), + &pad_value, getTensorShape(output()), + getTensorData<int8_t>(output())); + break; + } default: throw std::runtime_error("Unsupported type."); } diff --git a/compiler/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-interpreter/src/kernels/Pad.test.cpp index 7994263e2..dd3ce947c 100644 --- a/compiler/luci-interpreter/src/kernels/Pad.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Pad.test.cpp @@ -54,6 +54,32 @@ TEST(Pad, Uint8) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1})); } +TEST(Pad, Int8) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-1.0f, 1.0f); + std::vector<float> input_data{-0.2, 0.4, 0.5, -0.7, -0.1, -0.9, 0.7, 0.1, 0.2}; + std::vector<int32_t> paddings_data{0, 0, 1, 2, 2, 1, 0, 0}; + Tensor input_tensor = makeInputTensor<DataType::S8>( + {1, 3, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second); + + Pad kernel(&input_tensor, 
&paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, -0.2, 0.4, 0.5, 0, + 0, 0, -0.7, -0.1, -0.9, 0, 0, 0, 0.7, 0.1, 0.2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 6, 6, 1})); +} + TEST(Pad, Float) { std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); diff --git a/compiler/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-interpreter/src/kernels/Quantize.cpp new file mode 100644 index 000000000..0c8544a65 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Quantize.cpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

// Quantize kernel: converts float input to a quantized type, or requantizes
// already-quantized data to a new scale/zero-point.
#include "kernels/Quantize.h"
#include "kernels/Utils.h"
#include "PALQuantize.h"

namespace luci_interpreter
{
namespace kernels
{

namespace
{

// Requantize: rescale already-quantized data to the output's quantization.
// The scale ratio is folded into a fixed-point (multiplier, shift) pair.
template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
{
  int32_t multiplier;
  int shift;

  const double effective_output_scale = input->scale() / output->scale();
  quantizeMultiplier(effective_output_scale, &multiplier, &shift);

  const auto input_shape = getTensorShape(input);
  const auto output_shape = getTensorShape(output);
  const auto size = tflite::MatchingFlatSize(input_shape, output_shape);

  const auto input_data = getTensorData<input_dtype>(input);

  // Dispatch on the output element type; the input type is the template parameter.
  switch (output->element_type())
  {
    case loco::DataType::S8:
      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
                                       output->zero_point(), getTensorData<int8_t>(output));
      break;
    case loco::DataType::U8:
      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
                                       output->zero_point(), getTensorData<uint8_t>(output));
      break;
    case loco::DataType::S16:
      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
                                       output->zero_point(), getTensorData<int16_t>(output));
      break;
    default:
      throw std::runtime_error("Unsupported quantized type, yet!");
  }
}

} // namespace

Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}

// Validates the input/output type combination and resizes the output to the
// input's shape. Supported: FLOAT32 -> {U8, S8, S16} (quantize) and
// {S16, S8, U8} -> {S8, U8, S16} (requantize). S16 tensors must be symmetric
// (zero_point == 0).
void Quantize::configure()
{

  if (input()->element_type() == loco::DataType::S16)
    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);

  switch (input()->element_type())
  {
    case loco::DataType::FLOAT32:
    {
      LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 ||
                             output()->element_type() == loco::DataType::S8 ||
                             output()->element_type() == loco::DataType::S16);
      break;
    }
    case loco::DataType::S16:
    case loco::DataType::S8:
    case loco::DataType::U8:
    {
      LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 ||
                             output()->element_type() == loco::DataType::U8 ||
                             output()->element_type() == loco::DataType::S16);
      if (output()->element_type() == loco::DataType::S16)
      {
        LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
      }
      break;
    }
    default:
      throw std::runtime_error("Unsupported type");
  }

  output()->resize(input()->shape());
}

// Float input goes through the PAL Quantize routine (per output type);
// quantized input is requantized via call_requantize<input type>.
void Quantize::execute() const
{
  switch (input()->element_type())
  {
    case loco::DataType::FLOAT32:
    {
      tflite::QuantizationParams op_params;
      op_params.zero_point = output()->zero_point();
      op_params.scale = output()->scale();
      const auto input_data = getTensorData<float>(input());

      switch (output()->element_type())
      {
        case loco::DataType::S8:
        {
          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
                                         getTensorShape(output()), getTensorData<int8_t>(output()));
          break;
        }
        case loco::DataType::U8:
        {
          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
                                         getTensorShape(output()),
                                         getTensorData<uint8_t>(output()));
          break;
        }
        case loco::DataType::S16:
        {
          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
                                         getTensorShape(output()),
                                         getTensorData<int16_t>(output()));
          break;
        }
        default:
          throw std::runtime_error("Unsupported type.");
      }
      break;
    }
    case loco::DataType::S16:
    {
      call_requantize<int16_t>(input(), output());
      break;
    }
    case loco::DataType::S8:
    {
      call_requantize<int8_t>(input(), output());
      break;
    }
    case loco::DataType::U8:
    {
      call_requantize<uint8_t>(input(), output());
      break;
    }
    default:
      throw std::runtime_error("Unsupported type.");
  }
}

} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.h b/compiler/luci-interpreter/src/kernels/Quantize.h
new file mode 100644
index 000000000..006c5366f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Quantize.h
@@ -0,0 +1,43 @@
/*
 * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H
#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H

#include "core/Kernel.h"
#include "core/KernelParams.h"

namespace luci_interpreter
{
namespace kernels
{

// Quantize/requantize kernel: single input, single output, no parameters.
class Quantize : public Kernel
{
public:
  Quantize(const Tensor *input, Tensor *output);

  const Tensor *input() const { return _inputs[0]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;
};

} // namespace kernels
} // namespace luci_interpreter

#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.test.cpp b/compiler/luci-interpreter/src/kernels/Quantize.test.cpp
new file mode 100644
index 000000000..22e67fe3f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Quantize.test.cpp
@@ -0,0 +1,254 @@
/*
 * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Unit tests for the Quantize kernel: float->quantized conversion,
// quantized->quantized requantization, and negative (type-check) cases.
#include "kernels/Quantize.h"
#include "kernels/TestUtils.h"
#include "luci_interpreter/TestMemoryManager.h"

namespace luci_interpreter
{
namespace kernels
{
namespace
{

using namespace testing;

class QuantizeTest : public ::testing::Test
{
protected:
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};

// float -> uint8, scale 0.5 / zero_point 127: extremes saturate to [0, 255].
TEST_F(QuantizeTest, FloatUint8)
{
  std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};

  std::vector<uint8_t> ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};

  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);

  Quantize kernel(&input_tensor, &output_tensor);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
              ::testing::ElementsAreArray(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
}

// float -> int8, scale 0.5 / zero_point -1: extremes saturate to [-128, 127].
TEST_F(QuantizeTest, FloatInt8)
{
  std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};

  std::vector<int8_t> ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};

  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);

  Quantize kernel(&input_tensor, &output_tensor);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
              ::testing::ElementsAreArray(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
}

// float -> int16 (symmetric: zero_point 0), scale 0.005.
TEST_F(QuantizeTest, FloatInt16)
{
  std::vector<float> input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64};

  std::vector<int16_t> ref_output_data{-12700, -12600, -600, -400, -200,
                                       200,    400,    600,  12700, 12800};

  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0);

  Quantize kernel(&input_tensor, &output_tensor);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
              ::testing::ElementsAreArray(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
}

// int16 -> int16 requantization: halving the scale doubles the raw values.
TEST_F(QuantizeTest, Int16Int16)
{
  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  std::vector<int16_t> ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};

  Tensor input_tensor = makeInputTensor<DataType::S16>(
    {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0);

  Quantize kernel(&input_tensor, &output_tensor);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
              ::testing::ElementsAreArray(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
}

// int8 -> int8 with identical quantization: raw values pass through.
TEST_F(QuantizeTest, Int8Int8)
{
  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};

  Tensor input_tensor = makeInputTensor<DataType::S8>(
    {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);

  Quantize kernel(&input_tensor, &output_tensor);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
              ::testing::ElementsAreArray(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
}

// uint8 -> uint8 with identical quantization.
TEST_F(QuantizeTest, Uint8Uint8)
{
  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  std::vector<uint8_t> ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147};

  Tensor input_tensor = makeInputTensor<DataType::U8>(
    {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);

  Quantize kernel(&input_tensor, &output_tensor);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
              ::testing::ElementsAreArray(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
}

// int16 -> int8 cross-type requantization.
TEST_F(QuantizeTest, Int16Int8)
{
  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};

  Tensor input_tensor = makeInputTensor<DataType::S16>(
    {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);

  Quantize kernel(&input_tensor, &output_tensor);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
              ::testing::ElementsAreArray(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
}

// S32 input is not a supported quantize/requantize source.
TEST_F(QuantizeTest, InvalidInputType_NEG)
{
  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  Tensor input_tensor =
    makeInputTensor<DataType::S32>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);

  Quantize kernel(&input_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}

// float -> float is not quantization; configure() must throw.
TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG)
{
  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>({1, 1, 2, 5}, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  Quantize kernel(&input_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}

// Quantized input cannot be requantized to float.
TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG)
{
  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  Quantize kernel(&input_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}

TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG)
{
  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  Tensor input_tensor =
    makeInputTensor<DataType::S8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  Quantize kernel(&input_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}

TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG)
{
  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  Tensor input_tensor =
    makeInputTensor<DataType::U8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S32);

  Quantize kernel(&input_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}

// S16 tensors must be symmetric: a non-zero input zero_point fails configure().
TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG)
{
  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, -1, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);

  Quantize kernel(&input_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}

} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
index 7af20f8c4..933a1128c 100644
--- a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
@@ -90,7 +90,7 @@ template <typename T> class ResizeBilinearTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ResizeBilinearTest, DataTypes);
+TYPED_TEST_SUITE(ResizeBilinearTest, DataTypes);
 
 TYPED_TEST(ResizeBilinearTest, SimpleTest)
 {
diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
index 0e9017c78..7ade02a6f 100644
--- a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
@@ -92,7 +92,7 @@ template <typename T> class ResizeNearestNeighborTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ResizeNearestNeighborTest, DataTypes);
+TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes);
 
 TYPED_TEST(ResizeNearestNeighborTest, SimpleTest)
 {
diff --git
a/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp index 2bd94875b..c0025faca 100644 --- a/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp +++ b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp @@ -33,7 +33,7 @@ template <typename T> class ReverseV2Test : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(ReverseV2Test, DataTypes); +TYPED_TEST_SUITE(ReverseV2Test, DataTypes); TYPED_TEST(ReverseV2Test, MultiDimensions) { diff --git a/compiler/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-interpreter/src/kernels/SVDF.cpp new file mode 100644 index 000000000..40d79aaa3 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SVDF.cpp @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

// SVDF (Singular Value Decomposition Filter) kernel: float and full-integer
// (int8 activations / int8 or int16 weights) paths; hybrid is not supported.
#include "kernels/SVDF.h"
#include "kernels/Utils.h"
#include "PALSVDF.h"

#include <tensorflow/lite/kernels/internal/quantization_util.h>

namespace luci_interpreter
{
namespace kernels
{

namespace
{
// Maps the luci FusedActFunc enum onto the TFLite activation enum the PAL expects.
TfLiteFusedActivation get_tflite_activation(Activation activation)
{
  switch (activation)
  {
    case luci::FusedActFunc::RELU:
      return kTfLiteActRelu;
    case luci::FusedActFunc::RELU6:
      return kTfLiteActRelu6;
    case luci::FusedActFunc::RELU_N1_TO_1:
      return kTfLiteActReluN1To1;
    case luci::FusedActFunc::TANH:
      return kTfLiteActTanh;
    case luci::FusedActFunc::SIGN_BIT:
      return kTfLiteActSignBit;
    case luci::FusedActFunc::NONE:
      return kTfLiteActNone;
    default:
      throw std::runtime_error("Unsupported activation type");
  }
}
} // namespace

// Inputs: input, weight_feature, weight_time, optional bias, activation state.
// Outputs: result plus seven scratchpad tensors used by the PAL implementation.
SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
           const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
           Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
           Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
           const SVDFParams &params)
  : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
                                 {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
                                  scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
                                 params)
{
  // Do nothing
}

// Validates the type combination (integer vs float path), checks the rank/
// filter/memory-size relations between the weight tensors, resizes the output
// and activation-state scratchpad, and lets the PAL size its own scratchpads.
void SVDF::configure()
{
  const Shape &input_shape = input()->shape();
  const Shape &weight_features_shape = weight_feature()->shape();
  const Shape &weight_time_shape = weight_time()->shape();

  // Validate Input Tensor:
  LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 ||
                         input()->element_type() == loco::DataType::S8);
  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);

  // Validate inputs and output types
  if (input()->element_type() == loco::DataType::S8)
  {
    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8);
    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 ||
                           weight_time()->element_type() == loco::DataType::S8);
    if (bias())
      LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32);

    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 ||
                           input_activation_state()->element_type() == loco::DataType::S8);
    LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8);

    // Note: now tflite support only ReLU activation for integer SVDF
    LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU);
  }
  else if (weight_feature()->element_type() == loco::DataType::FLOAT32)
  {
    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32);
    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32);
    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32);
    if (bias())
      LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32);
    LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
  }
  else if ((weight_feature()->element_type() == loco::DataType::U8 ||
            weight_feature()->element_type() == loco::DataType::S8) &&
           input()->element_type() == loco::DataType::FLOAT32)
  {
    // TODO:: support hybrid SVDF op
    throw std::runtime_error("Hybrid type is not currently supported");
  }
  else
  {
    throw std::runtime_error("Unsupported type.");
  }

  // Check all the parameters of tensor match within themselves and match the
  // input configuration.
  const int rank = params().svdf_rank;
  const int batch_size = input_shape.dim(0);
  const int num_filters = weight_features_shape.dim(0);
  LUCI_INTERPRETER_CHECK(rank != 0);
  LUCI_INTERPRETER_CHECK(num_filters % rank == 0);

  const int num_units = num_filters / rank;
  const int memory_size = weight_time_shape.dim(1);

  // Validate Weight_Feature Input Tensor:
  LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
  LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));

  // Validate Weight_Time Input Tensor:
  LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
  LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);

  // Validate Bias
  if (bias())
    LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);

  // Validate Input Activation State
  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);

  // Resize scratchpad_state to input_activation_state
  auto scratchpad_activation_state = getOutputTensors()[1];
  scratchpad_activation_state->resize({batch_size, memory_size * num_filters});

  // Resize output tensor
  output()->resize({batch_size, num_units});

  luci_interpreter_pal::SetupScratchpadTensor(
    input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
    getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
    getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
}

// Dispatches on the weight_feature type: float path, integer path, or a
// rejected hybrid (int8 weights with float input) combination.
void SVDF::execute() const
{
  switch (weight_feature()->element_type())
  {
    case loco::DataType::FLOAT32:
      evalFloat();
      break;
    case loco::DataType::S8:
    {
      if (input()->element_type() == loco::DataType::S8)
        evalInteger();
      else
        // TODO:: support hybrid SVDF op
        throw std::runtime_error("Hybrid type is not currently supported");
      break;
    }
    default:
      throw std::runtime_error("Unsupported type");
  }
}

// Full-integer path: folds the two effective scales (input*feature/state and
// state*time/output) into fixed-point multipliers, zeroes the activation-state
// scratchpad, and calls the PAL IntegerSVDF routine.
void SVDF::evalInteger() const
{
  const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
                                                     input_activation_state()->scale());
  const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
                                                     weight_time()->scale() / output()->scale());

  int32_t effective_scale_1_a;
  int effective_scale_1_b;
  int32_t effective_scale_2_a;
  int effective_scale_2_b;

  tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
  tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);

  TfLiteSVDFParams params_svdf{};
  params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
  params_svdf.rank = params().svdf_rank;
  params_svdf.activation = get_tflite_activation(params().activation);

  auto scratchpad_activation_state = getOutputTensors()[1];
  // Note: it is expected that activation_state input variable tensor reset to zero,
  // also expected that this variable tensor doesn't have buffer
  auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
  std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);

  auto scratchpad = getOutputTensors()[2];
  auto output_temp = getOutputTensors()[3];

  int32_t input_zp = input()->zero_point();
  int32_t output_zp = output()->zero_point();
  luci_interpreter_pal::IntegerSVDF(
    params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
    getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
    getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
    getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
    getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
    getTensorData<int32_t>(output_temp), effective_scale_1_a,
effective_scale_1_b, + effective_scale_2_a, effective_scale_2_b, input_zp, output_zp); +} + +void SVDF::evalFloat() const +{ + TfLiteSVDFParams params_svdf{}; + params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs; + params_svdf.rank = params().svdf_rank; + params_svdf.activation = get_tflite_activation(params().activation); + + auto scratchpad_activation_state = getOutputTensors()[1]; + // Note: it is expected that activation_state input variable tensor reset to zero, + // also expected that this variable tensor doesn't have buffer + auto scratchpad_data = getTensorData<float>(scratchpad_activation_state); + std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0); + + auto scratchpad_1 = getOutputTensors()[2]; + + luci_interpreter_pal::FloatSVDF( + params_svdf, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(weight_feature()), getTensorData<float>(weight_feature()), + getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()), + getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data, + getTensorShape(output()), getTensorData<float>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/SVDF.h b/compiler/luci-interpreter/src/kernels/SVDF.h new file mode 100644 index 000000000..335a6cd8f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SVDF.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H +#define LUCI_INTERPRETER_KERNELS_SVDF_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SVDF : public KernelWithParams<SVDFParams> +{ +public: + SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time, + const Tensor *bias, const Tensor *input_activation_state, Tensor *output, + Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2, + Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6, + const SVDFParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *weight_feature() const { return _inputs[1]; } + const Tensor *weight_time() const { return _inputs[2]; } + const Tensor *bias() const { return _inputs[3]; } + const Tensor *input_activation_state() const { return _inputs[4]; } + + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalInteger() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SVDF_H diff --git a/compiler/luci-interpreter/src/kernels/SVDF.test.cpp b/compiler/luci-interpreter/src/kernels/SVDF.test.cpp new file mode 100644 index 000000000..82bd9b009 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SVDF.test.cpp @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SVDF.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class SVDFTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(SVDFTest, FullIntegerTest) +{ + const int32_t batches = 2; + const int32_t input_size = 3; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, input_size}; + Shape weight_feature_shape{num_filters, input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape bias_shape{units}; + Shape activation_state_shape{batches, memory_size * num_filters}; + + std::vector<float> input_data{0.49837467, 0.19278903, 0.26584083, + 0.17660543, 0.52949083, -0.77931279}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 
0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + std::vector<float> bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999}; + + std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-1, 1); + std::pair<float, int32_t> weight_feature_quant_param = quantizationParams<int8_t>(-0.5, 0.5); + std::pair<float, int32_t> weight_time_quant_param = quantizationParams<int16_t>(-1, 1); + std::pair<float, int32_t> bias_quant_param = quantizationParams<int32_t>(-512, 512); + std::pair<float, int32_t> activation_state_quant_param = quantizationParams<int16_t>(-16, 16); + + std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-0.5, 0.5); + + Tensor input_tensor = + makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::S8>( + weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second, + weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = makeInputTensor<DataType::S16>( + weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second, + weight_time_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>( + bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor( + DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + + 
Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::S32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::S32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::RELU; + params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad_activation_state); + _memory_manager->allocate_memory(scratchpad_1); + _memory_manager->allocate_memory(scratchpad_2); + _memory_manager->allocate_memory(scratchpad_3); + _memory_manager->allocate_memory(scratchpad_4); + _memory_manager->allocate_memory(scratchpad_5); + _memory_manager->allocate_memory(scratchpad_6); + kernel.execute(); + + std::vector<int8_t> ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0}; + + std::vector<int32_t> ref_output_shape{batches, units}; + EXPECT_THAT(extractTensorData<int8_t>(output_tensor), ref_output_data); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(SVDFTest, FloatTest) +{ + const int32_t batches = 2; + const int32_t input_size = 3; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, input_size}; + Shape weight_feature_shape{num_filters, input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape 
activation_state_shape{batches, memory_size * num_filters}; + + std::vector<float> input_data{0.12609188, -0.46347019, -0.89598465, + 0.35867718, 0.36897406, 0.73463392}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>( + weight_feature_shape, weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = + makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::NONE; 
+ params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad_activation_state); + _memory_manager->allocate_memory(scratchpad_1); + _memory_manager->allocate_memory(scratchpad_2); + _memory_manager->allocate_memory(scratchpad_3); + _memory_manager->allocate_memory(scratchpad_4); + _memory_manager->allocate_memory(scratchpad_5); + _memory_manager->allocate_memory(scratchpad_6); + kernel.execute(); + + std::vector<float> ref_output_data{0.014899, -0.0517661, -0.143725, -0.00271883, + -0.03004015, 0.09565311, 0.1587342, 0.00784263}; + + std::vector<float> ref_output_shape{batches, units}; + const float tolerance = 1e-5; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(SVDFTest, Unsupported_Type_Configure_NEG) +{ + const int32_t batches = 2; + const int32_t input_size = 3; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, input_size}; + Shape weight_feature_shape{num_filters, input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape activation_state_shape{batches, memory_size * num_filters}; + + std::vector<int32_t> input_data{0, 1, 3, 4, 4, -2}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 
0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>( + weight_feature_shape, weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = + makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::NONE; + params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SVDFTest, Invalid_Input_Shape_NEG) +{ + 
const int32_t batches = 2; + const int32_t right_input_size = 3; + const int32_t wrong_input_size = 4; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, wrong_input_size}; + Shape weight_feature_shape{num_filters, right_input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape activation_state_shape{batches, memory_size * num_filters}; + + std::vector<float> input_data{0, 1, 3, 2, 4, 4, -2, 1}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>( + weight_feature_shape, weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = + makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor 
scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::NONE; + params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Slice.cpp b/compiler/luci-interpreter/src/kernels/Slice.cpp index 37a834a18..2fe2c5471 100644 --- a/compiler/luci-interpreter/src/kernels/Slice.cpp +++ b/compiler/luci-interpreter/src/kernels/Slice.cpp @@ -139,6 +139,11 @@ void Slice::execute() const getTensorData<uint8_t>(input()), getTensorShape(output()), getTensorData<uint8_t>(output())); break; + case DataType::S8: + luci_interpreter_pal::Slice(op_params, getTensorShape(input()), + getTensorData<int8_t>(input()), getTensorShape(output()), + getTensorData<int8_t>(output())); + break; default: throw std::runtime_error("Unsupported input type."); } diff --git a/compiler/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-interpreter/src/kernels/Slice.test.cpp index 3e0d0b0d7..517982990 100644 --- a/compiler/luci-interpreter/src/kernels/Slice.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Slice.test.cpp @@ -31,8 +31,8 @@ template <typename T> class SliceTest : public ::testing::Test { }; -using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(SliceTest, DataTypes); +using DataTypes = ::testing::Types<float, uint8_t, int8_t>; 
+TYPED_TEST_SUITE(SliceTest, DataTypes); TYPED_TEST(SliceTest, SimpleTest) { diff --git a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp index 9de40b6ec..08e70672d 100644 --- a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp @@ -93,7 +93,7 @@ template <typename T> class SoftmaxTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t, int8_t>; -TYPED_TEST_CASE(SoftmaxTest, DataTypes); +TYPED_TEST_SUITE(SoftmaxTest, DataTypes); TYPED_TEST(SoftmaxTest, Simple) { diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp index e06501c8c..3a8b0a812 100644 --- a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp +++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp @@ -90,7 +90,7 @@ template <typename T> class SpaceToBatchNDTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(SpaceToBatchNDTest, DataTypes); +TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes); TYPED_TEST(SpaceToBatchNDTest, Simple) { diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp index 735c010b9..4af488618 100644 --- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp +++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp @@ -32,7 +32,7 @@ template <typename T> class SpaceToDepthTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(SpaceToDepthTest, DataTypes); +TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes); TYPED_TEST(SpaceToDepthTest, SimpleCase) { diff --git a/compiler/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-interpreter/src/kernels/Split.test.cpp index 74d57aed3..283cd9aa9 100644 --- 
a/compiler/luci-interpreter/src/kernels/Split.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Split.test.cpp @@ -73,7 +73,7 @@ template <typename T> class SplitTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(SplitTest, DataTypes); +TYPED_TEST_SUITE(SplitTest, DataTypes); TYPED_TEST(SplitTest, FourDimensional) { diff --git a/compiler/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp index aac0567d7..035bc2122 100644 --- a/compiler/luci-interpreter/src/kernels/SplitV.test.cpp +++ b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp @@ -77,7 +77,7 @@ template <typename T> class SplitVTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t, int16_t>; -TYPED_TEST_CASE(SplitVTest, DataTypes); +TYPED_TEST_SUITE(SplitVTest, DataTypes); TYPED_TEST(SplitVTest, ThreeDimensional) { diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp index d3326fe98..1bc0b6459 100644 --- a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp @@ -56,7 +56,7 @@ template <typename T> class SqueezeTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(SqueezeTest, DataTypes); +TYPED_TEST_SUITE(SqueezeTest, DataTypes); TYPED_TEST(SqueezeTest, TotalTest) { diff --git a/compiler/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-interpreter/src/kernels/Sub.cpp index 603c62d0f..24b6a72e5 100644 --- a/compiler/luci-interpreter/src/kernels/Sub.cpp +++ b/compiler/luci-interpreter/src/kernels/Sub.cpp @@ -37,6 +37,7 @@ Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubPa void Sub::configure() { LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type())) + LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type())) 
output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); } @@ -47,6 +48,12 @@ void Sub::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; case DataType::U8: evalQuantized(); break; @@ -57,13 +64,8 @@ void Sub::execute() const void Sub::evalFloat() const { - float activation_min{}; - float activation_max{}; - calculateActivationRange(_params.activation, &activation_min, &activation_max); - tflite::ArithmeticParams params{}; - params.float_activation_min = activation_min; - params.float_activation_max = activation_max; + fillArithmeticActivationRange<float>(params, _params.activation); const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( getTensorShape(input1()), getTensorShape(input2()), ¶ms); @@ -82,6 +84,28 @@ void Sub::evalFloat() const } } +template <typename T> void Sub::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<T>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastSubSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + void Sub::evalQuantized() const { const auto input1_scale = static_cast<double>(input1()->scale()); diff --git a/compiler/luci-interpreter/src/kernels/Sub.h b/compiler/luci-interpreter/src/kernels/Sub.h index d7940b5c6..23952b3bd 100644 --- a/compiler/luci-interpreter/src/kernels/Sub.h +++ 
b/compiler/luci-interpreter/src/kernels/Sub.h @@ -39,6 +39,7 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; void evalQuantized() const; }; diff --git a/compiler/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-interpreter/src/kernels/Sub.test.cpp index c189f4481..9abafd49a 100644 --- a/compiler/luci-interpreter/src/kernels/Sub.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Sub.test.cpp @@ -162,6 +162,51 @@ TEST_F(SubTest, Float) } } +template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<dtype>> test_outputs = { + {0, 1, 2, 3, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 7, 0, 3, 0, + 0, 2, 4, 4, 0, 0, 3, 0, 10, 0, 6, 0, 3, 0, 10, 2, 6, 0}, + {0, 1, 4, 1, 3, 0, 0, 2, 10, 0, 6, 0}, + {0, 0, 0, 1, 2, 5, 0, 0, 0, 0, 4, 3, 0, 0, 3, 0, 7, 0, + 2, 4, 0, 2, 0, 0, 8, 0, 6, 0, 1, 0, 8, 2, 6, 0, 1, 0}, + {0, 0, 0, 0, 7, 0, 2, 4, 6, 0, 1, 0}}; + std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1}; + std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +}; + +TEST_F(SubTest, SInt32) +{ + CheckInteger<loco::DataType::S32>(_memory_manager.get()); + 
SUCCEED(); +} + +TEST_F(SubTest, SInt64) +{ + CheckInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + TEST_F(SubTest, Input_Output_Type_NEG) { Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); @@ -175,11 +220,24 @@ TEST_F(SubTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST_F(SubTest, Invalid_Input_Type_NEG) +TEST_F(SubTest, Invalid_Output_Type_NEG) { Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); - Tensor output_tensor = makeOutputTensor(DataType::S64); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SubTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); SubParams params{}; params.activation = Activation::RELU; @@ -190,6 +248,19 @@ TEST_F(SubTest, Invalid_Input_Type_NEG) EXPECT_ANY_THROW(kernel.execute()); } +TEST_F(SubTest, Mismatching_Input_Int_Types_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp 
b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp index 107179910..43be8f8b9 100644 --- a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp @@ -52,7 +52,7 @@ template <typename T> class TransposeTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(TransposeTest, DataTypes); +TYPED_TEST_SUITE(TransposeTest, DataTypes); TYPED_TEST(TransposeTest, Small3D) { diff --git a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp index 4f22c9f30..9384ddc83 100644 --- a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp @@ -75,7 +75,7 @@ template <typename T> class UnpackTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(UnpackTest, DataTypes); +TYPED_TEST_SUITE(UnpackTest, DataTypes); TYPED_TEST(UnpackTest, ThreeOutputs) { diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-interpreter/src/kernels/Utils.cpp index 586cfa1e1..5d8e5db83 100644 --- a/compiler/luci-interpreter/src/kernels/Utils.cpp +++ b/compiler/luci-interpreter/src/kernels/Utils.cpp @@ -27,17 +27,18 @@ namespace luci_interpreter namespace kernels { -void calculateActivationRange(Activation activation, float *activation_min, float *activation_max) +template <typename T> +void calculateActivationRange(Activation activation, T *activation_min, T *activation_max) { switch (activation) { case Activation::NONE: - *activation_min = std::numeric_limits<float>::lowest(); - *activation_max = std::numeric_limits<float>::max(); + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); break; case Activation::RELU: *activation_min = 0; - *activation_max = std::numeric_limits<float>::max(); + *activation_max = std::numeric_limits<T>::max(); break; case 
Activation::RELU_N1_TO_1: *activation_min = -1; @@ -52,6 +53,13 @@ void calculateActivationRange(Activation activation, float *activation_min, floa } } +template void calculateActivationRange(Activation activation, float *activation_min, + float *activation_max); +template void calculateActivationRange(Activation activation, int32_t *activation_min, + int32_t *activation_max); +template void calculateActivationRange(Activation activation, int64_t *activation_min, + int64_t *activation_max); + static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax, const Tensor *output, int32_t *activation_min, int32_t *activation_max) @@ -175,7 +183,11 @@ Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_ { const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1; const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1; - assert(input1_dim == input2_dim || input1_dim == 1 || input2_dim == 1); + + bool need_broadcast = input1_dim != input2_dim; + bool can_broadcast = input1_dim == 1 || input2_dim == 1; + LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast); + output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim); } diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h index 817a42f83..ebeb20e66 100644 --- a/compiler/luci-interpreter/src/kernels/Utils.h +++ b/compiler/luci-interpreter/src/kernels/Utils.h @@ -76,11 +76,42 @@ inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2 return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3; } -void calculateActivationRange(Activation activation, float *activation_min, float *activation_max); +template <typename T> +void calculateActivationRange(Activation activation, T *activation_min, T *activation_max); void calculateActivationRangeQuantized(Activation activation, const 
Tensor *output, int32_t *activation_min, int32_t *activation_max); +template <typename T> constexpr bool one_of_types() { return false; } + +// Checks if T is equal to one of {U,Other} types +template <typename T, typename U, typename... Other> constexpr bool one_of_types() +{ + return std::is_same<T, U>::value || one_of_types<T, Other...>(); +} + +/** + * Fills activation min and max parameters depending on given data type and activation + * + * T is a template parameter, so after optimization this code left with only required if case + * + * @tparam T data type of arithmetic operation output tensor + * @param params tflite params to fill + * @param activation luci_interpreter::Activation of arithmetic operation + */ +template <typename T> +void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act) +{ + static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype"); + + if (std::is_same<T, float>::value) + calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max); + if (std::is_same<T, int32_t>::value) + calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max); + else + calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max); +} + // Decompose a double multiplier into a Q0.31 int32 representation of its // significand, and shift representation of its exponent. 
// diff --git a/compiler/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-interpreter/src/loader/CMakeLists.txt index 2cde99f5d..292771592 100644 --- a/compiler/luci-interpreter/src/loader/CMakeLists.txt +++ b/compiler/luci-interpreter/src/loader/CMakeLists.txt @@ -17,7 +17,9 @@ endmacro(REGISTER_KERNEL) include(${KERNEL_REGISTER_FILE}) add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES}) -set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}") target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp index a14442ed5..dba39050c 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp +++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp @@ -73,6 +73,26 @@ const void *getNodeData(const luci::CircleConst *node, size_t *data_size) } } +const void *getNodeData(const luci::CircleCustom *node, size_t *data_size) +{ + if (node->custom_code() != "CircleReferencingConst") + return nullptr; + + // helper struct which describes data loaded to custom_options of CircleReferencingConst node + // TODO move this struct to header + struct ConstDataReference + { + const uint8_t *data = nullptr; + uint32_t size = 0; + }; + + const auto &custom_options = node->custom_options(); + const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data()); + + *data_size = const_data_ref.size; + return const_data_ref.data; +} + bool isExecutableNode(const luci::CircleNode *node) { switch (node->opcode()) @@ -83,12 +103,30 @@ bool isExecutableNode(const luci::CircleNode *node) case 
luci::CircleOpcode::CIRCLEOUTPUT: case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE: // The following nodes denote outputs of multiple-output nodes. + case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT: + case luci::CircleOpcode::CIRCLECUSTOMOUT: case luci::CircleOpcode::CIRCLEIFOUT: + case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT: + case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT: case luci::CircleOpcode::CIRCLESPLITOUT: case luci::CircleOpcode::CIRCLESPLITVOUT: + case luci::CircleOpcode::CIRCLETOPKV2OUT: + case luci::CircleOpcode::CIRCLEUNIQUEOUT: case luci::CircleOpcode::CIRCLEUNPACKOUT: + case luci::CircleOpcode::CIRCLEVARIABLE: case luci::CircleOpcode::CIRCLEWHILEOUT: return false; + // Custom nodes may be executable and non-executable + case luci::CircleOpcode::CUSTOM: + { + auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node); + + // TODO handle more non-executable Custom ops here + if (custom_node->custom_code() == "CircleReferencingConst") + return false; + + return true; + } default: return true; } @@ -102,15 +140,34 @@ bool isTensorProducingNode(const luci::CircleNode *node) case luci::CircleOpcode::CIRCLEOUTPUT: // The following nodes are multiple-output nodes. They do not produce tensors, the tensors // are produced by the corresponding *Out nodes instead. 
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM: + case luci::CircleOpcode::CUSTOM: case luci::CircleOpcode::IF: + case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4: + case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5: case luci::CircleOpcode::SPLIT: + case luci::CircleOpcode::SPLIT_V: + case luci::CircleOpcode::TOPK_V2: + case luci::CircleOpcode::UNIQUE: case luci::CircleOpcode::UNPACK: + case luci::CircleOpcode::WHILE: return false; default: return true; } } +bool isSupportedCustomNode(const luci::CircleNode *node) +{ + const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node); + + // TODO handle more Custom ops here + if (custom_node->custom_code() == "CircleReferencingConst") + return true; + + return false; +} + } // namespace GraphLoader::GraphLoader( @@ -129,18 +186,25 @@ void GraphLoader::loadTensors() { const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i)); + if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node)) + throw std::runtime_error("Unknown Custom Node, yet."); + if (!isTensorProducingNode(node)) continue; - // Only Input and Const nodes have shapes. Shapes of intermediate tensors will be inferred. + // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will + // be inferred. 
Shape shape{}; - if (const auto *input_node = dynamic_cast<const luci::CircleInput *>(node)) + switch (node->opcode()) { - shape = getNodeShape(input_node); - } - else if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node)) - { - shape = getNodeShape(const_node); + case luci::CircleOpcode::CIRCLECONST: + case luci::CircleOpcode::CIRCLECUSTOMOUT: + case luci::CircleOpcode::CIRCLEINPUT: + case luci::CircleOpcode::CIRCLEVARIABLE: + shape = getNodeShape(node); + break; + default: + break; } AffineQuantization quantization; @@ -175,6 +239,22 @@ void GraphLoader::loadTensors() tensor->writeData(const_data, data_size); } } + else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node)) + { + const auto *custom_node = + loco::must_cast<const luci::CircleCustom *>(custom_out_node->input()); + + if (custom_node->custom_code() == "CircleReferencingConst") + { + size_t data_size{}; + const void *const_data = getNodeData(custom_node, &data_size); + if (const_data != nullptr) + { + _memory_manager->allocate_memory(*tensor); + tensor->writeData(const_data, data_size); + } + } + } _node_to_tensor.emplace(node, tensor.get()); _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node); diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp index 7a457a62f..b221b6921 100644 --- a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp +++ b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp @@ -21,6 +21,7 @@ #include <kernels/Add.h> #include <kernels/ArgMax.h> #include <kernels/AveragePool2D.h> +#include <kernels/BatchMatMul.h> #include <kernels/Cast.h> #include <kernels/Concatenation.h> #include <kernels/Conv2D.h> @@ -54,6 +55,7 @@ #include <kernels/Mul.h> #include <kernels/Neg.h> #include <kernels/NotEqual.h> +#include <kernels/OneHot.h> #include <kernels/Pad.h> #include <kernels/PadV2.h> #include <kernels/Pow.h> @@ -209,6 +211,27 @@ 
TEST_F(KernelBuilderTest, AveragePool2D) EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); } +TEST_F(KernelBuilderTest, BatchMatMul) +{ + auto *lhs = createInputNode(); + auto *rhs = createInputNode(); + + auto *op = createNode<luci::CircleBatchMatMul>(); + op->x(lhs); + op->y(rhs); + op->adj_x(false); + op->adj_y(false); + + auto kernel = buildKernel<kernels::BatchMatMul>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), lhs); + checkTensor(kernel->y(), rhs); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x())); + EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y())); +} + TEST_F(KernelBuilderTest, Cast) { auto *input = createInputNode(); @@ -832,6 +855,31 @@ TEST_F(KernelBuilderTest, NotEqual) checkTensor(kernel->output(), op); } +TEST_F(KernelBuilderTest, OneHot) +{ + auto *indices = createInputNode(); + auto *depth = createInputNode(); + auto *on_value = createInputNode(); + auto *off_value = createInputNode(); + auto axis = 1; + + auto *op = createNode<luci::CircleOneHot>(); + op->indices(indices); + op->depth(depth); + op->on_value(on_value); + op->off_value(off_value); + op->axis(axis); + + auto kernel = buildKernel<kernels::OneHot>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->indices(), indices); + checkTensor(kernel->depth(), depth); + checkTensor(kernel->on_value(), on_value); + checkTensor(kernel->off_value(), off_value); + EXPECT_THAT(kernel->params().axis, Eq(op->axis())); +} + TEST_F(KernelBuilderTest, Pad) { auto *input = createInputNode(); diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp index 5bc37bd4a..efb011257 100644 --- a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp @@ -17,6 +17,7 @@ #include "Builders.h" #include "kernels/AveragePool2D.h" +#include 
<luci/Plan/CircleNodeExecutionPlan.h> namespace luci_interpreter { @@ -40,7 +41,26 @@ std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode params.stride_width = node->stride()->w(); params.activation = node->fusedActivationFunction(); - return std::make_unique<kernels::AveragePool2D>(input, output, params); + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleConv2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in scratchpad. + scratchpad->set_offset(execution_plan.offsets().at(1)); + } + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); + + return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params); } } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp new file mode 100644 index 000000000..aae3dbab1 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/BatchMatMul.h" +#include <luci/Plan/CircleNodeExecutionPlan.h> + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleBatchMatMul *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *lhs = helper.getInputTensor(node->x()); + const Tensor *rhs = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + auto lhs_scratchpad = + std::make_unique<Tensor>(lhs->element_type(), Shape({}), AffineQuantization{}, ""); + lhs_scratchpad->set_observable(false); + lhs_scratchpad->set_data_buffer(nullptr); + auto rhs_scratchpad = + std::make_unique<Tensor>(rhs->element_type(), Shape({}), AffineQuantization{}, ""); + rhs_scratchpad->set_observable(false); + rhs_scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current BatchMatMul temporary was found. 
+ if (execution_plan.offsets().size() > 1) + { + assert(execution_plan.offsets().size() == 3); + + // If this is true, then we keep this offset in scratchpad. + lhs_scratchpad->set_offset(execution_plan.offsets().at(1)); + rhs_scratchpad->set_offset(execution_plan.offsets().at(2)); + } + } + Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad)); + Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad)); + + BatchMatMulParams params; + params.adj_x = node->adj_x(); + params.adj_y = node->adj_y(); + + return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp index 22fd1aca4..b48d97d19 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp @@ -35,11 +35,12 @@ std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle const Tensor *bias = helper.getOptionalInputTensor(node->bias()); Tensor *output = helper.getOutputTensor(node); - auto im2col = - std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, ""); - im2col->set_observable(false); - im2col->set_data_buffer(nullptr); - // If node has execution plan then read memory offsets for im2col temporary tensor + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor // from the beginning of shared memory buffer. // Used in Static Memory Manager. 
// TODO move tensors offset initialization to one place @@ -48,10 +49,10 @@ std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle const auto execution_plan = luci::get_execution_plan(node); // Check whether the offset for the current CircleConv2D temporary was found. if (execution_plan.offsets().size() > 1) - // If this is true, then we keep this offset in im2col. - im2col->set_offset(execution_plan.offsets().at(1)); + // If this is true, then we keep this offset in scratchpad. + scratchpad->set_offset(execution_plan.offsets().at(1)); } - Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(im2col)); + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); Conv2DParams params{}; params.padding = node->padding(); diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp index c2f0346a2..db26ecf2e 100644 --- a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp @@ -17,6 +17,7 @@ #include "Builders.h" #include "kernels/DepthwiseConv2D.h" +#include <luci/Plan/CircleNodeExecutionPlan.h> namespace luci_interpreter { @@ -43,7 +44,26 @@ std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNod params.dilation_width_factor = node->dilation()->w(); params.activation = node->fusedActivationFunction(); - return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params); + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. 
+ // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleConv2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in scratchpad. + scratchpad->set_offset(execution_plan.offsets().at(1)); + } + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); + + return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params); } } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp new file mode 100644 index 000000000..4aae56469 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Dequantize.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleDequantize *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Dequantize>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp new file mode 100644 index 000000000..9840c34e5 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ExpandDims.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleExpandDims *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axis = helper.getInputTensor(node->axis()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::ExpandDims>(input, axis, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp index 2917598fc..0b8ac44bd 100644 --- a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp @@ -36,6 +36,7 @@ std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode FullyConnectedParams params{}; params.activation = node->fusedActivationFunction(); + params.keep_num_dims = node->keep_num_dims(); return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params); } diff --git a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp new file mode 100644 index 000000000..9df9775c5 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Gather.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleGather *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *params = helper.getInputTensor(node->params()); + const Tensor *indices = helper.getInputTensor(node->indices()); + Tensor *output = helper.getOutputTensor(node); + + GatherParams gparams{}; + gparams.axis = node->axis(); + // TODO support batch_dims + gparams.batch_dims = 0; + + return std::make_unique<kernels::Gather>(params, indices, output, gparams); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp new file mode 100644 index 000000000..a40160945 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/OneHot.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleOneHot *>(circle_node); + assert(node->arity() == 4); + + const Tensor *indices = helper.getInputTensor(node->indices()); + const Tensor *depth = helper.getInputTensor(node->depth()); + const Tensor *on_value = helper.getInputTensor(node->on_value()); + const Tensor *off_value = helper.getInputTensor(node->off_value()); + Tensor *output = helper.getOutputTensor(node); + + OneHotParams params{}; + params.axis = node->axis(); + + return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp new file mode 100644 index 000000000..fd9836345 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Quantize.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleQuantize *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Quantize>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp new file mode 100644 index 000000000..89528d5ee --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SVDF.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleSVDF *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *feature = helper.getInputTensor(node->weight_feature()); + const Tensor *time = helper.getInputTensor(node->weight_time()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state()); + Tensor *output = helper.getOutputTensor(node); + + auto scratchpad_tensor = std::make_unique<Tensor>(input_activation_state->element_type(), + Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + DataType data_type = input->element_type() == DataType::S8 ? 
DataType::S32 : DataType::FLOAT32; + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + if (data_type == DataType::FLOAT32 && + (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8)) + { + data_type = feature->element_type(); + } + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + data_type = DataType::FLOAT32; + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + SVDFParams params{}; + 
params.activation = node->fusedActivationFunction(); + params.svdf_rank = node->svdf_rank(); + params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs(); + + return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output, + tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt index 94347082c..c8a2e12e1 100644 --- a/compiler/luci-micro/CMakeLists.txt +++ b/compiler/luci-micro/CMakeLists.txt @@ -6,7 +6,7 @@ set(ARM_OBJCOPY "arm-none-eabi-objcopy") find_program(ARM_C_COMPILER_PATH ${ARM_C_COMPILER}) if(NOT ARM_C_COMPILER_PATH) - message(WARNING "ARM compiler is NOT FOUND, skipping luci-micro build") + message(STATUS "Build luci-micro: FALSE(ARM compiler is NOT FOUND)") return() endif() diff --git a/compiler/luci-pass-value-test/CMakeLists.txt b/compiler/luci-pass-value-test/CMakeLists.txt index b31415870..034fe5269 100644 --- a/compiler/luci-pass-value-test/CMakeLists.txt +++ b/compiler/luci-pass-value-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + unset(TEST_DEPS) unset(LUCI_PASS_VALUE_TESTS) @@ -38,7 +42,7 @@ add_test(NAME luci_pass_value_test COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/eval_driver.sh" "${CMAKE_CURRENT_BINARY_DIR}" "${ARTIFACTS_BIN_PATH}" - "${NNCC_OVERLAY_DIR}/venv_2_6_0" + "${NNCC_OVERLAY_DIR}/venv_2_8_0" "$<TARGET_FILE:luci_eval_driver>" ${LUCI_PASS_VALUE_TESTS} ) diff --git a/compiler/luci-pass-value-test/eval_result_verifier.py b/compiler/luci-pass-value-test/eval_result_verifier.py index c6005edfc..0073c4db5 100644 --- a/compiler/luci-pass-value-test/eval_result_verifier.py +++ b/compiler/luci-pass-value-test/eval_result_verifier.py @@ -22,6 +22,18 @@ circle_model = args.circle interpreter = tf.lite.Interpreter(tflite_model) interpreter.allocate_tensors() +# Read SignatureDef and get output tensor id orders for remapping 
+full_signatures = interpreter._get_full_signature_list() +full_signatures_outputs_remap = None +if full_signatures != None: + signature_serving_default = full_signatures.get('serving_default', None) + if signature_serving_default != None: + signature_outputs = signature_serving_default['outputs'] + + full_signatures_outputs_remap = [] + for index, (key, value) in enumerate(signature_outputs.items()): + full_signatures_outputs_remap.append(value) + # Generate random input data. num_inputs = len(interpreter.get_input_details()) for i in range(num_inputs): @@ -33,6 +45,10 @@ for i in range(num_inputs): input_data = np.array( np.random.randint(0, 256, size=input_details["shape"]), input_details["dtype"]) + elif input_details["dtype"] == np.int16: + input_data = np.array( + np.random.randint(0, 100, size=input_details["shape"]), + input_details["dtype"]) elif input_details["dtype"] == np.bool_: input_data = np.array( np.random.choice(a=[True, False], size=input_details["shape"]), @@ -55,48 +71,38 @@ subprocess.run( check=True) # Compare the results. 
-for idx in range(len(interpreter.get_output_details())): - output_details = interpreter.get_output_details()[idx] +inpt_output_details = interpreter.get_output_details() +for idx in range(len(inpt_output_details)): + output_details = inpt_output_details[idx] output_data = np.fromfile(circle_model + ".output" + str(idx), output_details["dtype"]) shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r') output_shape = [int(i) for i in shape_file.read().split(',')] luci_output_data = np.reshape(output_data, output_shape) + output_tensor = output_details["index"] + if full_signatures_outputs_remap != None: + output_tensor = full_signatures_outputs_remap[idx] + intp_output_data = interpreter.get_tensor(output_tensor) try: if output_details["dtype"] == np.uint8: - if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=0, - atol=0) == False: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) elif output_details["dtype"] == np.float32: if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=1.e-5, - atol=1.e-5) == False: + luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) elif output_details["dtype"] == np.int64: - if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=0, - atol=0) == False: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) elif output_details["dtype"] == np.int32: - if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=0, - 
atol=0) == False: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: + raise SystemExit("Execution result of " + tflite_model + + " does not match with " + circle_model) + elif output_details["dtype"] == np.int16: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) else: diff --git a/compiler/luci-pass-value-test/test.lst b/compiler/luci-pass-value-test/test.lst index 9c408887d..67476c644 100644 --- a/compiler/luci-pass-value-test/test.lst +++ b/compiler/luci-pass-value-test/test.lst @@ -29,3 +29,7 @@ addeval(Net_InstanceNorm_001 fuse_instnorm) addeval(Net_InstanceNorm_002 fuse_instnorm) addeval(Net_InstanceNorm_003 fuse_instnorm) addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice) + +# test SignatureDef, with any optimization +#addeval(SignatureDef_MultiOut_000 fuse_instnorm) +#addeval(SignatureDef_MultiOut_001 fuse_instnorm) diff --git a/compiler/luci-value-test/CMakeLists.txt b/compiler/luci-value-test/CMakeLists.txt index 3c7185b80..ebf9c5926 100644 --- a/compiler/luci-value-test/CMakeLists.txt +++ b/compiler/luci-value-test/CMakeLists.txt @@ -1,9 +1,18 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + unset(LUCI_VALUE_TESTS) +unset(LUCI_VALUE_TESTS_TOL) macro(addeval NAME) list(APPEND LUCI_VALUE_TESTS ${NAME}) endmacro(addeval) +macro(addevaltol NAME RTOL ATOL) + list(APPEND LUCI_VALUE_TESTS_TOL ${NAME} ${RTOL} ${ATOL}) +endmacro(addevaltol) + # Read "test.lst" include("test.lst") # Read "test.local.lst" if exists @@ -12,13 +21,60 @@ include("test.local.lst" OPTIONAL) # Generate dependencies add_custom_target(luci_eval_testfiles ALL DEPENDS ${TESTFILES}) -get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR) +if(NOT CMAKE_CROSSCOMPILING) + + get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR) + + add_test(NAME luci_value_test + COMMAND 
"${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh" + "${CMAKE_CURRENT_BINARY_DIR}" + "${ARTIFACTS_BIN_PATH}" + "${NNCC_OVERLAY_DIR}/venv_2_8_0" + "$<TARGET_FILE:luci_eval_driver>" + ${LUCI_VALUE_TESTS} + ) + + if(DEFINED LUCI_VALUE_TESTS_TOL) + add_test(NAME luci_value_tol_test + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverifytol.sh" + "${CMAKE_CURRENT_BINARY_DIR}" + "${ARTIFACTS_BIN_PATH}" + "${NNCC_OVERLAY_DIR}/venv_2_8_0" + "$<TARGET_FILE:luci_eval_driver>" + ${LUCI_VALUE_TESTS_TOL} + ) + endif() + +else(NOT CMAKE_CROSSCOMPILING) + # NOTE target test is carried out using reference input/output data from host + # test results. this is because it would be difficult to prepare + # TensorFlow lite for target device. + # thus, one must run the host test and then run the test in target device + # with the test result files from the host test. + + if(NOT DEFINED ENV{BUILD_HOST_EXEC}) + message(STATUS "BUILD_HOST_EXEC not set: Skip luci-value-test") + return() + endif(NOT DEFINED ENV{BUILD_HOST_EXEC}) + + set(ARTIFACTS_BIN_PATH $ENV{BUILD_HOST_EXEC}/compiler/common-artifacts) + + add_test(NAME luci_value_cross_test + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify_ref.sh" + "${CMAKE_CURRENT_BINARY_DIR}" + "${ARTIFACTS_BIN_PATH}" + "$<TARGET_FILE:luci_eval_driver>" + ${LUCI_VALUE_TESTS} + ) + + if(DEFINED LUCI_VALUE_TESTS_TOL) + add_test(NAME luci_value_cross_tol_test + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverifytol_ref.sh" + "${CMAKE_CURRENT_BINARY_DIR}" + "${ARTIFACTS_BIN_PATH}" + "$<TARGET_FILE:luci_eval_driver>" + ${LUCI_VALUE_TESTS_TOL} + ) + endif() -add_test(NAME luci_value_test - COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh" - "${CMAKE_CURRENT_BINARY_DIR}" - "${ARTIFACTS_BIN_PATH}" - "${NNCC_OVERLAY_DIR}/venv_2_6_0" - "$<TARGET_FILE:luci_eval_driver>" - ${LUCI_VALUE_TESTS} -) +endif(NOT CMAKE_CROSSCOMPILING) diff --git a/compiler/luci-value-test/evalverify.sh b/compiler/luci-value-test/evalverify.sh index 01c4bce46..3d2091176 100755 --- 
a/compiler/luci-value-test/evalverify.sh +++ b/compiler/luci-value-test/evalverify.sh @@ -4,10 +4,12 @@ # # HOW TO USE # -# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <TEST 1> <TEST 2> ... +# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <path/to/eval_driver> \ +# <TEST 1> <TEST 2> ... # bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test) # work_dir : artifacts directoy where test materials exist # venv_dir : python virtual environment home directory +# eval_driver : luci_eval_driver path for evaluation VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier.py" diff --git a/compiler/luci-value-test/evalverify_ref.sh b/compiler/luci-value-test/evalverify_ref.sh new file mode 100755 index 000000000..f1e538aa3 --- /dev/null +++ b/compiler/luci-value-test/evalverify_ref.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +# This script verifies the basic behavior of luci interpreter +# +# HOW TO USE +# +# ./evalverify_ref.sh <path/to/bin_dir> <path/to/ref_dir> <path/to/eval_driver> \ +# <TEST 1> <TEST 2> ... 
+# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test) +# ref_dir : artifacts directoy where reference test materials exist +# eval_driver : luci_eval_driver path for evaluation + +VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier_ref.py" +BINDIR="$1"; shift +REFDIR="$1"; shift +INTERPRETER_DRIVER_PATH="$1"; shift + +TESTED=() +PASSED=() +FAILED=() + +for TESTCASE in "$@"; do + TESTED+=("${TESTCASE}") + + TESTCASE_FILE="${REFDIR}/${TESTCASE}" + TEST_RESULT_FILE="${BINDIR}/${TESTCASE}" + + PASSED_TAG="${TEST_RESULT_FILE}.passed" + rm -f "${PASSED_TAG}" + + cat > "${TEST_RESULT_FILE}.log" <( + exec 2>&1 + set -ex + + "python3" "${VERIFY_SCRIPT_PATH}" \ + --driver "${INTERPRETER_DRIVER_PATH}" \ + --model_ref "${TESTCASE_FILE}" \ + --work_path "${TEST_RESULT_FILE}" + + if [[ $? -eq 0 ]]; then + touch "${PASSED_TAG}" + fi + ) + + if [[ -f "${PASSED_TAG}" ]]; then + PASSED+=("${TESTCASE}") + else + FAILED+=("${TESTCASE}") + fi +done + +if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then + echo "FAILED" + for TEST in "${FAILED[@]}" + do + echo "- ${TEST}" + done + exit 255 +fi + +echo "PASSED" +exit 0 diff --git a/compiler/luci-value-test/evalverifytol.sh b/compiler/luci-value-test/evalverifytol.sh new file mode 100755 index 000000000..92094055a --- /dev/null +++ b/compiler/luci-value-test/evalverifytol.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +# This script verifies the basic behavior of luci interpreter +# +# HOW TO USE +# +# ./evalverifytol.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <path/to/eval_driver> \ +# <TEST 1> <RTOL 1> <ATOL 1> <TEST 2> <RTOL 2> <ATOL 2> ... 
+# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test) +# work_dir : artifacts directoy where test materials exist +# venv_dir : python virtual environment home directory + +VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier.py" +BINDIR="$1"; shift +WORKDIR="$1"; shift +VIRTUALENV="$1"; shift +INTERPRETER_DRIVER_PATH="$1"; shift + +TESTED=() +PASSED=() +FAILED=() + +while (( "$#" >= 3 )); do + TESTCASE=$1 + RTOLERANCE=$2 + ATOLERANCE=$3 + shift 3 + + TESTED+=("${TESTCASE}") + + TESTCASE_FILE="${WORKDIR}/${TESTCASE}" + TEST_RESULT_FILE="${BINDIR}/${TESTCASE}" + + PASSED_TAG="${TEST_RESULT_FILE}.passed" + rm -f "${PASSED_TAG}" + + cat > "${TEST_RESULT_FILE}.log" <( + exec 2>&1 + set -ex + + source "${VIRTUALENV}/bin/activate" + "${VIRTUALENV}/bin/python" "${VERIFY_SCRIPT_PATH}" \ + --driver "${INTERPRETER_DRIVER_PATH}" \ + --model "${TESTCASE_FILE}" \ + --rtolf32 "${RTOLERANCE}" \ + --atolf32 "${ATOLERANCE}" + + if [[ $? -eq 0 ]]; then + touch "${PASSED_TAG}" + fi + ) + + if [[ -f "${PASSED_TAG}" ]]; then + PASSED+=("${TESTCASE}") + else + FAILED+=("${TESTCASE}") + fi +done + +if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then + echo "FAILED" + for TEST in "${FAILED[@]}" + do + echo "- ${TEST}" + done + exit 255 +fi + +echo "PASSED" +exit 0 diff --git a/compiler/luci-value-test/evalverifytol_ref.sh b/compiler/luci-value-test/evalverifytol_ref.sh new file mode 100755 index 000000000..cc7267b18 --- /dev/null +++ b/compiler/luci-value-test/evalverifytol_ref.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +# This script verifies the basic behavior of luci interpreter +# +# HOW TO USE +# +# ./evalverifytol_ref.sh <path/to/bin_dir> <path/to/ref_dir> <path/to/eval_driver> \ +# <TEST 1> <RTOL 1> <ATOL 1> <TEST 2> <RTOL 2> <ATOL 2> ... 
+# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test) +# ref_dir : artifacts directoy where reference test materials exist +# eval_driver : luci_eval_driver path for evaluation + +VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier_ref.py" +BINDIR="$1"; shift +REFDIR="$1"; shift +INTERPRETER_DRIVER_PATH="$1"; shift + +TESTED=() +PASSED=() +FAILED=() + +while (( "$#" >= 3 )); do + TESTCASE=$1 + RTOLERANCE=$2 + ATOLERANCE=$3 + shift 3 + + TESTED+=("${TESTCASE}") + + TESTCASE_FILE="${REFDIR}/${TESTCASE}" + TEST_RESULT_FILE="${BINDIR}/${TESTCASE}" + + PASSED_TAG="${TEST_RESULT_FILE}.passed" + rm -f "${PASSED_TAG}" + + cat > "${TEST_RESULT_FILE}.log" <( + exec 2>&1 + set -ex + + "python3" "${VERIFY_SCRIPT_PATH}" \ + --driver "${INTERPRETER_DRIVER_PATH}" \ + --model_ref "${TESTCASE_FILE}" \ + --work_path "${TEST_RESULT_FILE}" \ + --rtolf32 "${RTOLERANCE}" \ + --atolf32 "${ATOLERANCE}" + + if [[ $? 
-eq 0 ]]; then + touch "${PASSED_TAG}" + fi + ) + + if [[ -f "${PASSED_TAG}" ]]; then + PASSED+=("${TESTCASE}") + else + FAILED+=("${TESTCASE}") + fi +done + +if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then + echo "FAILED" + for TEST in "${FAILED[@]}" + do + echo "- ${TEST}" + done + exit 255 +fi + +echo "PASSED" +exit 0 diff --git a/compiler/luci-value-test/luci_eval_verifier.py b/compiler/luci-value-test/luci_eval_verifier.py index a76bd1403..560e34fca 100755 --- a/compiler/luci-value-test/luci_eval_verifier.py +++ b/compiler/luci-value-test/luci_eval_verifier.py @@ -14,16 +14,41 @@ import traceback parser = argparse.ArgumentParser() parser.add_argument('--driver', type=str, required=True) parser.add_argument('--model', type=str, required=True) +parser.add_argument('--rtolf32', type=str, required=False) +parser.add_argument('--atolf32', type=str, required=False) args = parser.parse_args() driver = args.driver tflite_model = args.model + ".tflite" circle_model = args.model + ".circle" +rtolf32 = 1e-5 +atolf32 = 1e-5 +try: + if args.rtolf32 != None: + rtolf32 = float(args.rtolf32) + if args.atolf32 != None: + atolf32 = float(args.atolf32) +except ValueError: + print("rtolf32 or atolf32 is not a number") + quit(128) + # Build TFLite interpreter. interpreter = tf.lite.Interpreter(tflite_model) interpreter.allocate_tensors() +# Read SignatureDef and get output tensor id orders for remapping +full_signatures = interpreter._get_full_signature_list() +full_signatures_outputs_remap = None +if full_signatures != None: + signature_serving_default = full_signatures.get('serving_default', None) + if signature_serving_default != None: + signature_outputs = signature_serving_default['outputs'] + + full_signatures_outputs_remap = [] + for index, (key, value) in enumerate(signature_outputs.items()): + full_signatures_outputs_remap.append(value) + # Generate random input data. 
num_inputs = len(interpreter.get_input_details()) for i in range(num_inputs): @@ -31,19 +56,40 @@ for i in range(num_inputs): if input_details["dtype"] == np.float32: input_data = np.array( np.random.random_sample(input_details["shape"]), input_details["dtype"]) + input_dtype = "float32" elif input_details["dtype"] == np.uint8: input_data = np.array( np.random.randint(0, 256, size=input_details["shape"]), input_details["dtype"]) + input_dtype = "uint8" + elif input_details["dtype"] == np.int16: + input_data = np.array( + np.random.randint(0, 100, size=input_details["shape"]), + input_details["dtype"]) + input_dtype = "int16" + elif input_details["dtype"] == np.int32: + input_data = np.array( + np.random.randint(0, 100, size=input_details["shape"]), + input_details["dtype"]) + input_dtype = "int32" + elif input_details["dtype"] == np.int64: + input_data = np.array( + np.random.randint(0, 100, size=input_details["shape"]), + input_details["dtype"]) + input_dtype = "int64" elif input_details["dtype"] == np.bool_: input_data = np.array( np.random.choice(a=[True, False], size=input_details["shape"]), input_details["dtype"]) + input_dtype = "bool" else: raise SystemExit("Unsupported input dtype") interpreter.set_tensor(input_details["index"], input_data) input_data.tofile(circle_model + ".input" + str(i)) + input_details["shape"].tofile(circle_model + ".input" + str(i) + ".shape", sep=',') + with open(circle_model + ".input" + str(i) + ".dtype", 'w') as dtype_file: + dtype_file.write(input_dtype) # Do inference interpreter.invoke() @@ -57,34 +103,57 @@ subprocess.run( check=True) # Compare the results. 
-for idx in range(len(interpreter.get_output_details())): - output_details = interpreter.get_output_details()[idx] +inpt_output_details = interpreter.get_output_details() +for idx in range(len(inpt_output_details)): + output_details = inpt_output_details[idx] output_data = np.fromfile(circle_model + ".output" + str(idx), output_details["dtype"]) shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r') output_shape = [int(i) for i in shape_file.read().split(',')] luci_output_data = np.reshape(output_data, output_shape) - intp_output_data = interpreter.get_tensor(output_details["index"]) + output_tensor = output_details["index"] + if full_signatures_outputs_remap != None: + output_tensor = full_signatures_outputs_remap[idx] + intp_output_data = interpreter.get_tensor(output_tensor) try: if output_details["dtype"] == np.uint8: if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) + output_dtype = "uint8" elif output_details["dtype"] == np.float32: if np.allclose( - luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False: + luci_output_data, intp_output_data, rtol=rtolf32, + atol=atolf32) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) + output_dtype = "float32" elif output_details["dtype"] == np.int64: if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) + output_dtype = "int64" elif output_details["dtype"] == np.int32: if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) + output_dtype = "int32" + elif output_details["dtype"] == np.int16: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: + raise 
SystemExit("Execution result of " + tflite_model + + " does not match with " + circle_model) + output_dtype = "int16" + elif output_details["dtype"] == np.bool_: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: + raise SystemExit("Execution result of " + tflite_model + + " does not match with " + circle_model) + output_dtype = "bool" else: raise SystemExit("Unsupported data type: ", output_details["dtype"]) + + # save outputN.dtype file + with open(circle_model + ".output" + str(idx) + ".dtype", 'w') as dtype_file: + dtype_file.write(output_dtype) except: print(traceback.format_exc()) quit(255) diff --git a/compiler/luci-value-test/luci_eval_verifier_ref.py b/compiler/luci-value-test/luci_eval_verifier_ref.py new file mode 100755 index 000000000..5313e336e --- /dev/null +++ b/compiler/luci-value-test/luci_eval_verifier_ref.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +import numpy as np +import subprocess +import argparse +import traceback +import os + +# +# This script compares the execution result of luci-interpreter with that from ref_model path +# +# Basic usage: +# luci_eval_verifier_ref.py --driver build/compiler/luci-eval-driver/luci_eval_driver +# --ref_model ref_model_path --model this_model_path +# Assumption: +# these file exist with its purpose +# - ref_model_path.circle; circle model +# - ref_model_path.circle.inputN; N'th input numpy data +# - ref_model_path.circle.inputN.dtype; N'th input data type in text +# - ref_model_path.circle.inputN.shape; N'th input data shape in CSV +# - ref_model_path.circle.outputN; N'th output numpy data +# - ref_model_path.circle.outputN.dtype; N'th output data type in text +# - ref_model_path.circle.outputN.shape; N'th output data shape in CSV + + +def dtype_from_file(file_path): + with open(file_path, 'r') as dtype_file: + dtype_str = dtype_file.read() + if dtype_str == "float32": + return np.float32 + if dtype_str == "uint8": + return np.uint8 + if dtype_str == "int16": + return np.int16 
+ if dtype_str == "int32": + return np.int32 + if dtype_str == "int64": + return np.int64 + if dtype_str == "bool": + return np.bool_ + raise SystemExit("Unsupported dtype from file", dtype_str) + + +parser = argparse.ArgumentParser() +parser.add_argument('--driver', type=str, required=True) +parser.add_argument('--model_ref', type=str, required=True) +parser.add_argument('--work_path', type=str, required=True) +parser.add_argument('--rtolf32', type=str, required=False) +parser.add_argument('--atolf32', type=str, required=False) +args = parser.parse_args() + +driver = args.driver +circle_model_ref = args.model_ref + ".circle" +circle_model = args.work_path + ".circle" +# circle_model is used as to follow existing luci_eval_verifier.py + +rtolf32 = 1e-5 +atolf32 = 1e-5 +try: + if args.rtolf32 != None: + rtolf32 = float(args.rtolf32) + if args.atolf32 != None: + atolf32 = float(args.atolf32) +except ValueError: + print("rtolf32 or atolf32 is not a number") + quit(128) + +# get num of inputs by checking existance of model.inputN +check_input = 0 +while True: + input_file_path = circle_model_ref + ".input" + str(check_input) + if not os.path.isfile(input_file_path): + num_inputs = check_input + break + check_input = check_input + 1 + +if num_inputs == 0: + print("input file not exist for", circle_model_ref) + quit(128) + +# get num of outputs by checking existance of model.outputN +check_output = 0 +while True: + output_file_path = circle_model_ref + ".output" + str(check_output) + if not os.path.isfile(output_file_path): + num_outputs = check_output + break + check_output = check_output + 1 + +if num_outputs == 0: + print("output file not exist for", circle_model_ref) + quit(128) + +# Execute luci interpreter with reference input +subprocess.run( + [ + driver, circle_model_ref, + str(num_inputs), circle_model_ref + ".input", circle_model + ".output" + ], + check=True) + +# Compare the results. 
+for idx in range(num_outputs): + output_dtype = dtype_from_file(circle_model_ref + ".output" + str(idx) + ".dtype") + shape_file = open(circle_model_ref + ".output" + str(idx) + ".shape", 'r') + output_shape = [int(i) for i in shape_file.read().split(',')] + + output_data_ref = np.fromfile(circle_model_ref + ".output" + str(idx), output_dtype) + luci_output_data_ref = np.reshape(output_data_ref, output_shape) + + output_data = np.fromfile(circle_model + ".output" + str(idx), output_dtype) + luci_output_data = np.reshape(output_data, output_shape) + + try: + if output_dtype == np.uint8: + if np.allclose( + luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False: + raise SystemExit("Execution result of " + circle_model_ref + + " does not match with " + circle_model) + elif output_dtype == np.float32: + if np.allclose( + luci_output_data, luci_output_data_ref, rtol=rtolf32, + atol=atolf32) == False: + raise SystemExit("Execution result of " + circle_model_ref + + " does not match with " + circle_model) + elif output_dtype == np.int64: + if np.allclose( + luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False: + raise SystemExit("Execution result of " + circle_model_ref + + " does not match with " + circle_model) + elif output_dtype == np.int32: + if np.allclose( + luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False: + raise SystemExit("Execution result of " + circle_model_ref + + " does not match with " + circle_model) + elif output_dtype == np.int16: + if np.allclose( + luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False: + raise SystemExit("Execution result of " + circle_model_ref + + " does not match with " + circle_model) + elif output_dtype == np.bool_: + if np.allclose( + luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False: + raise SystemExit("Execution result of " + circle_model_ref + + " does not match with " + circle_model) + else: + raise SystemExit("Unsupported data type: ", output_dtype) + except: 
+ print(traceback.format_exc()) + quit(255) + +quit(0) diff --git a/compiler/luci-value-test/test.lst b/compiler/luci-value-test/test.lst index 2b5c93fa3..f62b72919 100644 --- a/compiler/luci-value-test/test.lst +++ b/compiler/luci-value-test/test.lst @@ -20,90 +20,90 @@ addeval(ArgMax_U8_003) #addeval(ArgMin_U8_002) #addeval(ArgMin_U8_003) addeval(AveragePool2D_000) -#addeval(BatchMatMul_000) +addeval(BatchMatMul_000) #addeval(BatchMatMulV2_000) #addeval(BatchMatMulV2_001) #addeval(BatchToSpaceND_000) -#addeval(Cast_000) -#addeval(Cast_001) +addeval(Cast_000) +addeval(Cast_001) #addeval(Ceil_000) addeval(Concatenation_000) addeval(Concatenation_U8_000) addeval(Conv2D_000) addeval(Conv2D_001) addeval(Conv2D_002) -#addeval(Conv2D_003) +addeval(Conv2D_003) addeval(Conv2D_U8_000) addeval(Conv2D_U8_001) #addeval(Cos_000) -#addeval(DepthToSpace_000) +addeval(DepthToSpace_000) addeval(DepthwiseConv2D_000) addeval(DepthwiseConv2D_U8_000) #addeval(DepthwiseConv2D_U8_001) addeval(DepthwiseConv2D_001) -#addeval(Div_000) +addeval(Div_000) addeval(ELU_000) -#addeval(Equal_000) -#addeval(Exp_000) +addeval(Equal_000) +addeval(Exp_000) #addeval(ExpandDims_000) #addeval(ExpandDims_001) #addeval(ExpandDims_002) #addeval(ExpandDims_003) #addeval(Fill_000) #addeval(Fill_001) -#addeval(Floor_000) -#addeval(FloorDiv_000) -#addeval(FloorDiv_001) +addeval(Floor_000) +addeval(FloorDiv_000) +addeval(FloorDiv_001) #addeval(FloorMod_000) #addeval(FloorMod_001) addeval(FullyConnected_000) addeval(FullyConnected_001) addeval(FullyConnected_002) #addeval(FullyConnected_U8_000) -#addeval(Gather_000) +addeval(Gather_000) #addeval(GatherNd_000) #addeval(Greater_000) -#addeval(GreaterEqual_000) +addeval(GreaterEqual_000) addeval(If_000) addeval(If_001) addeval(L2Normalize_000) addeval(L2Pool2D_000) #addeval(L2Pool2D_U8_000) addeval(LeakyRelu_000) -#addeval(Less_000) -#addeval(LessEqual_000) +addeval(Less_000) +addeval(LessEqual_000) addeval(LocalResponseNormalization_000) #addeval(Log_000) 
-#addeval(LogicalAnd_000) -#addeval(LogicalNot_000) -#addeval(LogicalOr_000) +addeval(LogicalAnd_000) +addeval(LogicalNot_000) +addeval(LogicalOr_000) addeval(Logistic_000) -#addeval(LogSoftmax_000) +addeval(LogSoftmax_000) #addeval(MatMul_000) #addeval(MatrixDiag_000) #addeval(MatrixSetDiag_000) -#addeval(Maximum_000) +addeval(Maximum_000) addeval(MaxPool2D_000) addeval(MaxPool2D_U8_000) addeval(Mean_000) addeval(Mean_001) -#addeval(Mean_U8_000) -#addeval(Minimum_000) +addeval(Mean_U8_000) +addeval(Minimum_000) #addeval(MirrorPad_000) addeval(Mul_000) #addeval(Mul_U8_000) -#addeval(Neg_000) -#addeval(NotEqual_000) -#addeval(OneHot_000) -#addeval(OneHot_001) -#addeval(OneHot_002) +addeval(Neg_000) +addeval(NotEqual_000) +addeval(OneHot_000) +addeval(OneHot_001) +addeval(OneHot_002) #addeval(OneHot_003) -#addeval(Pack_000) -#addeval(Pack_U8_000) +addeval(Pack_000) +addeval(Pack_U8_000) addeval(Pad_000) addeval(Pad_U8_000) -#addeval(Pow_000) -#addeval(PRelu_000) +addeval(Pow_000) +addeval(PRelu_000) #addeval(Range_000) #addeval(Rank_000) #addeval(ReduceAny_000) @@ -116,20 +116,20 @@ addeval(Pad_U8_000) #addeval(ReduceProd_001) #addeval(ReduceProd_002) #addeval(ReduceProd_003) -#addeval(ReLU_000) -#addeval(ReLU6_000) +addeval(ReLU_000) +addeval(ReLU6_000) #addeval(ReLUN1To1_000) addeval(Reshape_000) addeval(Reshape_001) addeval(Reshape_002) #addeval(Reshape_003) addeval(Reshape_U8_000) -#addeval(ResizeBilinear_000) -#addeval(ResizeNearestNeighbor_000) +addeval(ResizeBilinear_000) +addeval(ResizeNearestNeighbor_000) #addeval(ReverseSequence_000) #addeval(ReverseV2_000) #addeval(Round_000) -#addeval(Rsqrt_000) +addeval(Rsqrt_000) #addeval(ScatterNd_000) #addeval(SegmentSum_000) #addeval(Select_000) @@ -139,37 +139,39 @@ addeval(Reshape_U8_000) #addeval(SelectV2_001) #addeval(SelectV2_002) #addeval(Shape_000) +addeval(SignatureDef_MultiOut_000) +addeval(SignatureDef_MultiOut_001) #addeval(Sin_000) addeval(Slice_000) addeval(Softmax_000) -#addeval(Softmax_U8_000) 
-#addeval(SpaceToBatchND_000) -#addeval(SpaceToBatchND_001) -#addeval(SpaceToBatchND_002) -#addeval(SpaceToBatchND_003) +addeval(Softmax_U8_000) +addeval(SpaceToBatchND_000) +addeval(SpaceToBatchND_001) +addeval(SpaceToBatchND_002) +addeval(SpaceToBatchND_003) addeval(SpaceToDepth_000) #addeval(SparseToDense_000) addeval(Split_000) -#addeval(SplitV_000) -#addeval(Sqrt_000) -#addeval(Square_000) -#addeval(SquaredDifference_000) +addeval(SplitV_000) +addeval(Sqrt_000) +addeval(Square_000) +addeval(SquaredDifference_000) addeval(Squeeze_000) addeval(Squeeze_001) addeval(StridedSlice_000) addeval(StridedSlice_001) addeval(StridedSlice_002) -#addeval(Sub_000) -#addeval(Sub_U8_000) +addeval(Sub_000) +addeval(Sub_U8_000) #addeval(Sum_000) #addeval(Sum_001) -#addeval(Tanh_000) +addeval(Tanh_000) #addeval(Tile_000) #addeval(Tile_U8_000) #addeval(TopKV2_000) #addeval(TopKV2_001) addeval(Transpose_000) -#addeval(TransposeConv_000) +addeval(TransposeConv_000) addeval(Unpack_000) addeval(Unpack_001) addeval(Unpack_002) @@ -180,9 +182,13 @@ addeval(Unpack_003) #addeval(While_001) #addeval(While_002) #addeval(While_003) -#addeval(YUV_TO_RGB_U8_000) +addeval(YUV_TO_RGB_U8_000) #addeval(ZerosLike_000) # Simple Network test addeval(Part_While_000) addeval(Part_While_001) + +# Tests with tolerance +addevaltol(SVDF_000 8e-3 8e-3) +addevaltol(SVDF_001 8e-3 8e-3) diff --git a/compiler/luci/CMakeLists.txt b/compiler/luci/CMakeLists.txt index b92eefb40..460dc7b23 100644 --- a/compiler/luci/CMakeLists.txt +++ b/compiler/luci/CMakeLists.txt @@ -23,4 +23,8 @@ add_subdirectory(import) add_subdirectory(export) add_subdirectory(tester) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + add_subdirectory(tests) diff --git a/compiler/luci/export/CMakeLists.txt b/compiler/luci/export/CMakeLists.txt index a267d0e1f..f46181eb6 100644 --- a/compiler/luci/export/CMakeLists.txt +++ b/compiler/luci/export/CMakeLists.txt @@ -12,7 +12,7 @@ target_include_directories(luci_export PUBLIC include) 
target_link_libraries(luci_export PRIVATE luci_lang) target_link_libraries(luci_export PRIVATE luci_service) target_link_libraries(luci_export PRIVATE luci_pass) -target_link_libraries(luci_export PRIVATE mio_circle) +target_link_libraries(luci_export PRIVATE mio_circle04) target_link_libraries(luci_export PRIVATE luci_env) target_link_libraries(luci_export PRIVATE luci_log) target_link_libraries(luci_export PRIVATE luci_logex) @@ -36,6 +36,6 @@ target_include_directories(luci_export_test PRIVATE src) target_link_libraries(luci_export_test luci_export) target_link_libraries(luci_export_test luci_plan) target_link_libraries(luci_export_test luci_lang) -target_link_libraries(luci_export_test mio_circle) +target_link_libraries(luci_export_test mio_circle04) target_link_libraries(luci_export_test luci_env) target_link_libraries(luci_export_test oops) diff --git a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h new file mode 100644 index 000000000..0ff21a34b --- /dev/null +++ b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h @@ -0,0 +1,539 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__ +#define __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__ + +#include "CircleExporterUtils.h" + +#include <luci/IR/CircleNode.h> +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> + +#include <flatbuffers/flexbuffers.h> + +namespace luci +{ + +// NOTE Virtual nodes are not circle builtin operators. +// Therefore, they are not defined here. +class BuiltinOptionsExtractor final + : public luci::CircleNodeMutableVisitor<flatbuffers::Offset<void>> +{ +public: + BuiltinOptionsExtractor(flatbuffers::FlatBufferBuilder &builder) : _builder{builder} + { + // DO NOTHING + } + +public: + flatbuffers::Offset<void> visit(luci::CircleAbs *) + { + return circle::CreateAbsOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleAdd *node) + { + return circle::CreateAddOptions(_builder, to_circle_actfunc(node->fusedActivationFunction())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleAddN *) + { + return circle::CreateAddNOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleArgMax *node) + { + return circle::CreateArgMaxOptions(_builder, luci::to_circle_tensortype(node->output_type())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleArgMin *node) + { + return circle::CreateArgMinOptions(_builder, luci::to_circle_tensortype(node->output_type())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleAveragePool2D *node) + { + return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(), + node->stride()->h(), node->filter()->w(), + node->filter()->h(), + to_circle_actfunc(node->fusedActivationFunction())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleBatchMatMul *node) + { + return circle::CreateBatchMatMulOptions(_builder, node->adj_x(), node->adj_y()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleBatchToSpaceND *) + { + return 
circle::CreateBatchToSpaceNDOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleBidirectionalSequenceLSTM *node) + { + return circle::CreateBidirectionalSequenceLSTMOptions( + _builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(), + node->proj_clip(), node->merge_outputs(), node->time_major(), + node->asymmetric_quantize_inputs()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleCast *node) + { + if (node->out_data_type() == loco::DataType::Unknown) + return _no_option; + else + return circle::CreateCastOptions(_builder, luci::to_circle_tensortype(node->in_data_type()), + luci::to_circle_tensortype(node->out_data_type())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleCeil *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleConcatenation *node) + { + return circle::CreateConcatenationOptions(_builder, node->axis(), + to_circle_actfunc(node->fusedActivationFunction())) + .Union(); + } + // CircleConst is not virtual but not builtinOperator + // flatbuffers::Offset<void> visit(luci::CircleConst *) + flatbuffers::Offset<void> visit(luci::CircleConv2D *node) + { + return circle::CreateConv2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(), + node->stride()->h(), + to_circle_actfunc(node->fusedActivationFunction()), + node->dilation()->w(), node->dilation()->h()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleCos *) + { + return circle::CreateCosOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleCustom *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleDepthToSpace *node) + { + return circle::CreateDepthToSpaceOptions(_builder, node->block_size()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleDepthwiseConv2D *node) + { + return circle::CreateDepthwiseConv2DOptions( + _builder, getOpPadding(node->padding()), node->stride()->w(), node->stride()->h(), + node->depthMultiplier(), 
to_circle_actfunc(node->fusedActivationFunction()), + node->dilation()->w(), node->dilation()->h()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleDequantize *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleDiv *node) + { + return circle::CreateDivOptions(_builder, to_circle_actfunc(node->fusedActivationFunction())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleElu *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleEqual *) + { + return circle::CreateEqualOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleExp *) + { + return circle::CreateExpOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleExpandDims *) + { + return circle::CreateExpandDimsOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleFakeQuant *node) + { + return circle::CreateFakeQuantOptions(_builder, node->min(), node->max(), node->num_bits(), + node->narrow_range()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleFill *) + { + return circle::CreateFillOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleFloor *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleFloorDiv *) + { + return circle::CreateFloorDivOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleFloorMod *) + { + return circle::CreateFloorModOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleFullyConnected *node) + { + return circle::CreateFullyConnectedOptions( + _builder, to_circle_actfunc(node->fusedActivationFunction()), + to_circle_weightsformat(node->weights_format()), node->keep_num_dims()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleGather *node) + { + return circle::CreateGatherOptions(_builder, node->axis()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleGatherNd *) + { + return circle::CreateGatherNdOptions(_builder).Union(); + } + 
flatbuffers::Offset<void> visit(luci::CircleGreater *) + { + return circle::CreateGreaterOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleGreaterEqual *) + { + return circle::CreateGreaterEqualOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleIf *node) + { + return circle::CreateIfOptions(_builder, node->then_branch(), node->else_branch()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleL2Normalize *node) + { + return circle::CreateL2NormOptions(_builder, to_circle_actfunc(node->fusedActivationFunction())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleL2Pool2D *node) + { + return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(), + node->stride()->h(), node->filter()->w(), + node->filter()->h(), + to_circle_actfunc(node->fusedActivationFunction())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleLeakyRelu *node) + { + return circle::CreateLeakyReluOptions(_builder, node->alpha()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleLess *) + { + return circle::CreateLessOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleLessEqual *) + { + return circle::CreateLessEqualOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleLocalResponseNormalization *node) + { + return circle::CreateLocalResponseNormalizationOptions(_builder, node->radius(), node->bias(), + node->alpha(), node->beta()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleLog *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleLogicalAnd *) + { + return circle::CreateLogicalAndOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleLogicalNot *) + { + return circle::CreateLogicalNotOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleLogicalOr *) + { + return circle::CreateLogicalOrOptions(_builder).Union(); + } + flatbuffers::Offset<void> 
visit(luci::CircleLogistic *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleLogSoftmax *) + { + return circle::CreateLogSoftmaxOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleMatrixDiag *) + { + return circle::CreateMatrixDiagOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleMatrixSetDiag *) + { + return circle::CreateMatrixSetDiagOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleMaximum *) + { + return circle::CreateMaximumMinimumOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleMaxPool2D *node) + { + return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(), + node->stride()->h(), node->filter()->w(), + node->filter()->h(), + to_circle_actfunc(node->fusedActivationFunction())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleMean *node) + { + return circle::CreateReducerOptions(_builder, node->keep_dims()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleMinimum *) + { + return circle::CreateMaximumMinimumOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleMirrorPad *node) + { + return circle::CreateMirrorPadOptions(_builder, to_circle_mirrorpadmode(node->mode())).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleMul *node) + { + return circle::CreateMulOptions(_builder, to_circle_actfunc(node->fusedActivationFunction())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleNeg *) + { + return circle::CreateNegOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleNonMaxSuppressionV4 *) + { + return circle::CreateNonMaxSuppressionV4Options(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleNonMaxSuppressionV5 *) + { + return circle::CreateNonMaxSuppressionV5Options(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleNotEqual *) + { + return 
circle::CreateNotEqualOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleOneHot *node) + { + return circle::CreateOneHotOptions(_builder, node->axis()).Union(); + } + flatbuffers::Offset<void> visit(luci::CirclePack *node) + { + return circle::CreatePackOptions(_builder, node->values_count(), node->axis()).Union(); + } + flatbuffers::Offset<void> visit(luci::CirclePad *) + { + return circle::CreatePadOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CirclePadV2 *) + { + return circle::CreatePadV2Options(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CirclePow *) + { + return circle::CreatePowOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CirclePRelu *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleQuantize *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleRange *) + { + return circle::CreateRangeOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleRank *) + { + return circle::CreateRankOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleReduceAny *node) + { + return circle::CreateReducerOptions(_builder, node->keep_dims()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleReduceMax *node) + { + return circle::CreateReducerOptions(_builder, node->keep_dims()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleReduceMin *node) + { + return circle::CreateReducerOptions(_builder, node->keep_dims()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleReduceProd *node) + { + return circle::CreateReducerOptions(_builder, node->keep_dims()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleRelu *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleRelu6 *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleReluN1To1 *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleReshape *node) + { + auto new_shape = 
_builder.CreateVector<int32_t>( + node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); }); + return circle::CreateReshapeOptions(_builder, new_shape).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleResizeBilinear *node) + { + return circle::CreateResizeBilinearOptions(_builder, node->align_corners(), + node->half_pixel_centers()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleResizeNearestNeighbor *node) + { + return circle::CreateResizeNearestNeighborOptions(_builder, node->align_corners()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleReverseSequence *node) + { + return circle::CreateReverseSequenceOptions(_builder, node->seq_axis(), node->batch_axis()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleReverseV2 *) + { + return circle::CreateReverseV2Options(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleRound *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleRsqrt *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleScatterNd *) + { + return circle::CreateScatterNdOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSegmentSum *) + { + return circle::CreateSegmentSumOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSelect *) + { + return circle::CreateSelectOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSelectV2 *) + { + return circle::CreateSelectV2Options(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleShape *node) + { + return circle::CreateShapeOptions(_builder, luci::to_circle_tensortype(node->out_type())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSin *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleSlice *) + { + return circle::CreateSliceOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSoftmax *node) + { + return 
circle::CreateSoftmaxOptions(_builder, node->beta()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSpaceToBatchND *) + { + return circle::CreateSpaceToBatchNDOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSpaceToDepth *node) + { + return circle::CreateSpaceToDepthOptions(_builder, node->block_size()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSparseToDense *node) + { + return circle::CreateSparseToDenseOptions(_builder, node->validate_indices()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSplit *node) + { + return circle::CreateSplitOptions(_builder, node->num_split()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSplitV *node) + { + return circle::CreateSplitVOptions(_builder, node->num_split()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSqrt *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleSquare *) + { + return circle::CreateSquareOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSquaredDifference *) + { + return circle::CreateSquaredDifferenceOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSqueeze *node) + { + auto squeeze_dims = _builder.CreateVector<int32_t>(node->squeeze_dims()); + return circle::CreateSqueezeOptions(_builder, squeeze_dims).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleStridedSlice *node) + { + return circle::CreateStridedSliceOptions(_builder, node->begin_mask(), node->end_mask(), + node->ellipsis_mask(), node->new_axis_mask(), + node->shrink_axis_mask()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSub *node) + { + return circle::CreateSubOptions(_builder, to_circle_actfunc(node->fusedActivationFunction())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSum *node) + { + return circle::CreateReducerOptions(_builder, node->keep_dims()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleSVDF *node) + { + return 
circle::CreateSVDFOptions(_builder, node->svdf_rank(), + to_circle_actfunc(node->fusedActivationFunction()), + node->asymmetric_quantize_inputs()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleTanh *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleTile *) + { + return circle::CreateTileOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleTopKV2 *) + { + return circle::CreateTopKV2Options(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleTranspose *) + { + return circle::CreateTransposeOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleTransposeConv *node) + { + return circle::CreateTransposeConvOptions(_builder, getOpPadding(node->padding()), + node->stride()->w(), node->stride()->h()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleUnidirectionalSequenceLSTM *node) + { + return circle::CreateUnidirectionalSequenceLSTMOptions( + _builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(), + node->proj_clip(), node->time_major(), node->asymmetric_quantize_inputs()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleUnique *node) + { + return circle::CreateUniqueOptions(_builder, luci::to_circle_tensortype(node->idx_out_type())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleUnpack *node) + { + return circle::CreateUnpackOptions(_builder, node->num(), node->axis()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleWhere *) + { + return circle::CreateWhereOptions(_builder).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleWhile *node) + { + return circle::CreateWhileOptions(_builder, node->cond_branch(), node->body_branch()).Union(); + } + flatbuffers::Offset<void> visit(luci::CircleZerosLike *) + { + return circle::CreateZerosLikeOptions(_builder).Union(); + } + // Circle only + flatbuffers::Offset<void> visit(luci::CircleBCQFullyConnected *node) + { + return 
circle::CreateBCQFullyConnectedOptions( + _builder, node->weights_hidden_size(), + to_circle_actfunc(node->fusedActivationFunction())) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleBCQGather *node) + { + return circle::CreateBCQGatherOptions(_builder, node->input_hidden_size(), node->axis()) + .Union(); + } + flatbuffers::Offset<void> visit(luci::CircleInstanceNorm *node) + { + return circle::CreateInstanceNormOptions(_builder, node->epsilon(), + to_circle_actfunc(node->fusedActivationFunction())) + .Union(); + } + +protected: + flatbuffers::FlatBufferBuilder &_builder; + +private: + const flatbuffers::Offset<void> _no_option = 0; +}; + +} // namespace luci + +#endif // __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__ diff --git a/compiler/luci/export/src/CircleBuiltinTypesMappingRule.h b/compiler/luci/export/src/CircleBuiltinTypesMappingRule.h new file mode 100644 index 000000000..6f7c0f70e --- /dev/null +++ b/compiler/luci/export/src/CircleBuiltinTypesMappingRule.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__ +#define __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__ + +#include <luci/IR/CircleNode.h> +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> + +namespace luci +{ + +class BuiltinOperatorMappingRule final : public CircleNodeVisitor<circle::BuiltinOperator> +{ +public: + BuiltinOperatorMappingRule() + { + // DO NOTHING + } + +public: + static BuiltinOperatorMappingRule &get() + { + static BuiltinOperatorMappingRule instance; + return instance; + } + +public: +#define CIRCLE_NODE(CIRCLE_NODE, OP, OPTION) \ + circle::BuiltinOperator visit(const CIRCLE_NODE *) final { return circle::OP; } +// Virtual nodes are not circle builtin operator +#define CIRCLE_VNODE(CIRCLE_NODE) +#include "CircleOps.lst" +#undef CIRCLE_VNODE +#undef CIRCLE_NODE +}; + +class BuiltinOptionsMappingRule final : public CircleNodeVisitor<circle::BuiltinOptions> +{ +public: + BuiltinOptionsMappingRule() + { + // DO NOTHING + } + +public: + static BuiltinOptionsMappingRule &get() + { + static BuiltinOptionsMappingRule instance; + return instance; + } + +public: +#define CIRCLE_NODE(CIRCLE_NODE, OP, OPTION) \ + circle::BuiltinOptions visit(const CIRCLE_NODE *) final { return circle::OPTION; } +// Virtual nodes are not circle builtin operator +#define CIRCLE_VNODE(CIRCLE_NODE) +#include "CircleOps.lst" +#undef CIRCLE_VNODE +#undef CIRCLE_NODE +}; + +} // namespace luci + +#endif // __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__ diff --git a/compiler/luci/export/src/CircleExporterImpl.cpp b/compiler/luci/export/src/CircleExporterImpl.cpp index 5868c176c..083add9be 100644 --- a/compiler/luci/export/src/CircleExporterImpl.cpp +++ b/compiler/luci/export/src/CircleExporterImpl.cpp @@ -79,14 +79,19 @@ encodeOperatorCodes(FlatBufferBuilder &builder, std::unordered_map<luci::OpCode, for (auto it : opcodes) { uint32_t idx = it.second; + int8_t dep_code = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES + if 
(it.first.opcode < BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES) + dep_code = static_cast<int8_t>(it.first.opcode); if (it.first.opcode != BuiltinOperator_CUSTOM) { - operator_codes_vec[idx] = CreateOperatorCode(builder, it.first.opcode, 0, it.first.version); + operator_codes_vec[idx] = + CreateOperatorCode(builder, dep_code, 0, it.first.version, it.first.opcode); } else { operator_codes_vec[idx] = - CreateOperatorCode(builder, it.first.opcode, builder.CreateString(it.first.custom_code)); + CreateOperatorCode(builder, dep_code, builder.CreateString(it.first.custom_code), + it.first.version, it.first.opcode); } } diff --git a/compiler/luci/export/src/CircleExporterUtils.cpp b/compiler/luci/export/src/CircleExporterUtils.cpp index 3a7ba304f..9473c2c4e 100644 --- a/compiler/luci/export/src/CircleExporterUtils.cpp +++ b/compiler/luci/export/src/CircleExporterUtils.cpp @@ -15,6 +15,7 @@ */ #include "CircleExporterUtils.h" +#include "CircleBuiltinTypesMappingRule.h" #include <oops/InternalExn.h> @@ -163,36 +164,63 @@ circle::SparseIndexVector to_circle_sparse_index_vector_type(luci::SparseIndexVe } } -} // namespace luci +circle::BuiltinOperator circle_builtin_operator(const luci::CircleNode *node) +{ + return node->accept(&BuiltinOperatorMappingRule::get()); +} -namespace luci +circle::BuiltinOptions circle_builtin_options(const luci::CircleNode *node) { + if (auto cast = dynamic_cast<const luci::CircleCast *>(node)) + { + return (cast->out_data_type() == loco::DataType::Unknown) ? 
circle::BuiltinOptions_NONE + : circle::BuiltinOptions_CastOptions; + } -uint32_t SerializedModelData::registerBuiltinOpcode(circle::BuiltinOperator builtin_code, - const int32_t op_version) + return node->accept(&BuiltinOptionsMappingRule::get()); +} + +std::string circle_custom_code(const luci::CircleNode *node) { - assert(op_version > 0); + if (auto custom_node = dynamic_cast<const luci::CircleCustom *>(node)) + { + return custom_node->custom_code(); + } - auto it = _operator_codes.find(OpCode{builtin_code, "", op_version}); - if (it != _operator_codes.end()) + return ""; +} + +flatbuffers::Offset<flatbuffers::Vector<uint8_t>> +circle_custom_options(flatbuffers::FlatBufferBuilder &fb, const luci::CircleNode *node) +{ + if (auto custom_node = dynamic_cast<const luci::CircleCustom *>(node)) { - return it->second; + std::vector<uint8_t> custom_options_vec{custom_node->custom_options().begin(), + custom_node->custom_options().end()}; + return fb.CreateVector(custom_options_vec); } - auto idx = static_cast<uint32_t>(_operator_codes.size()); - _operator_codes.emplace(OpCode{builtin_code, "", op_version}, idx); - return idx; + + return 0; } -uint32_t SerializedModelData::registerCustomOpcode(const std::string &custom_code) +} // namespace luci + +namespace luci { - const circle::BuiltinOperator builtin_code = circle::BuiltinOperator_CUSTOM; - auto it = _operator_codes.find(OpCode{builtin_code, custom_code}); + +uint32_t SerializedModelData::registerBuiltinOpcode(circle::BuiltinOperator builtin_code, + const std::string &custom_code, + const int32_t op_version) +{ + assert(op_version > 0); + + auto it = _operator_codes.find(OpCode{builtin_code, custom_code, op_version}); if (it != _operator_codes.end()) { return it->second; } auto idx = static_cast<uint32_t>(_operator_codes.size()); - _operator_codes.emplace(OpCode{builtin_code, custom_code}, idx); + _operator_codes.emplace(OpCode{builtin_code, custom_code, op_version}, idx); return idx; } diff --git 
a/compiler/luci/export/src/CircleExporterUtils.h b/compiler/luci/export/src/CircleExporterUtils.h index 95310b353..4a4c54a69 100644 --- a/compiler/luci/export/src/CircleExporterUtils.h +++ b/compiler/luci/export/src/CircleExporterUtils.h @@ -39,6 +39,12 @@ flatbuffers::Offset<void> to_circle_sparse_index_vector(flatbuffers::FlatBufferB const SparseIndexVector &sparse_idx_vec); circle::SparseIndexVector to_circle_sparse_index_vector_type(luci::SparseIndexVectorType type); +circle::BuiltinOperator circle_builtin_operator(const luci::CircleNode *node); +circle::BuiltinOptions circle_builtin_options(const luci::CircleNode *node); +std::string circle_custom_code(const luci::CircleNode *node); +flatbuffers::Offset<flatbuffers::Vector<uint8_t>> +circle_custom_options(flatbuffers::FlatBufferBuilder &fb, const luci::CircleNode *node); + } // namespace luci namespace luci diff --git a/compiler/luci/export/src/CircleOperationExporter.cpp b/compiler/luci/export/src/CircleOperationExporter.cpp index be64a52d4..b300a7fcf 100644 --- a/compiler/luci/export/src/CircleOperationExporter.cpp +++ b/compiler/luci/export/src/CircleOperationExporter.cpp @@ -15,1686 +15,30 @@ */ #include "CircleOperationExporter.h" -#include "CircleExporterUtils.h" -#include "Check.h" +#include "CircleOperationExporterRule.h" #include <luci/IR/CircleNode.h> -#include <luci/IR/CircleNodes.h> -#include <luci/IR/CircleNodeVisitor.h> #include <luci/Profile/CircleNodeOrigin.h> #include <luci/Plan/CircleNodeExecutionPlan.h> -#include <luci/UserSettings.h> -#include <luci/Log.h> +#include <loco/IR/Algorithm.h> -#include <loco/IR/CanonicalNodeVisitor.h> -#include <oops/InternalExn.h> - -#include <flatbuffers/flexbuffers.h> - -using namespace flatbuffers; -using namespace circle; - -namespace -{ - -using namespace luci; - -struct ExportContext -{ - FlatBufferBuilder &builder; - SerializedModelData &md; - SerializedGraphData &gd; -}; - -/** - * @brief Exports CircleMaxPool2D or CircleAveragePool2D - * - * @note 
CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D - */ -template <class CirclePool2D> -void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op) -{ - LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D || - builtin_op == circle::BuiltinOperator_L2_POOL_2D || - builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D, - "Should be L2Pool, MaxPool or AvgPool"); - LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set"); - - uint32_t op_idx = ctx.md.registerBuiltinOpcode(builtin_op, node->op_version()); - std::vector<int32_t> inputs_vec{get_tensor_index(node->value())}; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - - circle::Padding padding = getOpPadding(node->padding()); - - auto options = CreatePool2DOptions(ctx.builder, padding, node->stride()->w(), node->stride()->h(), - node->filter()->w(), node->filter()->h(), - to_circle_actfunc(node->fusedActivationFunction())); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_Pool2DOptions, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -/** - * @brief export simple nodes - */ -void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop, - circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset) -{ - uint32_t op_idx = - ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec{get_tensor_index(node)}; - for (uint32_t i = 0; i < node->arity(); ++i) - inputs_vec.push_back(get_tensor_index(node->arg(i))); - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, bot, 
options_offset); - ctx.gd._operators.push_back(op_offset); -} - -/** - * @brief export simple nodes having void options - */ -void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop) -{ - uint32_t op_idx = - ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - for (uint32_t i = 0; i < node->arity(); ++i) - inputs_vec.push_back(get_tensor_index(node->arg(i))); - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs); - ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleAddN *node) -{ - uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - - for (uint32_t i = 0; i < node->arity(); ++i) - inputs_vec.push_back(get_tensor_index(node->inputs(i))); - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto options = CreateAddNOptions(ctx.builder); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_AddNOptions, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleCast *node) -{ - uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version()); - std::vector<int32_t> inputs_vec{get_tensor_index(node->x())}; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - - flatbuffers::Offset<Operator> op_offset; - if 
(node->out_data_type() != loco::DataType::Unknown) - { - auto options = CreateCastOptions(ctx.builder, to_circle_tensortype(node->in_data_type()), - to_circle_tensortype(node->out_data_type())); - op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_CastOptions, options.Union()); - } - else - { - op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs); - } - ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleConcatenation *node) -{ - uint32_t op_idx = - ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - - for (uint32_t i = 0; i < node->numValues(); ++i) - inputs_vec.push_back(get_tensor_index(node->values(i))); - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto options = CreateConcatenationOptions(ctx.builder, node->axis(), - to_circle_actfunc(node->fusedActivationFunction())); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_ConcatenationOptions, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleCustom *node) -{ - auto custom_outputs = loco::succs(node); - assert(custom_outputs.size() == node->numOutputs()); - - uint32_t op_idx = ctx.md.registerCustomOpcode(node->custom_code()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec; - - for (uint32_t index = 0; index < node->numInputs(); index++) - { - inputs_vec.push_back(get_tensor_index(node->inputs(index))); - } - for (uint32_t index = 0; index < custom_outputs.size(); index++) - { - // store in order of index - bool found = false; - for (auto out : custom_outputs) - { - auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out); - if (custom_out->index() == 
static_cast<int32_t>(index)) - { - outputs_vec.push_back(get_tensor_index(custom_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid Custom output"); - } - } - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options; - std::vector<uint8_t> custom_options_vec{node->custom_options().begin(), - node->custom_options().end()}; - circle_custom_options = ctx.builder.CreateVector(custom_options_vec); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE, - flatbuffers::Offset<void>(), circle_custom_options); - ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleIf *node) -{ - auto if_outs = loco::succs(node); - assert(if_outs.size() == node->output_count()); - - uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec; - - inputs_vec.push_back(get_tensor_index(node->cond())); - for (uint32_t idx = 0; idx < node->input_count(); ++idx) - inputs_vec.push_back(get_tensor_index(node->input(idx))); - - for (uint32_t idx = 0; idx < node->output_count(); ++idx) - { - // store in order of index - bool found = false; - for (auto out : if_outs) - { - auto if_out = loco::must_cast<luci::CircleIfOut *>(out); - if (if_out->index() == static_cast<int32_t>(idx)) - { - outputs_vec.push_back(get_tensor_index(if_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid CircleIf output"); - } - } - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto options = CreateIfOptions(ctx.builder, node->then_branch(), node->else_branch()); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_IfOptions, options.Union()); - 
ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV4 *node) -{ - auto nms_outs = loco::succs(node); - assert(nms_outs.size() == 2); - - uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4, - node->op_version()); - std::vector<int32_t> inputs_vec{ - get_tensor_index(node->boxes()), get_tensor_index(node->scores()), - get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()), - get_tensor_index(node->score_threshold()), - }; - std::vector<int32_t> outputs_vec; - - for (uint32_t idx = 0; idx < nms_outs.size(); ++idx) - { - // store in order of index - bool found = false; - for (auto out : nms_outs) - { - auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out); - if (nms_out->index() == static_cast<int32_t>(idx)) - { - outputs_vec.push_back(get_tensor_index(nms_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid NonMaxSuppressionV4 output"); - } - } - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto options = CreateNonMaxSuppressionV4Options(ctx.builder); - auto op_offset = - CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV5 *node) -{ - auto nms_outs = loco::succs(node); - assert(nms_outs.size() == 3); - - uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V5, - node->op_version()); - std::vector<int32_t> inputs_vec{ - get_tensor_index(node->boxes()), get_tensor_index(node->scores()), - get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()), - get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()), - }; - std::vector<int32_t> outputs_vec; - - for 
(uint32_t idx = 0; idx < nms_outs.size(); ++idx) - { - // store in order of index - bool found = false; - for (auto out : nms_outs) - { - auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out); - if (nms_out->index() == static_cast<int32_t>(idx)) - { - outputs_vec.push_back(get_tensor_index(nms_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid NonMaxSuppressionV5 output"); - } - } - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto options = CreateNonMaxSuppressionV5Options(ctx.builder); - auto op_offset = - CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleReverseV2 *node) -{ - uint32_t op_idx = - ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version()); - std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())}; - std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))}; - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto options = CreateReverseV2Options(ctx.builder); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_ReverseSequenceOptions, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleSplit *node) -{ - auto split_outs = loco::succs(node); - assert(int32_t(split_outs.size()) == node->num_split()); - - uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version()); - // NOTE BuiltinOperator_SPLIT input is placed at second position - std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()), - get_tensor_index(node->input())}; - std::vector<int32_t> outputs_vec; - - 
for (int32_t index = 0; index < node->num_split(); index++) - { - // store in order of index - bool found = false; - for (auto out : split_outs) - { - auto split_out = loco::must_cast<luci::CircleSplitOut *>(out); - if (split_out->index() == index) - { - outputs_vec.push_back(get_tensor_index(split_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid Split output"); - } - } - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto options = CreateSplitOptions(ctx.builder, node->num_split()); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_SplitOptions, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleSplitV *node) -{ - auto split_outs = loco::succs(node); - assert(int32_t(split_outs.size()) == node->num_split()); - - uint32_t op_idx = - ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version()); - std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), - get_tensor_index(node->size_splits()), - get_tensor_index(node->split_dim())}; - std::vector<int32_t> outputs_vec; - - for (int32_t index = 0; index < node->num_split(); index++) - { - // store in order of index - bool found = false; - for (auto out : split_outs) - { - auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out); - if (split_out->index() == index) - { - outputs_vec.push_back(get_tensor_index(split_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid SplitV output"); - } - } - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto options = CreateSplitVOptions(ctx.builder, node->num_split()); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_SplitVOptions, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -void 
export_node(ExportContext &ctx, luci::CircleTopKV2 *node) -{ - auto topkv2_outs = loco::succs(node); - int outs_count = int32_t(topkv2_outs.size()); - assert(outs_count == 2); - - uint32_t op_idx = - ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version()); - std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())}; - std::vector<int32_t> outputs_vec; - - for (int32_t index = 0; index < outs_count; index++) - { - // store in order of index - bool found = false; - for (auto out : topkv2_outs) - { - auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out); - if (topkv2_out->index() == index) - { - outputs_vec.push_back(get_tensor_index(topkv2_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid TopKV2 output"); - } - } - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto options = CreateTopKV2Options(ctx.builder); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_TopKV2Options, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleUnique *node) -{ - auto unique_outs = loco::succs(node); - assert(int32_t(unique_outs.size()) == 2); - uint32_t op_idx = - ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version()); - - std::vector<int32_t> inputs_vec{get_tensor_index(node->input())}; - std::vector<int32_t> outputs_vec; - - for (int32_t index = 0; index < 2; index++) - { - // store in order of index - bool found = false; - for (auto out : unique_outs) - { - auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out); - if (unique_out->index() == index) - { - outputs_vec.push_back(get_tensor_index(unique_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid Unique output"); - } - } - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = 
ctx.builder.CreateVector(outputs_vec); - auto options = CreateUniqueOptions(ctx.builder, to_circle_tensortype(node->idx_out_type())); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_UniqueOptions, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -void export_node(ExportContext &ctx, luci::CircleUnpack *node) -{ - LOGGER(l); - auto settings = luci::UserSettings::settings(); - - auto unpack_outs = loco::succs(node); - // NOTE real models may not use all of the outputs - if (static_cast<int32_t>(unpack_outs.size()) != node->num()) - { - if (settings->get(luci::UserSettings::Key::DisableValidation)) - { - WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs"; - } - else - assert(false); - } - - uint32_t op_idx = - ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version()); - std::vector<int32_t> inputs_vec{get_tensor_index(node->value())}; - std::vector<int32_t> outputs_vec; - - for (int32_t index = 0; index < node->num(); index++) - { - // store in order of index - bool found = false; - for (auto out : unpack_outs) - { - auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out); - if (unpack_out->index() == index) - { - outputs_vec.push_back(get_tensor_index(unpack_out)); - found = true; - break; - } - } - // NOTE real models may not use all of the outputs - if (!found) - { - if (settings->get(luci::UserSettings::Key::DisableValidation)) - { - WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used"; - } - else - assert(false); - } - } - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto options = CreateUnpackOptions(ctx.builder, node->num(), node->axis()); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_UnpackOptions, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -void 
export_node(ExportContext &ctx, luci::CircleWhile *node) -{ - auto while_outs = loco::succs(node); - assert(while_outs.size() == node->output_count()); - - uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version()); - std::vector<int32_t> inputs_vec; - std::vector<int32_t> outputs_vec; - - for (uint32_t idx = 0; idx < node->input_count(); ++idx) - inputs_vec.push_back(get_tensor_index(node->input(idx))); - - for (uint32_t idx = 0; idx < node->output_count(); ++idx) - { - // store in order of index - bool found = false; - for (auto out : while_outs) - { - auto while_out = loco::must_cast<luci::CircleWhileOut *>(out); - if (while_out->index() == static_cast<int32_t>(idx)) - { - outputs_vec.push_back(get_tensor_index(while_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid CircleWhile output"); - } - } - - auto inputs = ctx.builder.CreateVector(inputs_vec); - auto outputs = ctx.builder.CreateVector(outputs_vec); - auto options = CreateWhileOptions(ctx.builder, node->cond_branch(), node->body_branch()); - auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_WhileOptions, options.Union()); - ctx.gd._operators.push_back(op_offset); -} - -class ExportHelper -{ -public: - ExportHelper(ExportContext &ctx) : _ctx{ctx} - { - // DO NOTHING - } - -protected: - /** - * @brief export simple nodes - */ - void export_simple(loco::Node *node, circle::BuiltinOperator bop, circle::BuiltinOptions bot, - flatbuffers::Offset<void> options_offset) - { - export_node(_ctx, node, bop, bot, options_offset); - } - - /** - * @brief export simple nodes having void options - */ - void export_simple(loco::Node *node, circle::BuiltinOperator bop) - { - export_node(_ctx, node, bop); - } - -protected: - ExportContext &_ctx; -}; - -enum class OE -{ - ABC, - DEF, - GHIJ, - KLMN, - OPQR, - STUV, - WXYZ, - CIRC, // circle only - VIRT, // virtual -}; - -class OperationExporter final : public 
ExportHelper -{ -public: - OperationExporter(ExportContext &ctx) : ExportHelper(ctx) - { - // DO NOTHING - } - -public: - void export_node(luci::CircleNode *); -}; - -template <OE oe> class OpExporterLet; - -template <> -class OpExporterLet<OE::ABC> final : public luci::CircleNodeMutableVisitor<void>, - public ExportHelper -{ -public: - OpExporterLet(ExportContext &ctx) : ExportHelper(ctx) - { - // DO NOTHING - } - -public: - // NOTE visit for luci::CircleNode is added NOT to throw NYI - void visit(luci::CircleNode *) final {} - -public: - void visit(luci::CircleAbs *) final; - void visit(luci::CircleAdd *) final; - void visit(luci::CircleAddN *) final; - void visit(luci::CircleArgMax *) final; - void visit(luci::CircleArgMin *) final; - void visit(luci::CircleAveragePool2D *) final; - void visit(luci::CircleBatchMatMul *) final; - void visit(luci::CircleBatchToSpaceND *) final; - void visit(luci::CircleBidirectionalSequenceLSTM *) final; - void visit(luci::CircleCast *) final; - void visit(luci::CircleCeil *) final; - void visit(luci::CircleConcatenation *) final; - void visit(luci::CircleConst *) final{/* skip, everything is done in exportOpDefinedTensors */}; - void visit(luci::CircleConv2D *) final; - void visit(luci::CircleCos *) final; - void visit(luci::CircleCustom *) final; -}; - -template <> -class OpExporterLet<OE::DEF> final : public luci::CircleNodeMutableVisitor<void>, - public ExportHelper -{ -public: - OpExporterLet(ExportContext &ctx) : ExportHelper(ctx) - { - // DO NOTHING - } - -public: - void visit(luci::CircleNode *) final {} - -public: - void visit(luci::CircleDepthToSpace *) final; - void visit(luci::CircleDepthwiseConv2D *) final; - void visit(luci::CircleDequantize *) final; - void visit(luci::CircleDiv *) final; - void visit(luci::CircleElu *) final; - void visit(luci::CircleEqual *) final; - void visit(luci::CircleExp *) final; - void visit(luci::CircleExpandDims *) final; - void visit(luci::CircleFakeQuant *) final; - void 
visit(luci::CircleFill *) final; - void visit(luci::CircleFloor *) final; - void visit(luci::CircleFloorDiv *) final; - void visit(luci::CircleFloorMod *) final; - void visit(luci::CircleFullyConnected *) final; -}; - -template <> -class OpExporterLet<OE::GHIJ> final : public luci::CircleNodeMutableVisitor<void>, - public ExportHelper -{ -public: - OpExporterLet(ExportContext &ctx) : ExportHelper(ctx) - { - // DO NOTHING - } - -public: - void visit(luci::CircleNode *) final {} - -public: - void visit(luci::CircleGather *) final; - void visit(luci::CircleGatherNd *) final; - void visit(luci::CircleGreater *) final; - void visit(luci::CircleGreaterEqual *) final; - void visit(luci::CircleIf *) final; -}; - -template <> -class OpExporterLet<OE::KLMN> final : public luci::CircleNodeMutableVisitor<void>, - public ExportHelper -{ -public: - OpExporterLet(ExportContext &ctx) : ExportHelper(ctx) - { - // DO NOTHING - } - -public: - void visit(luci::CircleNode *) final {} - -public: - void visit(luci::CircleL2Normalize *) final; - void visit(luci::CircleL2Pool2D *) final; - void visit(luci::CircleLeakyRelu *) final; - void visit(luci::CircleLess *) final; - void visit(luci::CircleLessEqual *) final; - void visit(luci::CircleLocalResponseNormalization *) final; - void visit(luci::CircleLog *) final; - void visit(luci::CircleLogicalAnd *) final; - void visit(luci::CircleLogicalNot *) final; - void visit(luci::CircleLogicalOr *) final; - void visit(luci::CircleLogistic *) final; - void visit(luci::CircleLogSoftmax *) final; - void visit(luci::CircleMatrixDiag *) final; - void visit(luci::CircleMatrixSetDiag *) final; - void visit(luci::CircleMaximum *) final; - void visit(luci::CircleMaxPool2D *) final; - void visit(luci::CircleMean *) final; - void visit(luci::CircleMinimum *) final; - void visit(luci::CircleMirrorPad *) final; - void visit(luci::CircleMul *) final; - void visit(luci::CircleNeg *) final; - void visit(luci::CircleNonMaxSuppressionV4 *) final; - void 
visit(luci::CircleNonMaxSuppressionV5 *) final; - void visit(luci::CircleNotEqual *) final; -}; - -template <> -class OpExporterLet<OE::OPQR> final : public luci::CircleNodeMutableVisitor<void>, - public ExportHelper -{ -public: - OpExporterLet(ExportContext &ctx) : ExportHelper(ctx) - { - // DO NOTHING - } - -public: - void visit(luci::CircleNode *) final {} - -public: - void visit(luci::CircleOneHot *) final; - void visit(luci::CirclePack *) final; - void visit(luci::CirclePad *) final; - void visit(luci::CirclePadV2 *) final; - void visit(luci::CirclePow *) final; - void visit(luci::CirclePRelu *) final; - void visit(luci::CircleQuantize *) final; - void visit(luci::CircleRange *) final; - void visit(luci::CircleRank *) final; - void visit(luci::CircleReduceAny *) final; - void visit(luci::CircleReduceMax *) final; - void visit(luci::CircleReduceMin *) final; - void visit(luci::CircleReduceProd *) final; - void visit(luci::CircleRelu *) final; - void visit(luci::CircleRelu6 *) final; - void visit(luci::CircleReluN1To1 *) final; - void visit(luci::CircleReshape *) final; - void visit(luci::CircleResizeBilinear *) final; - void visit(luci::CircleResizeNearestNeighbor *) final; - void visit(luci::CircleReverseSequence *) final; - void visit(luci::CircleReverseV2 *) final; - void visit(luci::CircleRound *) final; - void visit(luci::CircleRsqrt *) final; -}; - -template <> -class OpExporterLet<OE::STUV> final : public luci::CircleNodeMutableVisitor<void>, - public ExportHelper -{ -public: - OpExporterLet(ExportContext &ctx) : ExportHelper(ctx) - { - // DO NOTHING - } - -public: - void visit(luci::CircleNode *) final {} - -public: - void visit(luci::CircleScatterNd *) final; - void visit(luci::CircleSegmentSum *) final; - void visit(luci::CircleSelect *) final; - void visit(luci::CircleSelectV2 *) final; - void visit(luci::CircleShape *) final; - void visit(luci::CircleSin *) final; - void visit(luci::CircleSlice *) final; - void visit(luci::CircleSoftmax *) final; - 
void visit(luci::CircleSpaceToBatchND *) final; - void visit(luci::CircleSpaceToDepth *) final; - void visit(luci::CircleSparseToDense *) final; - void visit(luci::CircleSplit *) final; - void visit(luci::CircleSplitV *) final; - void visit(luci::CircleSqrt *) final; - void visit(luci::CircleSquare *) final; - void visit(luci::CircleSquaredDifference *) final; - void visit(luci::CircleSqueeze *) final; - void visit(luci::CircleStridedSlice *) final; - void visit(luci::CircleSub *) final; - void visit(luci::CircleSum *) final; - void visit(luci::CircleTanh *) final; - void visit(luci::CircleTile *) final; - void visit(luci::CircleTopKV2 *) final; - void visit(luci::CircleTranspose *) final; - void visit(luci::CircleTransposeConv *) final; - void visit(luci::CircleUnidirectionalSequenceLSTM *) final; - void visit(luci::CircleUnique *) final; - void visit(luci::CircleUnpack *) final; -}; - -template <> -class OpExporterLet<OE::WXYZ> final : public luci::CircleNodeMutableVisitor<void>, - public ExportHelper -{ -public: - OpExporterLet(ExportContext &ctx) : ExportHelper(ctx) - { - // DO NOTHING - } - -public: - void visit(luci::CircleNode *) final {} - -public: - void visit(luci::CircleWhere *) final; - void visit(luci::CircleWhile *) final; - void visit(luci::CircleZerosLike *) final; -}; - -template <> -class OpExporterLet<OE::CIRC> final : public luci::CircleNodeMutableVisitor<void>, - public ExportHelper -{ -public: - OpExporterLet(ExportContext &ctx) : ExportHelper(ctx) - { - // DO NOTHING - } - -public: - void visit(luci::CircleNode *) final {} - -public: - // Circle only - void visit(luci::CircleBCQFullyConnected *) final; - void visit(luci::CircleBCQGather *) final; - void visit(luci::CircleInstanceNorm *) final; -}; - -template <> -class OpExporterLet<OE::VIRT> final : public luci::CircleNodeMutableVisitor<void>, - public ExportHelper -{ -public: - OpExporterLet(ExportContext &ctx) : ExportHelper(ctx) - { - // DO NOTHING - } - -public: - void 
visit(luci::CircleNode *) final {} - -public: - // Virtual - void visit(luci::CircleInput *) final {} - void visit(luci::CircleOutput *) final {} - void visit(luci::CircleOutputDummy *) final {} - void visit(luci::CircleOutputExclude *) final {} - // Virtual for multiple-outputs - void visit(luci::CircleBidirectionalSequenceLSTMOut *) final {} - void visit(luci::CircleCustomOut *) final {} - void visit(luci::CircleIfOut *) final {} - void visit(luci::CircleNonMaxSuppressionV4Out *) final {} - void visit(luci::CircleNonMaxSuppressionV5Out *) final {} - void visit(luci::CircleSplitOut *) final {} - void visit(luci::CircleSplitVOut *) final {} - void visit(luci::CircleTopKV2Out *) final {} - void visit(luci::CircleUniqueOut *) final {} - void visit(luci::CircleUnpackOut *) final {} - void visit(luci::CircleWhileOut *) final {} -}; - -void OperationExporter::export_node(luci::CircleNode *node) -{ - // TODO revise return type to bool and return if handled -#define VISIT_OE(GRP) \ - do \ - { \ - OpExporterLet<OE::GRP> oe(_ctx); \ - node->accept(&oe); \ - } while (false) - - VISIT_OE(ABC); - VISIT_OE(DEF); - VISIT_OE(GHIJ); - VISIT_OE(KLMN); - VISIT_OE(OPQR); - VISIT_OE(STUV); - VISIT_OE(WXYZ); - VISIT_OE(CIRC); - VISIT_OE(VIRT); - -#undef VISIT_OE -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleAbs *node) -{ - export_simple(node, circle::BuiltinOperator_ABS, circle::BuiltinOptions_AbsOptions, - CreateAbsOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleAdd *node) -{ - export_simple( - node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions, - CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleAddN *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::ABC>::visit(luci::CircleArgMax *node) -{ - export_simple( - node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions, - 
CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union()); -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleArgMin *node) -{ - export_simple( - node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions, - CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union()); -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleAveragePool2D *node) -{ - export_pool_2d<luci::CircleAveragePool2D>(_ctx, node, circle::BuiltinOperator_AVERAGE_POOL_2D); -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleBatchMatMul *node) -{ - export_simple(node, circle::BuiltinOperator_BATCH_MATMUL, - circle::BuiltinOptions_BatchMatMulOptions, - CreateBatchMatMulOptions(_ctx.builder, node->adj_x(), node->adj_y()).Union()); -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleBidirectionalSequenceLSTM *node) -{ - auto bidi_lstm_outs = loco::succs(node); - assert((bidi_lstm_outs.size() == 1) || (bidi_lstm_outs.size() == 2)); - uint32_t op_idx = _ctx.md.registerBuiltinOpcode( - circle::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, node->op_version()); - - std::vector<int32_t> inputs_vec{get_tensor_index(node->input())}; - std::vector<int32_t> outputs_vec; - - for (int32_t index = 0; index < 2; index++) - { - // store in order of index - bool found = false; - for (auto out : bidi_lstm_outs) - { - auto bidi_lstm_out = loco::must_cast<luci::CircleBidirectionalSequenceLSTMOut *>(out); - if (bidi_lstm_out->index() == index) - { - outputs_vec.push_back(get_tensor_index(bidi_lstm_out)); - found = true; - break; - } - } - if (!found) - { - INTERNAL_EXN("Invalid BidirectionalSequenceLSTM output"); - } - } - - auto inputs = _ctx.builder.CreateVector(inputs_vec); - auto outputs = _ctx.builder.CreateVector(outputs_vec); - auto options = CreateBidirectionalSequenceLSTMOptions( - _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(), - node->proj_clip(), node->merge_outputs(), node->time_major(), - 
node->asymmetric_quantize_inputs()); - auto op_offset = - CreateOperator(_ctx.builder, op_idx, inputs, outputs, - circle::BuiltinOptions_BidirectionalSequenceLSTMOptions, options.Union()); - _ctx.gd._operators.push_back(op_offset); -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleCast *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::ABC>::visit(luci::CircleCeil *node) -{ - export_simple(node, circle::BuiltinOperator_CEIL); -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleConcatenation *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::ABC>::visit(luci::CircleBatchToSpaceND *node) -{ - export_simple(node, circle::BuiltinOperator_BATCH_TO_SPACE_ND, - circle::BuiltinOptions_BatchToSpaceNDOptions, - CreateBatchToSpaceNDOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleConv2D *node) -{ - export_simple(node, circle::BuiltinOperator_CONV_2D, circle::BuiltinOptions_Conv2DOptions, - CreateConv2DOptions(_ctx.builder, getOpPadding(node->padding()), - node->stride()->w(), node->stride()->h(), - to_circle_actfunc(node->fusedActivationFunction()), - node->dilation()->w(), node->dilation()->h()) - .Union()); -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleCos *node) -{ - export_simple(node, circle::BuiltinOperator_COS, circle::BuiltinOptions_CosOptions, - CreateCosOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::ABC>::visit(luci::CircleCustom *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::DEF>::visit(luci::CircleDepthToSpace *node) -{ - export_simple(node, circle::BuiltinOperator_DEPTH_TO_SPACE, - circle::BuiltinOptions_DepthToSpaceOptions, - CreateDepthToSpaceOptions(_ctx.builder, node->block_size()).Union()); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleDepthwiseConv2D *node) -{ - export_simple( - node, circle::BuiltinOperator_DEPTHWISE_CONV_2D, circle::BuiltinOptions_DepthwiseConv2DOptions, - CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()), 
node->stride()->w(), - node->stride()->h(), node->depthMultiplier(), - to_circle_actfunc(node->fusedActivationFunction()), - node->dilation()->w(), node->dilation()->h()) - .Union()); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleDequantize *node) -{ - export_simple(node, circle::BuiltinOperator_DEQUANTIZE); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleDiv *node) -{ - export_simple( - node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions, - CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleElu *node) -{ - export_simple(node, circle::BuiltinOperator_ELU); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleEqual *node) -{ - export_simple(node, circle::BuiltinOperator_EQUAL, circle::BuiltinOptions_EqualOptions, - CreateEqualOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleExp *node) -{ - export_simple(node, circle::BuiltinOperator_EXP, circle::BuiltinOptions_ExpOptions, - CreateExpOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleExpandDims *node) -{ - export_simple(node, circle::BuiltinOperator_EXPAND_DIMS, circle::BuiltinOptions_ExpandDimsOptions, - CreateExpandDimsOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleFakeQuant *node) -{ - export_simple(node, circle::BuiltinOperator_FAKE_QUANT, circle::BuiltinOptions_FakeQuantOptions, - CreateFakeQuantOptions(_ctx.builder, node->min(), node->max(), node->num_bits(), - node->narrow_range()) - .Union()); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleFill *node) -{ - export_simple(node, circle::BuiltinOperator_FILL, circle::BuiltinOptions_FillOptions, - CreateFillOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleFloor *node) -{ - export_simple(node, circle::BuiltinOperator_FLOOR); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleFloorDiv *node) -{ 
- export_simple(node, circle::BuiltinOperator_FLOOR_DIV, circle::BuiltinOptions_FloorDivOptions, - CreateFloorDivOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleFloorMod *node) -{ - export_simple(node, circle::BuiltinOperator_FLOOR_MOD, circle::BuiltinOptions_FloorModOptions, - CreateFloorModOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::DEF>::visit(luci::CircleFullyConnected *node) -{ - export_simple( - node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions, - CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()), - to_circle_weightsformat(node->weights_format())) - .Union()); -} - -void OpExporterLet<OE::GHIJ>::visit(luci::CircleGather *node) -{ - export_simple(node, circle::BuiltinOperator_GATHER, circle::BuiltinOptions_GatherOptions, - CreateGatherOptions(_ctx.builder, node->axis()).Union()); -} - -void OpExporterLet<OE::GHIJ>::visit(luci::CircleGatherNd *node) -{ - export_simple(node, circle::BuiltinOperator_GATHER_ND, circle::BuiltinOptions_GatherNdOptions, - CreateGatherNdOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::GHIJ>::visit(luci::CircleGreater *node) -{ - export_simple(node, circle::BuiltinOperator_GREATER, circle::BuiltinOptions_GreaterOptions, - CreateGreaterOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::GHIJ>::visit(luci::CircleGreaterEqual *node) -{ - export_simple(node, circle::BuiltinOperator_GREATER_EQUAL, - circle::BuiltinOptions_GreaterEqualOptions, - CreateGreaterEqualOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::GHIJ>::visit(luci::CircleIf *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::KLMN>::visit(luci::CircleL2Normalize *node) -{ - export_simple( - node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions, - CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); -} - -void 
OpExporterLet<OE::KLMN>::visit(luci::CircleL2Pool2D *node) -{ - export_pool_2d<luci::CircleL2Pool2D>(_ctx, node, circle::BuiltinOperator_L2_POOL_2D); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleLeakyRelu *node) -{ - export_simple(node, circle::BuiltinOperator_LEAKY_RELU, circle::BuiltinOptions_LeakyReluOptions, - CreateLeakyReluOptions(_ctx.builder, node->alpha()).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleLess *node) -{ - export_simple(node, circle::BuiltinOperator_LESS, circle::BuiltinOptions_LessOptions, - CreateLessOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleLessEqual *node) -{ - export_simple(node, circle::BuiltinOperator_LESS_EQUAL, circle::BuiltinOptions_LessEqualOptions, - CreateLessEqualOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleLocalResponseNormalization *node) -{ - export_simple(node, circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, - circle::BuiltinOptions_LocalResponseNormalizationOptions, - CreateLocalResponseNormalizationOptions(_ctx.builder, node->radius(), node->bias(), - node->alpha(), node->beta()) - .Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleLog *node) -{ - export_simple(node, circle::BuiltinOperator_LOG); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleLogicalAnd *node) -{ - export_simple(node, circle::BuiltinOperator_LOGICAL_AND, circle::BuiltinOptions_LogicalAndOptions, - CreateLogicalAndOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleLogicalNot *node) -{ - export_simple(node, circle::BuiltinOperator_LOGICAL_NOT, circle::BuiltinOptions_LogicalNotOptions, - CreateLogicalNotOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleLogicalOr *node) -{ - export_simple(node, circle::BuiltinOperator_LOGICAL_OR, circle::BuiltinOptions_LogicalOrOptions, - CreateLogicalOrOptions(_ctx.builder).Union()); -} - -void 
OpExporterLet<OE::KLMN>::visit(luci::CircleLogistic *node) -{ - export_simple(node, circle::BuiltinOperator_LOGISTIC); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleLogSoftmax *node) -{ - export_simple(node, circle::BuiltinOperator_LOG_SOFTMAX, circle::BuiltinOptions_LogSoftmaxOptions, - CreateLogSoftmaxOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleMatrixDiag *node) -{ - export_simple(node, circle::BuiltinOperator_MATRIX_DIAG, circle::BuiltinOptions_MatrixDiagOptions, - CreateMatrixDiagOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleMatrixSetDiag *node) -{ - export_simple(node, circle::BuiltinOperator_MATRIX_SET_DIAG, - circle::BuiltinOptions_MatrixSetDiagOptions, - CreateMatrixSetDiagOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleMaximum *node) -{ - export_simple(node, circle::BuiltinOperator_MAXIMUM, circle::BuiltinOptions_MaximumMinimumOptions, - CreateMaximumMinimumOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleMaxPool2D *node) -{ - export_pool_2d<luci::CircleMaxPool2D>(_ctx, node, circle::BuiltinOperator_MAX_POOL_2D); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleMean *node) -{ - export_simple(node, circle::BuiltinOperator_MEAN, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleMinimum *node) -{ - export_simple(node, circle::BuiltinOperator_MINIMUM, circle::BuiltinOptions_MaximumMinimumOptions, - CreateMaximumMinimumOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleMirrorPad *node) -{ - export_simple( - node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions, - CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleMul *node) -{ - 
export_simple( - node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions, - CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleNeg *node) -{ - export_simple(node, circle::BuiltinOperator_NEG, circle::BuiltinOptions_NegOptions, - CreateNegOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleNonMaxSuppressionV4 *node) -{ - export_node(_ctx, node); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleNonMaxSuppressionV5 *node) -{ - export_node(_ctx, node); -} - -void OpExporterLet<OE::KLMN>::visit(luci::CircleNotEqual *node) -{ - export_simple(node, circle::BuiltinOperator_NOT_EQUAL, circle::BuiltinOptions_NotEqualOptions, - CreateNotEqualOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleOneHot *node) -{ - export_simple(node, circle::BuiltinOperator_ONE_HOT, circle::BuiltinOptions_OneHotOptions, - CreateOneHotOptions(_ctx.builder, node->axis()).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CirclePack *node) -{ - export_simple(node, circle::BuiltinOperator_PACK, circle::BuiltinOptions_PackOptions, - CreatePackOptions(_ctx.builder, node->values_count(), node->axis()).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CirclePad *node) -{ - export_simple(node, circle::BuiltinOperator_PAD, circle::BuiltinOptions_PadOptions, - CreatePadOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CirclePadV2 *node) -{ - export_simple(node, circle::BuiltinOperator_PADV2, circle::BuiltinOptions_PadV2Options, - CreatePadV2Options(_ctx.builder).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CirclePow *node) -{ - export_simple(node, circle::BuiltinOperator_POW, circle::BuiltinOptions_PowOptions, - CreatePowOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CirclePRelu *node) -{ - export_simple(node, 
circle::BuiltinOperator_PRELU); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleQuantize *node) -{ - export_simple(node, circle::BuiltinOperator_QUANTIZE); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleRange *node) -{ - export_simple(node, circle::BuiltinOperator_RANGE, circle::BuiltinOptions_RangeOptions, - CreateRangeOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleRank *node) -{ - export_simple(node, circle::BuiltinOperator_RANK, circle::BuiltinOptions_RankOptions, - CreateRankOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceAny *node) -{ - export_simple(node, circle::BuiltinOperator_REDUCE_ANY, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceMax *node) -{ - export_simple(node, circle::BuiltinOperator_REDUCE_MAX, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceMin *node) -{ - export_simple(node, circle::BuiltinOperator_REDUCE_MIN, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceProd *node) -{ - export_simple(node, circle::BuiltinOperator_REDUCE_PROD, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleRelu *node) -{ - export_simple(node, circle::BuiltinOperator_RELU); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleRelu6 *node) -{ - export_simple(node, circle::BuiltinOperator_RELU6); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleReluN1To1 *node) -{ - export_simple(node, circle::BuiltinOperator_RELU_N1_TO_1); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleReshape *node) -{ - auto new_shape = 
_ctx.builder.CreateVector<int32_t>( - node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); }); - - export_simple(node, circle::BuiltinOperator_RESHAPE, circle::BuiltinOptions_ReshapeOptions, - CreateReshapeOptions(_ctx.builder, new_shape).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleResizeBilinear *node) -{ - export_simple( - node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions, - CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers()) - .Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleResizeNearestNeighbor *node) -{ - export_simple(node, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, - circle::BuiltinOptions_ResizeNearestNeighborOptions, - CreateResizeNearestNeighborOptions(_ctx.builder, node->align_corners()).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleReverseSequence *node) -{ - export_simple( - node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions, - CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union()); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleReverseV2 *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::OPQR>::visit(luci::CircleRound *node) -{ - export_simple(node, circle::BuiltinOperator_ROUND); -} - -void OpExporterLet<OE::OPQR>::visit(luci::CircleRsqrt *node) -{ - export_simple(node, circle::BuiltinOperator_RSQRT); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleScatterNd *node) -{ - export_simple(node, circle::BuiltinOperator_SCATTER_ND, circle::BuiltinOptions_ScatterNdOptions, - CreateScatterNdOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSegmentSum *node) -{ - export_simple(node, circle::BuiltinOperator_SEGMENT_SUM, circle::BuiltinOptions_SegmentSumOptions, - CreateSegmentSumOptions(_ctx.builder).Union()); -} - -void 
OpExporterLet<OE::STUV>::visit(luci::CircleSelect *node) -{ - export_simple(node, circle::BuiltinOperator_SELECT, circle::BuiltinOptions_SelectOptions, - CreateSelectOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSelectV2 *node) -{ - export_simple(node, circle::BuiltinOperator_SELECT_V2, circle::BuiltinOptions_SelectV2Options, - CreateSelectV2Options(_ctx.builder).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleShape *node) -{ - export_simple(node, circle::BuiltinOperator_SHAPE, circle::BuiltinOptions_ShapeOptions, - CreateShapeOptions(_ctx.builder, to_circle_tensortype(node->out_type())).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSin *node) -{ - export_simple(node, circle::BuiltinOperator_SIN); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSlice *node) -{ - export_simple(node, circle::BuiltinOperator_SLICE, circle::BuiltinOptions_SliceOptions, - CreateSliceOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSoftmax *node) -{ - export_simple(node, circle::BuiltinOperator_SOFTMAX, circle::BuiltinOptions_SoftmaxOptions, - CreateSoftmaxOptions(_ctx.builder, node->beta()).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSpaceToBatchND *node) -{ - export_simple(node, circle::BuiltinOperator_SPACE_TO_BATCH_ND, - circle::BuiltinOptions_SpaceToBatchNDOptions, - CreateSpaceToBatchNDOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSpaceToDepth *node) -{ - export_simple(node, circle::BuiltinOperator_SPACE_TO_DEPTH, - circle::BuiltinOptions_SpaceToDepthOptions, - CreateSpaceToDepthOptions(_ctx.builder, node->block_size()).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSparseToDense *node) -{ - export_simple(node, circle::BuiltinOperator_SPARSE_TO_DENSE, - circle::BuiltinOptions_SparseToDenseOptions, - CreateSparseToDenseOptions(_ctx.builder, node->validate_indices()).Union()); -} - -void 
OpExporterLet<OE::STUV>::visit(luci::CircleSplit *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::STUV>::visit(luci::CircleSplitV *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::STUV>::visit(luci::CircleSqrt *node) -{ - export_simple(node, circle::BuiltinOperator_SQRT); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSquare *node) -{ - export_simple(node, circle::BuiltinOperator_SQUARE, circle::BuiltinOptions_SquareOptions, - CreateSquareOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSquaredDifference *node) -{ - export_simple(node, circle::BuiltinOperator_SQUARED_DIFFERENCE, - circle::BuiltinOptions_SquaredDifferenceOptions, - CreateSquaredDifferenceOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSqueeze *node) -{ - auto squeeze_dims = _ctx.builder.CreateVector<int32_t>(node->squeeze_dims()); - export_simple(node, circle::BuiltinOperator_SQUEEZE, circle::BuiltinOptions_SqueezeOptions, - CreateSqueezeOptions(_ctx.builder, squeeze_dims).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleStridedSlice *node) -{ - export_simple(node, circle::BuiltinOperator_STRIDED_SLICE, - circle::BuiltinOptions_StridedSliceOptions, - CreateStridedSliceOptions(_ctx.builder, node->begin_mask(), node->end_mask(), - node->ellipsis_mask(), node->new_axis_mask(), - node->shrink_axis_mask()) - .Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSub *node) -{ - export_simple( - node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions, - CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleSum *node) -{ - export_simple(node, circle::BuiltinOperator_SUM, circle::BuiltinOptions_ReducerOptions, - CreateReducerOptions(_ctx.builder, node->keep_dims()).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleTanh *node) -{ - export_simple(node, 
circle::BuiltinOperator_TANH); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleTile *node) -{ - export_simple(node, circle::BuiltinOperator_TILE, circle::BuiltinOptions_TileOptions, - CreateTileOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleTopKV2 *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::STUV>::visit(luci::CircleTranspose *node) -{ - export_simple(node, circle::BuiltinOperator_TRANSPOSE, circle::BuiltinOptions_TransposeOptions, - CreateTransposeOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleTransposeConv *node) -{ - export_simple(node, circle::BuiltinOperator_TRANSPOSE_CONV, - circle::BuiltinOptions_TransposeConvOptions, - CreateTransposeConvOptions(_ctx.builder, getOpPadding(node->padding()), - node->stride()->w(), node->stride()->h()) - .Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleUnidirectionalSequenceLSTM *node) -{ - export_simple(node, circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, - circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions, - CreateUnidirectionalSequenceLSTMOptions( - _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()), - node->cell_clip(), node->proj_clip(), node->time_major(), - node->asymmetric_quantize_inputs()) - .Union()); -} - -void OpExporterLet<OE::STUV>::visit(luci::CircleUnique *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::STUV>::visit(luci::CircleUnpack *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::WXYZ>::visit(luci::CircleWhere *node) -{ - export_simple(node, circle::BuiltinOperator_WHERE, circle::BuiltinOptions_WhereOptions, - CreateWhereOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::WXYZ>::visit(luci::CircleWhile *node) { export_node(_ctx, node); } - -void OpExporterLet<OE::WXYZ>::visit(luci::CircleZerosLike *node) -{ - export_simple(node, circle::BuiltinOperator_ZEROS_LIKE, circle::BuiltinOptions_ZerosLikeOptions, - 
CreateZerosLikeOptions(_ctx.builder).Union()); -} - -void OpExporterLet<OE::CIRC>::visit(luci::CircleBCQFullyConnected *node) -{ - export_simple(node, circle::BuiltinOperator_BCQ_FULLY_CONNECTED, - circle::BuiltinOptions_BCQFullyConnectedOptions, - CreateBCQFullyConnectedOptions(_ctx.builder, node->weights_hidden_size(), - to_circle_actfunc(node->fusedActivationFunction())) - .Union()); -} - -void OpExporterLet<OE::CIRC>::visit(luci::CircleBCQGather *node) -{ - export_simple( - node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions, - CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union()); -} - -void OpExporterLet<OE::CIRC>::visit(luci::CircleInstanceNorm *node) +namespace luci { - export_simple(node, circle::BuiltinOperator_INSTANCE_NORM, - circle::BuiltinOptions_InstanceNormOptions, - CreateInstanceNormOptions(_ctx.builder, node->epsilon(), - to_circle_actfunc(node->fusedActivationFunction())) - .Union()); -} -void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md, - SerializedGraphData &gd, uint32_t node_position) +void exportNodes(loco::Graph *g, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md, + SerializedGraphData &gd) { - if (auto circle_node = dynamic_cast<luci::CircleNode *>(node)) + uint32_t node_position = 0; + for (auto node : loco::postorder_traversal(loco::output_nodes(g))) { ExportContext ctx{builder, md, gd}; - OperationExporter exporter{ctx}; + OperationExporterRule exporter_rule{ctx}; + + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + circle_node->accept(&exporter_rule); const auto ops_size = gd._operators.size(); - exporter.export_node(circle_node); if (has_origin(circle_node) && ops_size != gd._operators.size()) { const auto node_id = gd._operators.size() - 1; @@ -1716,25 +60,7 @@ void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, Seria } 
md._metadata.add_execution_plan_table(node_position, execution_plan_vector); } - } - else - { - INTERNAL_EXN("Node with unsupported dialect found"); - } -} -} // namespace - -namespace luci -{ - -void exportNodes(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &md, - SerializedGraphData &gd) -{ - uint32_t node_position = 0; - for (auto node : loco::postorder_traversal(loco::output_nodes(g))) - { - exportNode(node, builder, md, gd, node_position); node_position++; } } diff --git a/compiler/luci/export/src/CircleOperationExporter.h b/compiler/luci/export/src/CircleOperationExporter.h index de6abfc54..f2b3cfd6b 100644 --- a/compiler/luci/export/src/CircleOperationExporter.h +++ b/compiler/luci/export/src/CircleOperationExporter.h @@ -17,7 +17,7 @@ #ifndef __CIRCLE_OPERATION_EXPORTER_H__ #define __CIRCLE_OPERATION_EXPORTER_H__ -#include "CircleExporterUtils.h" +#include "SerializedData.h" #include <loco/IR/Graph.h> diff --git a/compiler/luci/export/src/CircleOperationExporterRule.cpp b/compiler/luci/export/src/CircleOperationExporterRule.cpp new file mode 100644 index 000000000..8dc59fa9c --- /dev/null +++ b/compiler/luci/export/src/CircleOperationExporterRule.cpp @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CircleOperationExporterRule.h" +#include "CircleBuiltinTypesExtractor.h" +#include "Check.h" + +#include <loco/IR/Graph.h> +#include <luci/IR/CircleNode.h> +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> +#include <oops/InternalExn.h> + +#include <vector> + +namespace +{ +class OutputVectorExtractor final : public luci::CircleNodeMutableVisitor<std::vector<int32_t>> +{ +public: + OutputVectorExtractor() + { + // DO NOTHING + } + +public: + std::vector<int32_t> visit(luci::CircleNode *node) final + { + std::vector<int32_t> outputs_vec{luci::get_tensor_index(node)}; + return outputs_vec; + } + + std::vector<int32_t> visit(luci::CircleBidirectionalSequenceLSTM *node) final + { + auto bidi_lstm_outs = loco::succs(node); + assert((bidi_lstm_outs.size() == 1) || (bidi_lstm_outs.size() == 2)); + + std::vector<int32_t> outputs_vec(bidi_lstm_outs.size()); + + for (auto out : bidi_lstm_outs) + { + auto bidi_lstm_out = loco::must_cast<luci::CircleBidirectionalSequenceLSTMOut *>(out); + if (bidi_lstm_out->index() >= int32_t(bidi_lstm_outs.size())) + INTERNAL_EXN("Invalid BidirectionalSequenceLSTM output"); + outputs_vec[bidi_lstm_out->index()] = luci::get_tensor_index(bidi_lstm_out); + } + + return outputs_vec; + } + + std::vector<int32_t> visit(luci::CircleCustom *node) final + { + auto custom_outputs = loco::succs(node); + assert(custom_outputs.size() == node->numOutputs()); + + std::vector<int32_t> outputs_vec(node->numOutputs()); + + for (auto out : custom_outputs) + { + auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out); + if (custom_out->index() >= int32_t(node->numOutputs())) + INTERNAL_EXN("Invalid Custom output"); + outputs_vec[custom_out->index()] = luci::get_tensor_index(custom_out); + } + + return outputs_vec; + } + + std::vector<int32_t> visit(luci::CircleIf *node) final + { + auto if_outs = loco::succs(node); + assert(if_outs.size() == node->output_count()); + + std::vector<int32_t> 
outputs_vec(node->output_count()); + + for (auto out : if_outs) + { + auto if_out = loco::must_cast<luci::CircleIfOut *>(out); + if (if_out->index() >= int32_t(node->output_count())) + INTERNAL_EXN("Invalid If output"); + outputs_vec[if_out->index()] = luci::get_tensor_index(if_out); + } + + return outputs_vec; + } + + std::vector<int32_t> visit(luci::CircleNonMaxSuppressionV4 *node) final + { + auto nms_outs = loco::succs(node); + assert(nms_outs.size() == 2); + + std::vector<int32_t> outputs_vec(2); + + for (auto out : nms_outs) + { + auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out); + if (nms_out->index() >= 2) + INTERNAL_EXN("Invalid NonMaxSuppressionV4 output"); + outputs_vec[nms_out->index()] = luci::get_tensor_index(nms_out); + } + + return outputs_vec; + } + + std::vector<int32_t> visit(luci::CircleNonMaxSuppressionV5 *node) final + { + auto nms_outs = loco::succs(node); + assert(nms_outs.size() == 3); + + std::vector<int32_t> outputs_vec(3); + + for (auto out : nms_outs) + { + auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out); + if (nms_out->index() >= 3) + INTERNAL_EXN("Invalid NonMaxSuppressionV5 output"); + outputs_vec[nms_out->index()] = luci::get_tensor_index(nms_out); + } + + return outputs_vec; + } + + std::vector<int32_t> visit(luci::CircleSplit *node) final + { + auto split_outs = loco::succs(node); + assert(int32_t(split_outs.size()) == node->num_split()); + + std::vector<int32_t> outputs_vec(node->num_split()); + + for (auto out : split_outs) + { + auto split_out = loco::must_cast<luci::CircleSplitOut *>(out); + if (split_out->index() >= node->num_split()) + INTERNAL_EXN("Invalid Split output"); + outputs_vec[split_out->index()] = luci::get_tensor_index(split_out); + } + + return outputs_vec; + } + + std::vector<int32_t> visit(luci::CircleSplitV *node) final + { + auto split_outs = loco::succs(node); + assert(int32_t(split_outs.size()) == node->num_split()); + + std::vector<int32_t> 
outputs_vec(node->num_split()); + + for (auto out : split_outs) + { + auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out); + if (split_out->index() >= node->num_split()) + INTERNAL_EXN("Invalid SplitV output"); + outputs_vec[split_out->index()] = luci::get_tensor_index(split_out); + } + + return outputs_vec; + } + + std::vector<int32_t> visit(luci::CircleTopKV2 *node) final + { + auto topkv2_outs = loco::succs(node); + assert(topkv2_outs.size() == 2); + + std::vector<int32_t> outputs_vec(2); + + for (auto out : topkv2_outs) + { + auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out); + if (topkv2_out->index() >= 2) + INTERNAL_EXN("Invalid TopKV2 output"); + outputs_vec[topkv2_out->index()] = luci::get_tensor_index(topkv2_out); + } + + return outputs_vec; + } + + std::vector<int32_t> visit(luci::CircleUnique *node) final + { + auto unique_outs = loco::succs(node); + assert(unique_outs.size() == 2); + + std::vector<int32_t> outputs_vec(2); + + for (auto out : unique_outs) + { + auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out); + if (unique_out->index() >= 2) + INTERNAL_EXN("Invalid Unique output"); + outputs_vec[unique_out->index()] = luci::get_tensor_index(unique_out); + } + + return outputs_vec; + } + + std::vector<int32_t> visit(luci::CircleUnpack *node) final + { + auto unpack_outs = loco::succs(node); + assert(int32_t(unpack_outs.size()) == node->num()); + + std::vector<int32_t> outputs_vec(node->num()); + + for (auto out : unpack_outs) + { + auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out); + if (unpack_out->index() >= node->num()) + INTERNAL_EXN("Invalid Unpack output"); + outputs_vec[unpack_out->index()] = luci::get_tensor_index(unpack_out); + } + + return outputs_vec; + } + + std::vector<int32_t> visit(luci::CircleWhile *node) final + { + auto while_outs = loco::succs(node); + assert(while_outs.size() == node->output_count()); + + std::vector<int32_t> outputs_vec(node->output_count()); + + for (auto out : 
while_outs) + { + auto while_out = loco::must_cast<luci::CircleWhileOut *>(out); + if (while_out->index() >= int32_t(node->output_count())) + INTERNAL_EXN("Invalid While output"); + outputs_vec[while_out->index()] = luci::get_tensor_index(while_out); + } + + return outputs_vec; + } +}; + +} // namespace + +namespace luci +{ + +void OperationExporterRule::visit(luci::CircleNode *node) +{ + auto op_idx = _ctx.md.registerBuiltinOpcode(circle_builtin_operator(node), + circle_custom_code(node), node->op_version()); + + std::vector<int32_t> inputs_vec; + for (uint32_t i = 0; i < node->arity(); ++i) + inputs_vec.push_back(luci::get_tensor_index(node->arg(i))); + auto inputs = _ctx.builder.CreateVector(inputs_vec); + + OutputVectorExtractor outputs_vec_extractor; + auto outputs_vec = node->accept(&outputs_vec_extractor); + auto outputs = _ctx.builder.CreateVector(outputs_vec); + + auto builtin_options = circle_builtin_options(node); + + luci::BuiltinOptionsExtractor builtin_options_extractor(_ctx.builder); + auto options_offset = node->accept(&builtin_options_extractor); + + // If node is not CircleCustom, null offset(0) is returned + auto custom_options = circle_custom_options(_ctx.builder, node); + + auto op_offset = circle::CreateOperator(_ctx.builder, op_idx, inputs, outputs, builtin_options, + options_offset, custom_options); + _ctx.gd._operators.push_back(op_offset); +} + +} // namespace luci diff --git a/compiler/luci/export/src/CircleOperationExporterRule.h b/compiler/luci/export/src/CircleOperationExporterRule.h new file mode 100644 index 000000000..23e7546cf --- /dev/null +++ b/compiler/luci/export/src/CircleOperationExporterRule.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CIRCLE_OPERATION_EXPORTER_RULE_H__ +#define __CIRCLE_OPERATION_EXPORTER_RULE_H__ + +#include "CircleOperationExporter.h" + +#include <luci/IR/CircleNode.h> +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> + +namespace luci +{ + +struct ExportContext +{ + flatbuffers::FlatBufferBuilder &builder; + luci::SerializedModelData &md; + luci::SerializedGraphData &gd; +}; + +class OperationExporterRule final : public luci::CircleNodeMutableVisitor<void> +{ +public: + OperationExporterRule(ExportContext &ctx) : _ctx{ctx} + { + // DO NOTHING + } + +public: + // Default export rule + void visit(luci::CircleNode *node) final; + + // Non-virtual + void visit(luci::CircleConst *) final{/* skip, everything is done in exportOpDefinedTensors */}; + + // Virtual + void visit(luci::CircleInput *) final {} + void visit(luci::CircleOutput *) final {} + void visit(luci::CircleOutputDummy *) final {} + void visit(luci::CircleOutputExclude *) final {} + // Virtual for multiple-outputs + void visit(luci::CircleBidirectionalSequenceLSTMOut *) final {} + void visit(luci::CircleCustomOut *) final {} + void visit(luci::CircleIfOut *) final {} + void visit(luci::CircleNonMaxSuppressionV4Out *) final {} + void visit(luci::CircleNonMaxSuppressionV5Out *) final {} + void visit(luci::CircleSplitOut *) final {} + void visit(luci::CircleSplitVOut *) final {} + void visit(luci::CircleTopKV2Out *) final {} + void visit(luci::CircleUniqueOut *) final {} + void visit(luci::CircleUnpackOut *) final {} + void visit(luci::CircleVariable *) final {} 
+ void visit(luci::CircleWhileOut *) final {} + +protected: + ExportContext &_ctx; +}; + +} // namespace luci + +#endif // __CIRCLE_OPERATION_EXPORTER_RULE_H__ diff --git a/compiler/luci/export/src/CircleOps.lst b/compiler/luci/export/src/CircleOps.lst new file mode 100644 index 000000000..1b6909303 --- /dev/null +++ b/compiler/luci/export/src/CircleOps.lst @@ -0,0 +1,154 @@ +#ifndef CIRCLE_NODE +#error "Define CIRCLE_NODE" +#endif // CIRCLE_NODE + +#ifndef CIRCLE_VNODE +#error "Define CIRCLE_VNODE" +#endif // CIRCLE_VNODE + +// +// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER +// +// NOTE : CIRCLE_VNODE does not have any additional parameters +// because they are not circle builtin operators +// Please add parameters when they are needed. +// +// CIRCLE_NODE(CircleNode, circle::BuiltinOperator, circle::BuiltinOptions) +// CIRCLE_VNODE(CircleNode) +// + +CIRCLE_NODE(CircleAbs, BuiltinOperator_ABS, BuiltinOptions_AbsOptions) +CIRCLE_NODE(CircleAdd, BuiltinOperator_ADD, BuiltinOptions_AddOptions) +CIRCLE_NODE(CircleAddN, BuiltinOperator_ADD_N, BuiltinOptions_AddNOptions) +CIRCLE_NODE(CircleArgMax, BuiltinOperator_ARG_MAX, BuiltinOptions_ArgMaxOptions) +CIRCLE_NODE(CircleArgMin, BuiltinOperator_ARG_MIN, BuiltinOptions_ArgMinOptions) +CIRCLE_NODE(CircleAveragePool2D, BuiltinOperator_AVERAGE_POOL_2D , BuiltinOptions_Pool2DOptions) +CIRCLE_NODE(CircleBatchToSpaceND, BuiltinOperator_BATCH_TO_SPACE_ND, BuiltinOptions_BatchToSpaceNDOptions) +CIRCLE_NODE(CircleBatchMatMul, BuiltinOperator_BATCH_MATMUL, BuiltinOptions_BatchMatMulOptions) +CIRCLE_NODE(CircleBidirectionalSequenceLSTM, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOptions_BidirectionalSequenceLSTMOptions) +CIRCLE_NODE(CircleCast, BuiltinOperator_CAST, BuiltinOptions_CastOptions) +CIRCLE_NODE(CircleCeil, BuiltinOperator_CEIL, BuiltinOptions_NONE) +CIRCLE_NODE(CircleConcatenation, BuiltinOperator_CONCATENATION, BuiltinOptions_ConcatenationOptions) +CIRCLE_NODE(CircleConv2D, BuiltinOperator_CONV_2D, 
BuiltinOptions_Conv2DOptions) +CIRCLE_NODE(CircleCos, BuiltinOperator_COS, BuiltinOptions_CosOptions) +CIRCLE_NODE(CircleCustom, BuiltinOperator_CUSTOM, BuiltinOptions_NONE) +CIRCLE_NODE(CircleDepthToSpace, BuiltinOperator_DEPTH_TO_SPACE, BuiltinOptions_DepthToSpaceOptions) +CIRCLE_NODE(CircleDepthwiseConv2D, BuiltinOperator_DEPTHWISE_CONV_2D, BuiltinOptions_DepthwiseConv2DOptions) +CIRCLE_NODE(CircleDequantize, BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions) +CIRCLE_NODE(CircleDiv, BuiltinOperator_DIV, BuiltinOptions_DivOptions) +CIRCLE_NODE(CircleElu, BuiltinOperator_ELU, BuiltinOptions_NONE) +CIRCLE_NODE(CircleEqual, BuiltinOperator_EQUAL, BuiltinOptions_EqualOptions) +CIRCLE_NODE(CircleExp, BuiltinOperator_EXP, BuiltinOptions_ExpOptions) +CIRCLE_NODE(CircleExpandDims, BuiltinOperator_EXPAND_DIMS, BuiltinOptions_ExpandDimsOptions) +CIRCLE_NODE(CircleFakeQuant, BuiltinOperator_FAKE_QUANT, BuiltinOptions_FakeQuantOptions) +CIRCLE_NODE(CircleFill, BuiltinOperator_FILL, BuiltinOptions_FillOptions) +CIRCLE_NODE(CircleFloor, BuiltinOperator_FLOOR, BuiltinOptions_NONE) +CIRCLE_NODE(CircleFloorDiv, BuiltinOperator_FLOOR_DIV, BuiltinOptions_FloorDivOptions) +CIRCLE_NODE(CircleFloorMod, BuiltinOperator_FLOOR_MOD, BuiltinOptions_FloorModOptions) +CIRCLE_NODE(CircleFullyConnected, BuiltinOperator_FULLY_CONNECTED, BuiltinOptions_FullyConnectedOptions) +CIRCLE_NODE(CircleGather, BuiltinOperator_GATHER, BuiltinOptions_GatherOptions) +CIRCLE_NODE(CircleGatherNd, BuiltinOperator_GATHER_ND, BuiltinOptions_GatherNdOptions) +CIRCLE_NODE(CircleGreater, BuiltinOperator_GREATER, BuiltinOptions_GreaterOptions) +CIRCLE_NODE(CircleGreaterEqual, BuiltinOperator_GREATER_EQUAL, BuiltinOptions_GreaterEqualOptions) +CIRCLE_NODE(CircleIf, BuiltinOperator_IF, BuiltinOptions_IfOptions) +CIRCLE_NODE(CircleL2Normalize, BuiltinOperator_L2_NORMALIZATION, BuiltinOptions_L2NormOptions) +CIRCLE_NODE(CircleL2Pool2D, BuiltinOperator_L2_POOL_2D, BuiltinOptions_Pool2DOptions) 
+CIRCLE_NODE(CircleLeakyRelu, BuiltinOperator_LEAKY_RELU, BuiltinOptions_LeakyReluOptions) +CIRCLE_NODE(CircleLess, BuiltinOperator_LESS, BuiltinOptions_LessOptions) +CIRCLE_NODE(CircleLessEqual, BuiltinOperator_LESS_EQUAL, BuiltinOptions_LessEqualOptions) +CIRCLE_NODE(CircleLocalResponseNormalization, BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, BuiltinOptions_LocalResponseNormalizationOptions) +CIRCLE_NODE(CircleLog, BuiltinOperator_LOG, BuiltinOptions_NONE) +CIRCLE_NODE(CircleLogicalAnd, BuiltinOperator_LOGICAL_AND, BuiltinOptions_LogicalAndOptions) +CIRCLE_NODE(CircleLogicalNot, BuiltinOperator_LOGICAL_NOT, BuiltinOptions_LogicalNotOptions) +CIRCLE_NODE(CircleLogicalOr, BuiltinOperator_LOGICAL_OR, BuiltinOptions_LogicalOrOptions) +CIRCLE_NODE(CircleLogistic, BuiltinOperator_LOGISTIC, BuiltinOptions_NONE) +CIRCLE_NODE(CircleLogSoftmax, BuiltinOperator_LOG_SOFTMAX, BuiltinOptions_LogSoftmaxOptions) +CIRCLE_NODE(CircleMatrixDiag, BuiltinOperator_MATRIX_DIAG, BuiltinOptions_MatrixDiagOptions) +CIRCLE_NODE(CircleMaxPool2D, BuiltinOperator_MAX_POOL_2D, BuiltinOptions_Pool2DOptions) +CIRCLE_NODE(CircleMatrixSetDiag, BuiltinOperator_MATRIX_SET_DIAG, BuiltinOptions_MatrixSetDiagOptions) +CIRCLE_NODE(CircleMaximum, BuiltinOperator_MAXIMUM, BuiltinOptions_MaximumMinimumOptions) +CIRCLE_NODE(CircleMean, BuiltinOperator_MEAN, BuiltinOptions_ReducerOptions) +CIRCLE_NODE(CircleMinimum, BuiltinOperator_MINIMUM, BuiltinOptions_MaximumMinimumOptions) +CIRCLE_NODE(CircleMirrorPad, BuiltinOperator_MIRROR_PAD, BuiltinOptions_MirrorPadOptions) +CIRCLE_NODE(CircleMul, BuiltinOperator_MUL, BuiltinOptions_MulOptions) +CIRCLE_NODE(CircleNeg, BuiltinOperator_NEG, BuiltinOptions_NegOptions) +CIRCLE_NODE(CircleNonMaxSuppressionV4, BuiltinOperator_NON_MAX_SUPPRESSION_V4, BuiltinOptions_NonMaxSuppressionV4Options) +CIRCLE_NODE(CircleNonMaxSuppressionV5, BuiltinOperator_NON_MAX_SUPPRESSION_V5, BuiltinOptions_NonMaxSuppressionV5Options) +CIRCLE_NODE(CircleNotEqual, BuiltinOperator_NOT_EQUAL, 
BuiltinOptions_NotEqualOptions) +CIRCLE_NODE(CircleOneHot, BuiltinOperator_ONE_HOT, BuiltinOptions_OneHotOptions) +CIRCLE_NODE(CirclePack, BuiltinOperator_PACK, BuiltinOptions_PackOptions) +CIRCLE_NODE(CirclePad, BuiltinOperator_PAD, BuiltinOptions_PadOptions) +CIRCLE_NODE(CirclePadV2, BuiltinOperator_PADV2, BuiltinOptions_PadV2Options) +CIRCLE_NODE(CirclePow, BuiltinOperator_POW, BuiltinOptions_PowOptions) +CIRCLE_NODE(CirclePRelu, BuiltinOperator_PRELU, BuiltinOptions_NONE) +CIRCLE_NODE(CircleQuantize, BuiltinOperator_QUANTIZE, BuiltinOptions_QuantizeOptions) +CIRCLE_NODE(CircleRange, BuiltinOperator_RANGE, BuiltinOptions_RangeOptions) +CIRCLE_NODE(CircleRank, BuiltinOperator_RANK, BuiltinOptions_RankOptions) +CIRCLE_NODE(CircleReduceAny, BuiltinOperator_REDUCE_ANY, BuiltinOptions_ReducerOptions) +CIRCLE_NODE(CircleReduceMax, BuiltinOperator_REDUCE_MAX, BuiltinOptions_ReducerOptions) +CIRCLE_NODE(CircleReduceMin, BuiltinOperator_REDUCE_MIN, BuiltinOptions_ReducerOptions) +CIRCLE_NODE(CircleReduceProd, BuiltinOperator_REDUCE_PROD, BuiltinOptions_ReducerOptions) +CIRCLE_NODE(CircleRelu, BuiltinOperator_RELU, BuiltinOptions_NONE) +CIRCLE_NODE(CircleRelu6, BuiltinOperator_RELU6, BuiltinOptions_NONE) +CIRCLE_NODE(CircleReluN1To1, BuiltinOperator_RELU_N1_TO_1, BuiltinOptions_NONE) +CIRCLE_NODE(CircleReshape, BuiltinOperator_RESHAPE, BuiltinOptions_ReshapeOptions) +CIRCLE_NODE(CircleResizeBilinear, BuiltinOperator_RESIZE_BILINEAR, BuiltinOptions_ResizeBilinearOptions) +CIRCLE_NODE(CircleResizeNearestNeighbor, BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, BuiltinOptions_ResizeNearestNeighborOptions) +CIRCLE_NODE(CircleReverseSequence, BuiltinOperator_REVERSE_SEQUENCE, BuiltinOptions_ReverseSequenceOptions) +CIRCLE_NODE(CircleReverseV2, BuiltinOperator_REVERSE_V2, BuiltinOptions_ReverseV2Options) +CIRCLE_NODE(CircleRound, BuiltinOperator_ROUND, BuiltinOptions_NONE) +CIRCLE_NODE(CircleRsqrt, BuiltinOperator_RSQRT, BuiltinOptions_NONE) +CIRCLE_NODE(CircleScatterNd, 
BuiltinOperator_SCATTER_ND, BuiltinOptions_ScatterNdOptions) +CIRCLE_NODE(CircleSegmentSum, BuiltinOperator_SEGMENT_SUM, BuiltinOptions_SegmentSumOptions) +CIRCLE_NODE(CircleSelect, BuiltinOperator_SELECT, BuiltinOptions_SelectOptions) +CIRCLE_NODE(CircleSelectV2, BuiltinOperator_SELECT_V2, BuiltinOptions_SelectV2Options) +CIRCLE_NODE(CircleShape, BuiltinOperator_SHAPE, BuiltinOptions_ShapeOptions) +CIRCLE_NODE(CircleSin, BuiltinOperator_SIN, BuiltinOptions_NONE) +CIRCLE_NODE(CircleSlice, BuiltinOperator_SLICE, BuiltinOptions_SliceOptions) +CIRCLE_NODE(CircleSoftmax, BuiltinOperator_SOFTMAX, BuiltinOptions_SoftmaxOptions) +CIRCLE_NODE(CircleSpaceToBatchND, BuiltinOperator_SPACE_TO_BATCH_ND, BuiltinOptions_SpaceToBatchNDOptions) +CIRCLE_NODE(CircleSpaceToDepth, BuiltinOperator_SPACE_TO_DEPTH, BuiltinOptions_SpaceToDepthOptions) +CIRCLE_NODE(CircleSparseToDense, BuiltinOperator_SPARSE_TO_DENSE, BuiltinOptions_SparseToDenseOptions) +CIRCLE_NODE(CircleSplit, BuiltinOperator_SPLIT, BuiltinOptions_SplitOptions) +CIRCLE_NODE(CircleSplitV, BuiltinOperator_SPLIT_V, BuiltinOptions_SplitVOptions) +CIRCLE_NODE(CircleSqrt, BuiltinOperator_SQRT, BuiltinOptions_NONE) +CIRCLE_NODE(CircleSquare, BuiltinOperator_SQUARE, BuiltinOptions_SquareOptions) +CIRCLE_NODE(CircleSquaredDifference, BuiltinOperator_SQUARED_DIFFERENCE, BuiltinOptions_SquaredDifferenceOptions) +CIRCLE_NODE(CircleSqueeze, BuiltinOperator_SQUEEZE, BuiltinOptions_SqueezeOptions) +CIRCLE_NODE(CircleStridedSlice, BuiltinOperator_STRIDED_SLICE, BuiltinOptions_StridedSliceOptions) +CIRCLE_NODE(CircleSub, BuiltinOperator_SUB, BuiltinOptions_SubOptions) +CIRCLE_NODE(CircleSum, BuiltinOperator_SUM, BuiltinOptions_ReducerOptions) +CIRCLE_NODE(CircleSVDF, BuiltinOperator_SVDF, BuiltinOptions_SVDFOptions) +CIRCLE_NODE(CircleTanh, BuiltinOperator_TANH, BuiltinOptions_NONE) +CIRCLE_NODE(CircleTile, BuiltinOperator_TILE, BuiltinOptions_TileOptions) +CIRCLE_NODE(CircleTopKV2, BuiltinOperator_TOPK_V2, BuiltinOptions_TopKV2Options) 
+CIRCLE_NODE(CircleTranspose, BuiltinOperator_TRANSPOSE, BuiltinOptions_TransposeOptions) +CIRCLE_NODE(CircleTransposeConv, BuiltinOperator_TRANSPOSE_CONV, BuiltinOptions_TransposeConvOptions) +CIRCLE_NODE(CircleUnidirectionalSequenceLSTM, BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOptions_UnidirectionalSequenceLSTMOptions) +CIRCLE_NODE(CircleUnique, BuiltinOperator_UNIQUE, BuiltinOptions_UniqueOptions) +CIRCLE_NODE(CircleUnpack, BuiltinOperator_UNPACK, BuiltinOptions_UnpackOptions) +CIRCLE_NODE(CircleWhere, BuiltinOperator_WHERE, BuiltinOptions_WhereOptions) +CIRCLE_NODE(CircleWhile, BuiltinOperator_WHILE, BuiltinOptions_WhileOptions) +CIRCLE_NODE(CircleZerosLike, BuiltinOperator_ZEROS_LIKE, BuiltinOptions_ZerosLikeOptions) +// Circle Only +CIRCLE_NODE(CircleBCQFullyConnected, BuiltinOperator_BCQ_FULLY_CONNECTED, BuiltinOptions_BCQFullyConnectedOptions) +CIRCLE_NODE(CircleBCQGather, BuiltinOperator_BCQ_GATHER, BuiltinOptions_BCQGatherOptions) +CIRCLE_NODE(CircleInstanceNorm, BuiltinOperator_INSTANCE_NORM, BuiltinOptions_InstanceNormOptions) +// Virtual node(s) +CIRCLE_VNODE(CircleBidirectionalSequenceLSTMOut) +CIRCLE_VNODE(CircleConst) +CIRCLE_VNODE(CircleInput) +CIRCLE_VNODE(CircleOutput) +CIRCLE_VNODE(CircleOutputDummy) +CIRCLE_VNODE(CircleOutputExclude) +CIRCLE_VNODE(CircleCustomOut) +CIRCLE_VNODE(CircleIfOut) +CIRCLE_VNODE(CircleNonMaxSuppressionV4Out) +CIRCLE_VNODE(CircleNonMaxSuppressionV5Out) +CIRCLE_VNODE(CircleSplitOut) +CIRCLE_VNODE(CircleSplitVOut) +CIRCLE_VNODE(CircleTopKV2Out) +CIRCLE_VNODE(CircleUniqueOut) +CIRCLE_VNODE(CircleUnpackOut) +CIRCLE_VNODE(CircleVariable) +CIRCLE_VNODE(CircleWhileOut) diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp index 615402aa8..b3bb850cc 100644 --- a/compiler/luci/export/src/CircleTensorExporter.cpp +++ b/compiler/luci/export/src/CircleTensorExporter.cpp @@ -67,6 +67,9 @@ public: luci::SparsityParam *sparsityparam(void) const { return 
_sparsityparam; } void sparsityparam(luci::SparsityParam *sp) { _sparsityparam = sp; } + bool is_variable(void) const { return _is_variable; } + void is_variable(bool v) { _is_variable = v; } + private: std::string _name; @@ -77,6 +80,8 @@ private: luci::CircleConst *_content = nullptr; luci::CircleQuantParam *_quantparam = nullptr; luci::SparsityParam *_sparsityparam = nullptr; + + bool _is_variable = false; }; class CircleTensorContext @@ -145,6 +150,8 @@ void allocateCircleTensorInfo(CircleNode *node, CircleTensorContext &ctx) tensor_info.quantparam(node->quantparam()); tensor_info.sparsityparam(node->sparsityparam()); + tensor_info.is_variable(dynamic_cast<luci::CircleVariable *>(node) != nullptr); + set_tensor_index(node, tensor_index); ctx.emplace_back(tensor_info); @@ -592,9 +599,11 @@ void exportOpDefinedTensor(const CircleTensorInfo &info, FlatBufferBuilder &buil auto buffer_id = get_buffer_id(builder, md, info.content()); auto name_offset = builder.CreateString(info.name()); - auto tensor_offset = - CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, quantparam, - /*is_variable*/ false, sparsityparam, shape_signature_offset); + + auto is_variable = info.is_variable(); + + auto tensor_offset = CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, + quantparam, is_variable, sparsityparam, shape_signature_offset); gd._tensors.push_back(tensor_offset); } diff --git a/compiler/luci/export/src/SerializedData.h b/compiler/luci/export/src/SerializedData.h index a945eecf7..136a8ac49 100644 --- a/compiler/luci/export/src/SerializedData.h +++ b/compiler/luci/export/src/SerializedData.h @@ -23,7 +23,7 @@ #include <luci/IR/ExecutionPlanTable.h> #include <vector> - +#include <string> #include <unordered_map> #include <map> @@ -131,8 +131,8 @@ struct SerializedModelData final * @param builtin_code * @return idx of opcode in table of opcodes (see schema) */ - uint32_t registerBuiltinOpcode(circle::BuiltinOperator builtin_code, 
const int32_t op_version); - uint32_t registerCustomOpcode(const std::string &custom_op); + uint32_t registerBuiltinOpcode(circle::BuiltinOperator builtin_code, + const std::string &custom_code, const int32_t op_version); }; // Prerequisites for circle::Model object creation diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt index 6630cab9f..1b2db23ae 100644 --- a/compiler/luci/import/CMakeLists.txt +++ b/compiler/luci/import/CMakeLists.txt @@ -12,13 +12,14 @@ target_include_directories(luci_import PUBLIC include) target_link_libraries(luci_import PUBLIC luci_lang) target_link_libraries(luci_import PUBLIC luci_profile) target_link_libraries(luci_import PUBLIC luci_plan) -target_link_libraries(luci_import PUBLIC mio_circle) +target_link_libraries(luci_import PUBLIC mio_circle04) target_link_libraries(luci_import PRIVATE luci_env) target_link_libraries(luci_import PRIVATE luci_log) target_link_libraries(luci_import PRIVATE luci_logex) target_link_libraries(luci_import PRIVATE nncc_common) target_link_libraries(luci_import PRIVATE locop) target_link_libraries(luci_import PRIVATE oops) +target_link_libraries(luci_import PRIVATE mio_circle04_helper) install(TARGETS luci_import DESTINATION lib) install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN "*.h") @@ -32,7 +33,3 @@ nnas_find_package(GTest REQUIRED) GTest_AddTest(luci_import_test ${TESTS}) target_include_directories(luci_import_test PRIVATE src) target_link_libraries(luci_import_test luci_import) -target_link_libraries(luci_import_test oops) -target_link_libraries(luci_import_test luci_plan) -target_link_libraries(luci_import_test luci_lang) -target_link_libraries(luci_import_test mio_circle) diff --git a/compiler/luci/import/include/luci/Import/CircleReader.h b/compiler/luci/import/include/luci/Import/CircleReader.h index fb38ba90b..a0519f661 100644 --- a/compiler/luci/import/include/luci/Import/CircleReader.h +++ 
b/compiler/luci/import/include/luci/Import/CircleReader.h @@ -35,19 +35,7 @@ namespace luci { -bool is_valid(const circle::OperatorCodeT &opcode); -bool is_valid(const circle::OperatorCode *opcode); - -bool is_custom(const circle::OperatorCodeT &opcode); -bool is_custom(const circle::OperatorCode *opcode); - -std::string opcode_name(const circle::OperatorCodeT &opcode); -std::string opcode_name(const circle::OperatorCode *opcode); - -const char *tensor_name(const circle::TensorT &tensor); const char *tensor_name(const circle::Tensor *tensor); - -const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor); const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor); loco::DataType luci_datatype(circle::TensorType type); @@ -57,14 +45,13 @@ MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode); luci::CircleFullyConnected::WeightsFormat luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format); std::unique_ptr<CircleQuantParam> -luci_quantparam(const circle::QuantizationParametersT *quantization); -std::unique_ptr<CircleQuantParam> luci_quantparam(const circle::QuantizationParameters *quantization); /// @brief Copy common tensor attributes such as name, type, etc. to node. 
-void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node); void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node); +std::string fb_string2std_string(const flatbuffers::String *fb_str); + /** * @brief Wrapper to use flatbuffers::Vector pointer as std::vector entity */ @@ -101,13 +88,6 @@ template <typename T> VectorWrapper<T> wrap(const flatbuffers::Vector<T> *vec) */ class CircleReader { -private: // unpack API - using CircleBuffers_t = std::vector<std::unique_ptr<circle::BufferT>>; - using CircleTensors_t = std::vector<std::unique_ptr<circle::TensorT>>; - using CircleOperators_t = std::vector<std::unique_ptr<circle::OperatorT>>; - using CircleOperatorCodes_t = std::vector<std::unique_ptr<circle::OperatorCodeT>>; - using CircleMetadata_t = std::vector<std::unique_ptr<circle::MetadataT>>; - private: // direct API using CircleBuffers = VectorWrapper<flatbuffers::Offset<circle::Buffer>>; using CircleTensors = VectorWrapper<flatbuffers::Offset<circle::Tensor>>; @@ -115,40 +95,21 @@ private: // direct API using CircleOperatorCodes = VectorWrapper<flatbuffers::Offset<circle::OperatorCode>>; using CircleMetadataSet = VectorWrapper<flatbuffers::Offset<circle::Metadata>>; - using CircleSubGraphsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>; - using CircleTensorsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>; - public: CircleReader() = default; -public: // unpack API - const CircleOperatorCodes_t &opcodes() const { return _model->operator_codes; } - const CircleBuffers_t &buffers() const { return _model->buffers; } - const CircleTensors_t &tensors() const { return _current_subgraph->tensors; } - const CircleOperators_t &operators() const { return _current_subgraph->operators; } - const std::vector<int32_t> &inputs() const { return _current_subgraph->inputs; } - const std::vector<int32_t> &outputs() const { return _current_subgraph->outputs; } - const std::string &name() const { return 
_current_subgraph->name; } - const circle::DataFormat &data_format() const { return _current_subgraph->data_format; } - const CircleMetadata_t &metadata() const { return _model->metadata; } - - const CircleTensorsPtr_t *tensors_ptr() const { return _tensors_ptr; } - - uint32_t num_subgraph() const { return _model->subgraphs.size(); } - - circle::BuiltinOperator builtin_code(const circle::OperatorT &op) const; - std::string opcode_name(const circle::OperatorT &op) const; - public: // direct API - CircleOperatorCodes native_opcodes() const { return wrap(_native_model->operator_codes()); } - CircleBuffers native_buffers() const { return wrap(_native_model->buffers()); } - CircleTensors native_tensors() const { return wrap(_native_subgraph->tensors()); } - CircleOperators native_operators() const { return wrap(_native_subgraph->operators()); } - VectorWrapper<int32_t> native_inputs() const { return wrap(_native_subgraph->inputs()); } - VectorWrapper<int32_t> native_outputs() const { return wrap(_native_subgraph->outputs()); } - std::string native_name() const { return _native_subgraph->name()->str(); } - circle::DataFormat native_data_format() const { return _native_subgraph->data_format(); } - CircleMetadataSet native_metadata() const { return wrap(_native_model->metadata()); } + CircleOperatorCodes opcodes() const { return wrap(_model->operator_codes()); } + CircleBuffers buffers() const { return wrap(_model->buffers()); } + CircleTensors tensors() const { return wrap(_current_subgraph->tensors()); } + CircleOperators operators() const { return wrap(_current_subgraph->operators()); } + VectorWrapper<int32_t> inputs() const { return wrap(_current_subgraph->inputs()); } + VectorWrapper<int32_t> outputs() const { return wrap(_current_subgraph->outputs()); } + std::string name() const { return fb_string2std_string(_current_subgraph->name()); } + circle::DataFormat data_format() const { return _current_subgraph->data_format(); } + CircleMetadataSet metadata() const { 
return wrap(_model->metadata()); } + + uint32_t num_subgraph() const { return wrap(_model->subgraphs()).size(); } circle::BuiltinOperator builtin_code(const circle::Operator *op) const; std::string opcode_name(const circle::Operator *op) const; @@ -158,12 +119,8 @@ public: bool select_subgraph(uint32_t subgraph); private: - std::unique_ptr<const circle::ModelT> _model; - const circle::SubGraphT *_current_subgraph{nullptr}; - - const circle::Model *_native_model{nullptr}; - const CircleTensorsPtr_t *_tensors_ptr{nullptr}; - const circle::SubGraph *_native_subgraph{nullptr}; + const circle::Model *_model{nullptr}; + const circle::SubGraph *_current_subgraph{nullptr}; }; } // namespace luci diff --git a/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h b/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h index b8dc22fdd..93e34a56b 100644 --- a/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h +++ b/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h @@ -18,6 +18,7 @@ #define __LUCI_IMPORT_GRAPH_BUILDER_REGISTRY_H__ #include "GraphBuilderBase.h" +#include "NodeBuilder.h" #include <map> @@ -32,6 +33,11 @@ struct GraphBuilderSource * @brief Returns registered GraphBuilder pointer for operator (nullptr if not present) */ virtual const GraphBuilderBase *lookup(const circle::BuiltinOperator &op) const = 0; + + /** + * @brief Returns registered NodeBuilderBase pointer for type (nullptr if not present) + */ + virtual const NodeBuilderBase *lookup(const NodeBuilderType type) const = 0; }; /** @@ -61,6 +67,17 @@ public: return _builder_map.at(op).get(); } + /** + * @brief Returns registered NodeBuilderBase pointer for type or nullptr if not registered + */ + const NodeBuilderBase *lookup(const NodeBuilderType type) const final + { + if (_node_builders.find(type) == _node_builders.end()) + return (_parent == nullptr) ? 
nullptr : _parent->lookup(type); + + return _node_builders.at(type).get(); + } + static GraphBuilderRegistry &get() { static GraphBuilderRegistry me; @@ -73,11 +90,17 @@ public: _builder_map[op] = std::move(builder); } + void add(std::unique_ptr<NodeBuilderBase> &&builder) + { + _node_builders[builder->builder_type()] = std::move(builder); + } + private: const GraphBuilderSource *_parent = nullptr; private: std::map<const circle::BuiltinOperator, std::unique_ptr<GraphBuilderBase>> _builder_map; + std::map<const NodeBuilderType, std::unique_ptr<NodeBuilderBase>> _node_builders; }; } // namespace luci diff --git a/compiler/luci/import/include/luci/Import/NodeBuilder.h b/compiler/luci/import/include/luci/Import/NodeBuilder.h new file mode 100644 index 000000000..440b491b0 --- /dev/null +++ b/compiler/luci/import/include/luci/Import/NodeBuilder.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_IMPORT_NODE_BUILDER_H__ +#define __LUCI_IMPORT_NODE_BUILDER_H__ + +#include "GraphBuilderContext.h" +#include "GraphBuilderBase.h" + +#include <mio/circle/schema_generated.h> + +namespace luci +{ + +/** + * @brief Tensor types which requires separated node + */ +enum class NodeBuilderType +{ + BUFFER, + // TODO Extend this struct here if needed to add new type of NodeBuilderBase +}; + +/** + * @brief Creates nodes from given Tensor and context + */ +class NodeBuilderBase +{ +public: + virtual CircleNode *build(TensorIndex tensor_idx, GraphBuilderContext *context) const = 0; + virtual NodeBuilderType builder_type() const = 0; +}; + +/** + * @brief Placeholder for builders of tensors with different types + */ +template <NodeBuilderType Type> class TypedNodeBuilder : public NodeBuilderBase +{ +public: + NodeBuilderType builder_type() const final { return Type; } +}; + +} // namespace luci + +#endif // __LUCI_IMPORT_NODE_BUILDER_H__ diff --git a/compiler/luci/import/include/luci/Import/Nodes.h b/compiler/luci/import/include/luci/Import/Nodes.h index f7d22e7aa..7a5045ede 100644 --- a/compiler/luci/import/include/luci/Import/Nodes.h +++ b/compiler/luci/import/include/luci/Import/Nodes.h @@ -122,6 +122,7 @@ #include "Nodes/CircleStridedSlice.h" #include "Nodes/CircleSub.h" #include "Nodes/CircleSum.h" +#include "Nodes/CircleSVDF.h" #include "Nodes/CircleTanh.h" #include "Nodes/CircleTile.h" #include "Nodes/CircleTopKV2.h" @@ -130,6 +131,7 @@ #include "Nodes/CircleUnidirectionalSequenceLSTM.h" #include "Nodes/CircleUnique.h" #include "Nodes/CircleUnpack.h" +#include "Nodes/CircleVariable.h" #include "Nodes/CircleWhere.h" #include "Nodes/CircleWhile.h" #include "Nodes/CircleZerosLike.h" diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h b/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h index 7d4f10a59..9e50ddbde 100644 --- a/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h +++ 
b/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h @@ -17,20 +17,21 @@ #ifndef __LUCI_IMPORT_OP_CIRCLE_CONST_H__ #define __LUCI_IMPORT_OP_CIRCLE_CONST_H__ -#include "luci/Import/GraphBuilderContext.h" +#include "luci/Import/NodeBuilder.h" #include <luci/IR/Nodes/CircleConst.h> -/* - * @note Circle does not have Const operator. - * Methods here provide helper that creates CircleConst from - * Tensor and Buffer in circle flatbuffer file. - */ - namespace luci { -CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_index); +/** + * @brief Builder creates CircleConst node from Tensor with buffer. + */ +class CircleConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER> +{ +public: + CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final; +}; } // namespace luci diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h b/compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h new file mode 100644 index 000000000..a91f66019 --- /dev/null +++ b/compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_IMPORT_OP_CIRCLE_SVDF_H__ +#define __LUCI_IMPORT_OP_CIRCLE_SVDF_H__ + +#include "luci/Import/GraphBuilder.h" + +namespace luci +{ + +class CircleSVDFBuilder : public GraphBuilder +{ +public: + bool validate(const ValidateArgs &args) const final; + +private: + CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, + loco::Graph *graph) const final; +}; + +} // namespace luci + +#endif // __LUCI_IMPORT_OP_CIRCLE_SVDF_H__ diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h b/compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h new file mode 100644 index 000000000..4d8961fa5 --- /dev/null +++ b/compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__ +#define __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__ + +#include "luci/Import/GraphBuilderContext.h" + +#include <luci/IR/Nodes/CircleVariable.h> + +/* + * @note Circle does not have node for variable tensor + * Methods here provide helper that creates CircleVariable from + * Tensor having is_variable true value. 
+ */ + +namespace luci +{ + +CircleVariable *create_circlevariable(GraphBuilderContext *context, int32_t tensor_index); + +} // namespace luci + +#endif // __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__ diff --git a/compiler/luci/import/src/CircleImportMetadata.cpp b/compiler/luci/import/src/CircleImportMetadata.cpp index 42dcebdaa..9c1fe7356 100644 --- a/compiler/luci/import/src/CircleImportMetadata.cpp +++ b/compiler/luci/import/src/CircleImportMetadata.cpp @@ -21,8 +21,10 @@ namespace { -uint32_t read_u32(const std::vector<uint8_t> &buffer, uint32_t idx) +template <typename VECTORTYPE> uint32_t read_u32(const VECTORTYPE &buffer, uint32_t idx) { + static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!"); + uint32_t val = 0; val += (buffer.at(idx + 0) << 0 * 8); val += (buffer.at(idx + 1) << 1 * 8); @@ -37,9 +39,11 @@ namespace { // 'source_table' is decoded to std::map<uint32_t, std::string> format. -const std::map<uint32_t, std::string> -decoded_source_table(const std::vector<uint8_t> &source_table_data) +template <typename VECTORTYPE> +const std::map<uint32_t, std::string> decoded_source_table(const VECTORTYPE &source_table_data) { + static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!"); + std::map<uint32_t, std::string> source_id_name_map; uint32_t idx = 0; @@ -86,9 +90,11 @@ decoded_source_table(const std::vector<uint8_t> &source_table_data) } // 'op_table' is decoded to std::map<uint32_t, std::set<uint32_t>> format. 
-const std::map<uint32_t, std::set<uint32_t>> -decoded_op_table(const std::vector<uint8_t> &op_table_data) +template <typename VECTORTYPE> +const std::map<uint32_t, std::set<uint32_t>> decoded_op_table(const VECTORTYPE &op_table_data) { + static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!"); + std::map<uint32_t, std::set<uint32_t>> node_source_ids_map; uint32_t idx = 0; @@ -135,9 +141,11 @@ decoded_op_table(const std::vector<uint8_t> &op_table_data) } // 'execution_plan_table' is decoded to std::map<uint32_t, std::vector<uint32_t>> format. -const luci::ExecutionPlanTable -decoded_execution_plan(const std::vector<uint8_t> &execution_plan_data) +template <typename VECTORTYPE> +const luci::ExecutionPlanTable decoded_execution_plan(const VECTORTYPE &execution_plan_data) { + static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!"); + luci::ExecutionPlanTable execution_plan_table; uint32_t idx = 0; @@ -156,6 +164,10 @@ decoded_execution_plan(const std::vector<uint8_t> &execution_plan_data) idx += sizeof(uint32_t); uint32_t size = read_u32(execution_plan_data, idx); + + if (size == 0) + throw std::runtime_error("Op table decode error : empty execution plan entry"); + idx += sizeof(uint32_t); if (idx + sizeof(uint32_t) * size > execution_plan_data.size()) @@ -190,19 +202,22 @@ namespace luci CircleImportMetadata::CircleImportMetadata(const luci::CircleReader &reader) { - const auto &metadata = reader.metadata(); + const auto metadata = reader.metadata(); for (uint32_t i = 0; i < metadata.size(); ++i) { - const circle::MetadataT &meta = *metadata[i]; + const auto *meta = metadata[i]; + assert(meta != nullptr); - assert(meta.buffer < reader.buffers().size()); - const std::vector<uint8_t> &buffer = reader.buffers()[meta.buffer]->data; + assert(meta->buffer() < reader.buffers().size()); + assert(reader.buffers()[meta->buffer()] != nullptr); + const auto buffer = 
luci::wrap(reader.buffers()[meta->buffer()]->data()); - if (meta.name.compare("ONE_op_table") == 0) + assert(meta->name() != nullptr); + if (meta->name()->str().compare("ONE_op_table") == 0) _op_table = decoded_op_table(buffer); - else if (meta.name.compare("ONE_source_table") == 0) + else if (meta->name()->str().compare("ONE_source_table") == 0) _source_table = decoded_source_table(buffer); - else if (meta.name.compare("ONE_execution_plan_table") == 0) + else if (meta->name()->str().compare("ONE_execution_plan_table") == 0) _execution_plan_table = decoded_execution_plan(buffer); } } diff --git a/compiler/luci/import/src/CircleReader.cpp b/compiler/luci/import/src/CircleReader.cpp index 14917ba06..a42c3f913 100644 --- a/compiler/luci/import/src/CircleReader.cpp +++ b/compiler/luci/import/src/CircleReader.cpp @@ -16,6 +16,9 @@ #include "luci/Import/CircleReader.h" +#include <mio_circle/Helper.h> + +#include <algorithm> #include <memory> #include <sstream> #include <string> @@ -23,103 +26,14 @@ namespace luci { -bool is_valid(const circle::OperatorCodeT &opcode) -{ - circle::BuiltinOperator code = opcode.builtin_code; - return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX); -} - -bool is_valid(const circle::OperatorCode *opcode) -{ - assert(opcode != nullptr); - circle::BuiltinOperator code = opcode->builtin_code(); - return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX); -} - -bool is_custom(const circle::OperatorCodeT &opcode) -{ - circle::BuiltinOperator code = opcode.builtin_code; - return (code == circle::BuiltinOperator_CUSTOM); -} - -bool is_custom(const circle::OperatorCode *opcode) -{ - assert(opcode != nullptr); - circle::BuiltinOperator code = opcode->builtin_code(); - return (code == circle::BuiltinOperator_CUSTOM); -} - -std::string opcode_name(const circle::OperatorCodeT &opcode) -{ - if (!is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid)"; - return oss.str(); - } - - if 
(is_custom(opcode)) - { - if (opcode.custom_code.empty()) - return "(invalid custom)"; - - return opcode.custom_code; - } - - circle::BuiltinOperator code = opcode.builtin_code; - return circle::EnumNameBuiltinOperator(code); -} - -std::string opcode_name(const circle::OperatorCode *opcode) -{ - assert(opcode != nullptr); - - if (!is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid)"; - return oss.str(); - } - - if (is_custom(opcode)) - { - auto custom_code = opcode->custom_code()->str(); - if (custom_code.empty()) - return "(invalid custom)"; - - return custom_code; - } - - circle::BuiltinOperator code = opcode->builtin_code(); - return circle::EnumNameBuiltinOperator(code); -} - -const char *tensor_name(const circle::TensorT &tensor) -{ - static const char *kEmptyTensorName = "(noname)"; - - if (!tensor.name.empty()) - return tensor.name.c_str(); - - return kEmptyTensorName; -} - const char *tensor_name(const circle::Tensor *tensor) { assert(tensor != nullptr); - static const char *kEmptyTensorName = "(noname)"; - const auto tensor_name = tensor->name()->c_str(); - - if (!std::string(tensor_name).empty()) - return tensor_name; + if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty()) + return "(noname)"; - return kEmptyTensorName; -} - -const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor) -{ - return tensor.quantization.get(); + return tensor->name()->c_str(); } const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor) @@ -334,41 +248,6 @@ std::unique_ptr<SparsityParam> luci_sparsityparam(const circle::SparsityParamete return luci_sparsityparam(&sparsity); } -void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node) -{ - node->name(tensor_name(tensor)); - node->dtype(luci_datatype(tensor.type)); - - assert(tensor.shape_signature.size() == 0 || - tensor.shape_signature.size() == tensor.shape.size()); - - std::vector<int32_t> dims = 
tensor.shape; // in NHWC - node->rank(dims.size()); - for (uint32_t r = 0; r < dims.size(); ++r) - { - if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1) - node->dim(r).unset(); - else - node->dim(r).set(dims[r]); - } - - const auto *quantization = tensor.quantization.get(); - if (quantization != nullptr) - { - auto quantparam = luci_quantparam(quantization); - if (quantparam) - node->quantparam(std::move(quantparam)); - } - - const auto *sparsity = tensor.sparsity.get(); - if (sparsity != nullptr) - { - auto sparsityparam = luci_sparsityparam(sparsity); - if (sparsityparam) - node->sparsityparam(std::move(sparsityparam)); - } -} - void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node) { assert(tensor != nullptr); @@ -408,63 +287,60 @@ void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node) } } -circle::BuiltinOperator CircleReader::builtin_code(const circle::OperatorT &op) const +std::string fb_string2std_string(const flatbuffers::String *fb_str) { - const auto &op_codes = opcodes(); - uint32_t index = op.opcode_index; + return fb_str == nullptr ? 
"" : fb_str->str(); +} + +circle::BuiltinOperator CircleReader::builtin_code(const circle::Operator *op) const +{ + assert(op != nullptr); + + const auto op_codes = opcodes(); + uint32_t index = op->opcode_index(); assert(index < op_codes.size()); - const circle::OperatorCodeT &opcode = *op_codes[index]; + const auto opcode = op_codes[index]; + assert(opcode != nullptr); - return opcode.builtin_code; + return mio::circle::builtin_code_neutral(opcode); } -std::string CircleReader::opcode_name(const circle::OperatorT &op) const +std::string CircleReader::opcode_name(const circle::Operator *op) const { - const auto &op_codes = opcodes(); - uint32_t index = op.opcode_index; - assert(index < op_codes.size()); - const circle::OperatorCodeT &opcode = *op_codes[index]; + assert(op != nullptr); - if (!is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid: " << index << ")"; - return oss.str(); - } + const auto op_codes = opcodes(); + uint32_t index = op->opcode_index(); + assert(index < op_codes.size()); + const auto opcode = op_codes[index]; - return ::luci::opcode_name(opcode); + return mio::circle::opcode_name(opcode); } bool CircleReader::parse(const circle::Model *model) { assert(model != nullptr); - _model.reset(model->UnPack()); - // for direct pointer access - _native_model = model; + _model = model; return true; } bool CircleReader::select_subgraph(uint32_t sgindex) { - if (_model->subgraphs.size() <= sgindex) + if (num_subgraph() <= sgindex) { assert(false); return false; } - _current_subgraph = _model->subgraphs[sgindex].get(); - // for direct pointer access - auto subgraphs = _native_model->subgraphs(); + auto subgraphs = _model->subgraphs(); assert(subgraphs != nullptr); - _native_subgraph = subgraphs->Get(sgindex); - assert(_native_subgraph != nullptr); - - _tensors_ptr = _native_subgraph->tensors(); + _current_subgraph = subgraphs->Get(sgindex); + assert(_current_subgraph != nullptr); return true; } diff --git 
a/compiler/luci/import/src/GraphBuilder.cpp b/compiler/luci/import/src/GraphBuilder.cpp index 356501c2f..59a08b546 100644 --- a/compiler/luci/import/src/GraphBuilder.cpp +++ b/compiler/luci/import/src/GraphBuilder.cpp @@ -29,10 +29,9 @@ CircleNode *GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext const std::vector<int32_t> &inputs = op.inputs; const std::vector<int32_t> &outputs = op.outputs; - const auto &tensors = context->reader()->tensors(); - const auto &opcodes = context->reader()->opcodes(); - auto tensors_ptr = context->reader()->tensors_ptr(); - assert(tensors_ptr != nullptr); + const auto tensors = context->reader()->tensors(); + const auto opcodes = context->reader()->opcodes(); + assert(!tensors.null()); std::vector<CircleNode *> input_nodes; for (const int32_t input_tensor_index : inputs) @@ -60,16 +59,18 @@ CircleNode *GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext // Set up node parameters. assert(outputs.size() == 1); { - const circle::TensorT &output_tensor = *tensors[outputs[0]]; + const auto output_tensor = tensors[outputs[0]]; + assert(output_tensor != nullptr); copy_tensor_attributes(output_tensor, node); // mark shape_status - if (tensors_ptr->Get(outputs[0])->shape() == nullptr) + if (output_tensor->shape() == nullptr) node->shape_status(ShapeStatus::NOSHAPE); else node->shape_status(ShapeStatus::VALID); // mark operator version - node->op_version(opcodes[op.opcode_index].get()->version); + assert(opcodes[op.opcode_index] != nullptr); + node->op_version(opcodes[op.opcode_index]->version()); } // Register node's only output. 
diff --git a/compiler/luci/import/src/GraphBuilderMultiOutput.cpp b/compiler/luci/import/src/GraphBuilderMultiOutput.cpp index be553f4c0..4df8d1e5a 100644 --- a/compiler/luci/import/src/GraphBuilderMultiOutput.cpp +++ b/compiler/luci/import/src/GraphBuilderMultiOutput.cpp @@ -30,10 +30,9 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op, const std::vector<int32_t> &inputs = op.inputs; const std::vector<int32_t> &outputs = op.outputs; - const auto &tensors = context->reader()->tensors(); - const auto &opcodes = context->reader()->opcodes(); - auto tensors_ptr = context->reader()->tensors_ptr(); - assert(tensors_ptr != nullptr); + const auto tensors = context->reader()->tensors(); + const auto opcodes = context->reader()->opcodes(); + assert(!tensors.null()); std::vector<CircleNode *> input_nodes; for (const int32_t input_tensor_index : inputs) @@ -64,12 +63,14 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op, if (output_count > 0) { // Let's use attributes from output 0 for this node - const circle::TensorT &output_tensor = *tensors[outputs[0]]; + const auto output_tensor = tensors[outputs[0]]; + assert(output_tensor != nullptr); node->name(tensor_name(output_tensor)); - node->dtype(luci_datatype(output_tensor.type)); + node->dtype(luci_datatype(output_tensor->type())); // mark operator version - node->op_version(opcodes[op.opcode_index].get()->version); + assert(opcodes[op.opcode_index] != nullptr); + node->op_version(opcodes[op.opcode_index]->version()); // NOTE We don't set quantization for multiple output nodes but to virtual outputs } @@ -77,7 +78,8 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op, // Create virtual outputs of Virtual Output node(s) for (uint32_t n = 0; n < output_count; ++n) { - const circle::TensorT &output_tensor = *tensors[outputs[n]]; + const auto output_tensor = tensors[outputs[n]]; + assert(output_tensor != nullptr); BuildOutArgs boa(node, n); auto *nodeout = 
build_out(boa); @@ -85,7 +87,7 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op, copy_tensor_attributes(output_tensor, nodeout); // NOTE name of CxxxOut nodes may have same name // mark shape_status - if (tensors_ptr->Get(outputs[n])->shape() == nullptr) + if (output_tensor->shape() == nullptr) nodeout->shape_status(ShapeStatus::NOSHAPE); else nodeout->shape_status(ShapeStatus::VALID); diff --git a/compiler/luci/import/src/GraphBuilderRegistry.cpp b/compiler/luci/import/src/GraphBuilderRegistry.cpp index df07d9e48..fe2d830e9 100644 --- a/compiler/luci/import/src/GraphBuilderRegistry.cpp +++ b/compiler/luci/import/src/GraphBuilderRegistry.cpp @@ -131,6 +131,7 @@ GraphBuilderRegistry::GraphBuilderRegistry() CIRCLE_NODE(STRIDED_SLICE, CircleStridedSliceGraphBuilder); // 45 CIRCLE_NODE(SUB, CircleSubGraphBuilder); // 41 CIRCLE_NODE(SUM, CircleSumGraphBuilder); // 74 + CIRCLE_NODE(SVDF, CircleSVDFBuilder); // 27 CIRCLE_NODE(TANH, CircleTanhGraphBuilder); // 28 CIRCLE_NODE(TILE, CircleTileGraphBuilder); // 69 CIRCLE_NODE(TOPK_V2, CircleTopKV2GraphBuilder); // 48 @@ -150,7 +151,6 @@ GraphBuilderRegistry::GraphBuilderRegistry() // BuiltinOperator_LSH_PROJECTION = 15, // BuiltinOperator_LSTM = 16, // BuiltinOperator_RNN = 24, - // BuiltinOperator_SVDF = 27, // BuiltinOperator_CONCAT_EMBEDDINGS = 29, // BuiltinOperator_SKIP_GRAM = 30, // BuiltinOperator_CALL = 31, @@ -161,6 +161,13 @@ GraphBuilderRegistry::GraphBuilderRegistry() // BuiltinOperator_ARG_MAX = 56, // BuiltinOperator_HARD_SWISH = 117, // BuiltinOperator_DENSIFY = 124, + + // Register builders for nodes which not handles in builders registered above. 
+#define CIRCLE_NODE(CLASS) add(std::make_unique<CLASS>()) + + CIRCLE_NODE(CircleConstNodeBuilder); + +#undef CIRCLE_NODE } } // namespace luci diff --git a/compiler/luci/import/src/Importer.cpp b/compiler/luci/import/src/Importer.cpp index 3f7f78591..15de03df2 100644 --- a/compiler/luci/import/src/Importer.cpp +++ b/compiler/luci/import/src/Importer.cpp @@ -23,6 +23,7 @@ #include "luci/Import/GraphBuilderRegistry.h" #include "luci/Import/CircleReader.h" #include "luci/Import/Nodes/CircleConst.h" +#include "luci/Import/Nodes/CircleVariable.h" #include <luci/IR/Module.h> #include <luci/IR/CircleNodes.h> @@ -50,18 +51,18 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r luci::GraphBuilderContext gb_context(graph, &reader, nodefinder.get(), tensoroutputs.get()); - const auto &operators = reader.operators(); - const auto &tensors = reader.tensors(); - auto tensors_ptr = reader.tensors_ptr(); - assert(tensors_ptr != nullptr); + const auto operators = reader.operators(); + const auto tensors = reader.tensors(); + assert(!tensors.null()); auto circle_metadata = std::make_unique<luci::CircleImportMetadata>(reader); // build a cache to identify if a tensor is output of an operator // if this is set, we should not create a CircleConst for this tensor for (uint32_t i = 0; i < operators.size(); ++i) { - const circle::OperatorT &op = *operators[i]; - const auto &outputs = op.outputs; + const auto op = operators[i]; + assert(op != nullptr); + const auto outputs = luci::wrap(op->outputs()); for (uint32_t j = 0; j < outputs.size(); ++j) { @@ -77,10 +78,11 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r { auto input_node = graph->nodes()->create<luci::CircleInput>(); assert(input_node != nullptr); - const circle::TensorT &tensor = *tensors[input]; + const auto tensor = tensors[input]; + assert(tensor != nullptr); luci::copy_tensor_attributes(tensor, input_node); - if (tensors_ptr->Get(input)->shape() == nullptr) 
+ if (tensor->shape() == nullptr) input_node->shape_status(luci::ShapeStatus::NOSHAPE); else input_node->shape_status(luci::ShapeStatus::VALID); @@ -101,16 +103,18 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r // Data type graph_input->dtype(input_node->dtype()); - assert(tensor.shape_signature.size() == 0 || - tensor.shape_signature.size() == tensor.shape.size()); + const auto tensor_shape_signature = luci::wrap(tensor->shape_signature()); + const auto tensor_shape = luci::wrap(tensor->shape()); + assert(tensor_shape_signature.size() == 0 || + tensor_shape_signature.size() == tensor_shape.size()); // Shape of GraphInput auto input_shape = std::make_unique<loco::TensorShape>(); - const std::vector<int32_t> &input_dims = tensor.shape; // in NHWC + const auto &input_dims = tensor_shape; // in NHWC input_shape->rank(input_dims.size()); for (uint32_t r = 0; r < input_dims.size(); ++r) { - if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1) + if (tensor_shape_signature.size() > 0 && tensor_shape_signature.at(r) == -1) input_shape->dim(r).unset(); else input_shape->dim(r).set(input_dims[r]); @@ -118,15 +122,28 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r graph_input->shape(std::move(input_shape)); } - // Create CircleConst nodes for constant tensors. + // Create CircleNodes for constant tensors. // NOTE Origin is intentionally not provided for constants. 
+ auto const_builder = source.lookup(luci::NodeBuilderType::BUFFER); + if (not const_builder) + throw oops::UserExn("Not supported", "tensor with buffer builder"); + for (uint32_t i = 0; i < tensors.size(); ++i) { - luci::CircleConst *const_node = luci::create_circleconst(&gb_context, i); + auto *const_node = const_builder->build(i, &gb_context); if (const_node != nullptr) nodefinder->enroll(i, const_node); } + // Create CircleVariable nodes for variable tensors + // TODO Add Origin if needed, skip for now + for (uint32_t i = 0; i < tensors.size(); ++i) + { + luci::CircleVariable *variable_node = luci::create_circlevariable(&gb_context, i); + if (variable_node != nullptr) + nodefinder->enroll(i, variable_node); + } + // Import the operators. // Note that operators in model are stored in execution order. This means that when importing // an operator, its input operators have already been imported. We exploit this fact to set up @@ -134,18 +151,23 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r auto origin_table = circle_metadata->origin_table(); for (uint32_t i = 0; i < operators.size(); ++i) { - const circle::OperatorT &op = *operators[i]; + const auto op = operators[i]; + assert(op != nullptr); circle::BuiltinOperator builtincode = reader.builtin_code(op); if (const auto *builder = source.lookup(builtincode)) { - luci::GraphBuilder::ValidateArgs args(op, reader); + // create temporary unpack API obj + circle::OperatorT oper_t; + op->UnPackTo(&oper_t); + + luci::GraphBuilder::ValidateArgs args(oper_t, reader); if (!builder->validate(args)) { throw oops::UserExn("Invalid operator", reader.opcode_name(op)); } - auto built_op = builder->build(op, &gb_context); + auto built_op = builder->build(oper_t, &gb_context); set_node_id(built_op, i); if (origin_table.find(i) != origin_table.end()) add_origin(built_op, origin_table.at(i)); @@ -161,7 +183,8 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r // 
graph outputs for (auto output : reader.outputs()) { - const circle::TensorT &tensor = *tensors[output]; + const auto tensor = tensors[output]; + assert(tensor != nullptr); auto output_node = graph->nodes()->create<luci::CircleOutput>(); assert(output_node != nullptr); @@ -178,7 +201,7 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r output_node->from(output_dummy); luci::copy_tensor_attributes(tensor, output_dummy); - if (tensors_ptr->Get(output)->shape() == nullptr) + if (tensor->shape() == nullptr) output_dummy->shape_status(luci::ShapeStatus::NOSHAPE); else output_dummy->shape_status(luci::ShapeStatus::VALID); @@ -197,16 +220,18 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r // Set GraphInputOutputIndex for graph output_node->index(graph_output->index()); - assert(tensor.shape_signature.size() == 0 || - tensor.shape_signature.size() == tensor.shape.size()); + const auto tensor_shape_signature = luci::wrap(tensor->shape_signature()); + const auto tensor_shape = luci::wrap(tensor->shape()); + assert(tensor_shape_signature.size() == 0 || + tensor_shape_signature.size() == tensor_shape.size()); // Shape of Output auto output_shape = std::make_unique<loco::TensorShape>(); - const std::vector<int32_t> &output_dims = tensor.shape; // in NHWC + const auto &output_dims = tensor_shape; // in NHWC output_shape->rank(output_dims.size()); for (uint32_t r = 0; r < output_dims.size(); ++r) { - if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1) + if (tensor_shape_signature.size() > 0 && tensor_shape_signature.at(r) == -1) output_shape->dim(r).unset(); else output_shape->dim(r).set(output_dims[r]); @@ -214,7 +239,7 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r graph_output->shape(std::move(output_shape)); // Data type - auto dtype = luci::luci_datatype(tensor.type); + auto dtype = luci::luci_datatype(tensor->type()); graph_output->dtype(dtype); 
} } @@ -355,7 +380,12 @@ std::unique_ptr<Module> Importer::importModule(const circle::Model *model) const { if (auto circle_node = dynamic_cast<luci::CircleNode *>(node)) { + if (execution_plan_table.count(node_position) == 0) + continue; + auto node_plan = execution_plan_table[node_position]; + assert(node_plan.size() > 0); + luci::add_execution_plan( circle_node, luci::CircleNodeExecutionPlan( diff --git a/compiler/luci/import/src/Importer.test.cpp b/compiler/luci/import/src/Importer.test.cpp index d963b4d49..91e4860ea 100644 --- a/compiler/luci/import/src/Importer.test.cpp +++ b/compiler/luci/import/src/Importer.test.cpp @@ -23,7 +23,7 @@ #include <mio/circle/schema_generated.h> #include <flatbuffers/flatbuffers.h> -TEST(TensorFlowLiteImport, Dummy) +TEST(CircleImport, Dummy) { luci::Importer import; @@ -68,6 +68,7 @@ struct BasicCircleModel { uint32_t id = model->operator_codes.size(); model->operator_codes.push_back(std::make_unique<circle::OperatorCodeT>()); + model->operator_codes[id]->deprecated_builtin_code = opcode; model->operator_codes[id]->builtin_code = opcode; model->operator_codes[id]->version = 1; return id; @@ -179,7 +180,7 @@ struct SimpleRELUModel : public BasicCircleModel /** * This test checks that one op RELU model with execution plan is successfully imported */ -TEST(TensorFlowLiteImport, simple_plan) +TEST(CircleImport, simple_plan) { SimpleRELUModel model; auto metadata_buffer_id = model.add_buffer(); @@ -240,7 +241,7 @@ TEST(TensorFlowLiteImport, simple_plan) /** * This test checks that model with incomplete execution plan is successfully imported */ -TEST(TensorFlowLiteImport, DISABLED_incomplete_plan_NEG) +TEST(CircleImport, incomplete_plan_NEG) { SimpleRELUModel model; auto metadata_buffer_id = model.add_buffer(); @@ -287,7 +288,7 @@ TEST(TensorFlowLiteImport, DISABLED_incomplete_plan_NEG) /** * This test checks that corrupted execution plan induce exception */ -TEST(TensorFlowLiteImport, corrupted_plan_NEG) +TEST(CircleImport, 
corrupted_plan_NEG) { SimpleRELUModel model; auto metadata_buffer_id = model.add_buffer(); @@ -309,3 +310,44 @@ TEST(TensorFlowLiteImport, corrupted_plan_NEG) ASSERT_ANY_THROW(import.importModule(model_ptr)); } + +/** + * This test checks that empty execution plan entry induce exception + */ +TEST(CircleImport, corrupted_plan_entry_NEG) +{ + SimpleRELUModel model; + auto metadata_buffer_id = model.add_buffer(); + model.add_plan_metadata(metadata_buffer_id); + + model.add_plan_entry(metadata_buffer_id, 1, {100}); + + // add corrupted entry with 0 size + { + auto &buffer = model.model->buffers[metadata_buffer_id]->data; + auto old_size = buffer.size(); + + // Allocate space for new entry: + // 4 bytes for entry id + // 4 bytes for entry size + buffer.resize(old_size + 8); + uint32_t *number_of_entries_ptr = reinterpret_cast<uint32_t *>(buffer.data()); + *number_of_entries_ptr += 1; + + uint32_t *entry_data_ptr = reinterpret_cast<uint32_t *>(buffer.data() + old_size); + + entry_data_ptr[0] = *number_of_entries_ptr - 1; // entry id + entry_data_ptr[1] = 0; // entry size + } + + model.add_plan_entry(metadata_buffer_id, 3, {200}); + + flatbuffers::FlatBufferBuilder fbb; + auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr); + circle::FinishModelBuffer(fbb, model_offset); + + auto model_ptr = circle::GetModel(fbb.GetBufferPointer()); + luci::Importer import; + + ASSERT_ANY_THROW(import.importModule(model_ptr)); +} diff --git a/compiler/luci/import/src/Nodes/CircleCast.cpp b/compiler/luci/import/src/Nodes/CircleCast.cpp index 3e8c08bfa..acde823b1 100644 --- a/compiler/luci/import/src/Nodes/CircleCast.cpp +++ b/compiler/luci/import/src/Nodes/CircleCast.cpp @@ -42,12 +42,14 @@ bool CircleCastGraphBuilder::validate(const ValidateArgs &args) const const auto *options = args.op.builtin_options.AsCastOptions(); if (options != nullptr) { - const auto &tensors = args.reader.tensors(); - const circle::TensorT &output_tensor = *tensors[outputs[0]]; + const auto 
tensors = args.reader.tensors(); + const auto output_tensor = tensors[outputs[0]]; + assert(output_tensor != nullptr); auto name = tensor_name(output_tensor); - const auto &tensor_in = tensors.at(inputs.at(0)); - if (tensor_in->type != options->in_data_type) + const auto tensor_in = tensors.at(inputs.at(0)); + assert(tensor_in != nullptr); + if (tensor_in->type() != options->in_data_type) { if (settings->get(luci::UserSettings::Key::DisableValidation)) { @@ -57,7 +59,7 @@ bool CircleCastGraphBuilder::validate(const ValidateArgs &args) const return false; } const auto &tensor_out = tensors.at(outputs[0]); - if (tensor_out->type != options->out_data_type) + if (tensor_out->type() != options->out_data_type) { if (settings->get(luci::UserSettings::Key::DisableValidation)) { diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp index 11fbb4e54..a4f190dd9 100644 --- a/compiler/luci/import/src/Nodes/CircleConst.cpp +++ b/compiler/luci/import/src/Nodes/CircleConst.cpp @@ -30,10 +30,10 @@ namespace { -std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect) +std::ostream &operator<<(std::ostream &os, const luci::VectorWrapper<int32_t> &vect) { uint32_t seq = 0; - for (auto &v : vect) + for (const auto &v : vect) { if (seq) os << ", "; @@ -46,7 +46,8 @@ std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect) using namespace luci; template <loco::DataType DT> -void copy_data(const std::vector<uint8_t> &raw_data, uint32_t num_elements, CircleConst *const_node) +void copy_data(const VectorWrapper<uint8_t> &raw_data, uint32_t num_elements, + CircleConst *const_node) { using T = typename loco::DataTypeImpl<DT>::Type; @@ -67,8 +68,8 @@ void copy_data(const std::vector<uint8_t> &raw_data, uint32_t num_elements, Circ } template <> -void copy_data<loco::DataType::STRING>(const std::vector<uint8_t> &raw_data, uint32_t num_elements, - CircleConst *const_node) +void 
copy_data<loco::DataType::STRING>(const VectorWrapper<uint8_t> &raw_data, + uint32_t num_elements, CircleConst *const_node) { assert(const_node->sparsityparam() == nullptr); @@ -106,17 +107,26 @@ void copy_data<loco::DataType::STRING>(const std::vector<uint8_t> &raw_data, uin namespace luci { -CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_index) +CircleNode *CircleConstNodeBuilder::build(TensorIndex tensor_index, + GraphBuilderContext *context) const { + assert(tensor_index >= 0); LOGGER(l); auto graph = context->graph(); auto reader = context->reader(); - const auto &tensors = reader->tensors(); - const circle::TensorT &const_tensor = *tensors[tensor_index]; + const auto tensors = reader->tensors(); + const auto const_tensor = tensors[tensor_index]; + assert(const_tensor != nullptr); + if (const_tensor->is_variable()) + { + // Create CircleVariable for variable + return nullptr; + } - const std::vector<uint8_t> &buffer = reader->buffers()[const_tensor.buffer]->data; - std::vector<int32_t> const_dims = const_tensor.shape; // in NHWC + assert(reader->buffers()[const_tensor->buffer()] != nullptr); + const auto buffer = wrap(reader->buffers()[const_tensor->buffer()]->data()); + const auto const_dims = wrap(const_tensor->shape()); // in NHWC if (const_dims.size() == 0 && buffer.empty()) { // unknown shape tensor and scalar tensor @@ -150,7 +160,7 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind << const_dims << std::endl; if (num_elements > 0) { - switch (luci_datatype(const_tensor.type)) + switch (luci_datatype(const_tensor->type())) { case loco::DataType::FLOAT32: copy_data<loco::DataType::FLOAT32>(buffer, num_elements, const_node); @@ -186,7 +196,7 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind default: throw oops::UserExn("Unsupported tensor type", - circle::EnumNameTensorType(const_tensor.type)); + circle::EnumNameTensorType(const_tensor->type())); } } diff --git 
a/compiler/luci/import/src/Nodes/CircleCustom.cpp b/compiler/luci/import/src/Nodes/CircleCustom.cpp index 01ac3e2a0..4e78d5fb7 100644 --- a/compiler/luci/import/src/Nodes/CircleCustom.cpp +++ b/compiler/luci/import/src/Nodes/CircleCustom.cpp @@ -39,13 +39,15 @@ CircleNode *CircleCustomGraphBuilder::build_node(const BuildNodeArgs &bna) const node->inputs(idx, bna.input_nodes[idx]); } - const auto &opcodes = bna.context->reader()->opcodes(); + const auto opcodes = bna.context->reader()->opcodes(); const uint32_t opcode_index = bna.op.opcode_index; - const circle::OperatorCodeT &opcode = *opcodes[opcode_index]; + const auto opcode = opcodes[opcode_index]; + assert(opcode != nullptr); node->custom_options( std::vector<uint8_t>{bna.op.custom_options.begin(), bna.op.custom_options.end()}); - node->custom_code(opcode.custom_code); + assert(opcode->custom_code() != nullptr); + node->custom_code(opcode->custom_code()->c_str()); // NOTE Operator version of custom is always 1 diff --git a/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp b/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp index 49eb30a83..83fc2e37d 100644 --- a/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp +++ b/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp @@ -34,9 +34,10 @@ bool CircleDepthToSpaceGraphBuilder::validate(const ValidateArgs &args) const const auto &outputs = args.op.outputs; const auto *options = args.op.builtin_options.AsDepthToSpaceOptions(); - const auto &tensors = args.reader.tensors(); + const auto tensors = args.reader.tensors(); + assert(tensors[outputs[0]] != nullptr && tensors[inputs.at(0)] != nullptr); - if (tensors[outputs[0]]->type != tensors[inputs.at(0)]->type) + if (tensors[outputs[0]]->type() != tensors[inputs.at(0)]->type()) { return false; } diff --git a/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp b/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp index 727487c6a..a24e4160d 100644 --- 
a/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp +++ b/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp @@ -32,19 +32,21 @@ bool CircleDepthwiseConv2DGraphBuilder::validate(const ValidateArgs &args) const if (args.op.outputs.size() != 1) return false; - const auto &tensors = args.reader.tensors(); + const auto tensors = args.reader.tensors(); // input shape - const auto &input = tensors.at(args.op.inputs.at(0)); - const auto &input_shape = input->shape; + const auto input = tensors.at(args.op.inputs.at(0)); + assert(input != nullptr); + const auto input_shape = wrap(input->shape()); // input shape must be rank 4 if (input_shape.size() != 4) return false; // filter shape - const auto &filter = tensors.at(args.op.inputs.at(1)); - const auto &filter_shape = filter->shape; + const auto filter = tensors.at(args.op.inputs.at(1)); + assert(filter != nullptr); + const auto filter_shape = wrap(filter->shape()); // filter shape must be rank 4 if (filter_shape.size() != 4) diff --git a/compiler/luci/import/src/Nodes/CircleElu.cpp b/compiler/luci/import/src/Nodes/CircleElu.cpp index 41696a65a..e5d7a4c7a 100644 --- a/compiler/luci/import/src/Nodes/CircleElu.cpp +++ b/compiler/luci/import/src/Nodes/CircleElu.cpp @@ -31,10 +31,11 @@ bool CircleEluGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); - switch (tensor->type) + switch (tensor->type()) { case circle::TensorType_FLOAT64: break; @@ -48,7 +49,8 @@ bool CircleEluGraphBuilder::validate(const ValidateArgs &args) const return false; } - if (tensors[outputs[0]]->type != tensor->type) + assert(tensors[outputs[0]] != nullptr); + if (tensors[outputs[0]]->type() != tensor->type()) return false; return true; diff --git 
a/compiler/luci/import/src/Nodes/CircleEqual.cpp b/compiler/luci/import/src/Nodes/CircleEqual.cpp index 4909692b4..b326d9b5d 100644 --- a/compiler/luci/import/src/Nodes/CircleEqual.cpp +++ b/compiler/luci/import/src/Nodes/CircleEqual.cpp @@ -29,9 +29,10 @@ bool CircleEqualGraphBuilder::validate(const ValidateArgs &args) const return false; const auto &inputs = args.op.inputs; - const auto &tensors = args.reader.tensors(); + const auto tensors = args.reader.tensors(); - return tensors[inputs.at(0)]->type == tensors[inputs.at(1)]->type; + assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr); + return tensors[inputs.at(0)]->type() == tensors[inputs.at(1)]->type(); } CircleNode *CircleEqualGraphBuilder::build_node(const circle::OperatorT &, diff --git a/compiler/luci/import/src/Nodes/CircleExp.cpp b/compiler/luci/import/src/Nodes/CircleExp.cpp index 5bb7bb664..82c26f0e5 100644 --- a/compiler/luci/import/src/Nodes/CircleExp.cpp +++ b/compiler/luci/import/src/Nodes/CircleExp.cpp @@ -30,9 +30,10 @@ bool CircleExpGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; // input type check - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - switch (tensor->type) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); + switch (tensor->type()) { case circle::TensorType_FLOAT16: case circle::TensorType_FLOAT32: diff --git a/compiler/luci/import/src/Nodes/CircleExpandDims.cpp b/compiler/luci/import/src/Nodes/CircleExpandDims.cpp index ee0fbdc7e..67d9b7e9e 100644 --- a/compiler/luci/import/src/Nodes/CircleExpandDims.cpp +++ b/compiler/luci/import/src/Nodes/CircleExpandDims.cpp @@ -29,9 +29,10 @@ bool CircleExpandDimsGraphBuilder::validate(const ValidateArgs &args) const return false; const auto &inputs = args.op.inputs; - const auto &tensors = args.reader.tensors(); + const auto tensors = 
args.reader.tensors(); - return tensors[inputs.at(1)]->type == circle::TensorType_INT32; + assert(tensors[inputs.at(1)] != nullptr); + return tensors[inputs.at(1)]->type() == circle::TensorType_INT32; } CircleNode *CircleExpandDimsGraphBuilder::build_node(const circle::OperatorT &, diff --git a/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp b/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp index ce329326a..67eeddf91 100644 --- a/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp +++ b/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp @@ -30,15 +30,18 @@ bool CircleFloorDivGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor_in_0 = tensors.at(inputs.at(0)); - const auto &tensor_in_1 = tensors.at(inputs.at(1)); - const auto &tensor_out = tensors.at(outputs[0]); - - if (tensor_in_0->type != tensor_in_1->type) + const auto tensors = args.reader.tensors(); + const auto tensor_in_0 = tensors.at(inputs.at(0)); + const auto tensor_in_1 = tensors.at(inputs.at(1)); + const auto tensor_out = tensors.at(outputs[0]); + assert(tensor_in_0 != nullptr); + assert(tensor_in_1 != nullptr); + assert(tensor_out != nullptr); + + if (tensor_in_0->type() != tensor_in_1->type()) return false; - if (tensor_out->type != tensor_in_1->type) + if (tensor_out->type() != tensor_in_1->type()) { return false; } diff --git a/compiler/luci/import/src/Nodes/CircleFloorMod.cpp b/compiler/luci/import/src/Nodes/CircleFloorMod.cpp index d8420a43c..d2a275b62 100644 --- a/compiler/luci/import/src/Nodes/CircleFloorMod.cpp +++ b/compiler/luci/import/src/Nodes/CircleFloorMod.cpp @@ -29,10 +29,11 @@ bool CircleFloorModGraphBuilder::validate(const ValidateArgs &args) const return false; const auto &inputs = args.op.inputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor_in_0 = tensors.at(inputs.at(0)); - const auto &tensor_in_1 = 
tensors.at(inputs.at(1)); - if (tensor_in_0->type != tensor_in_1->type) + const auto tensors = args.reader.tensors(); + const auto tensor_in_0 = tensors.at(inputs.at(0)); + const auto tensor_in_1 = tensors.at(inputs.at(1)); + assert(tensor_in_0 != nullptr && tensor_in_1 != nullptr); + if (tensor_in_0->type() != tensor_in_1->type()) return false; // TODO dtype check diff --git a/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp index 58750d79a..cc7be1693 100644 --- a/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp +++ b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp @@ -42,6 +42,7 @@ CircleNode *CircleFullyConnectedGraphBuilder::build_node(const circle::OperatorT const auto *options = op.builtin_options.AsFullyConnectedOptions(); node->fusedActivationFunction(luci_actfunc(options->fused_activation_function)); node->weights_format(luci_weights_format(options->weights_format)); + node->keep_num_dims(options->keep_num_dims); return node; } diff --git a/compiler/luci/import/src/Nodes/CircleGatherNd.cpp b/compiler/luci/import/src/Nodes/CircleGatherNd.cpp index a4bb26a10..d336878ad 100644 --- a/compiler/luci/import/src/Nodes/CircleGatherNd.cpp +++ b/compiler/luci/import/src/Nodes/CircleGatherNd.cpp @@ -31,10 +31,11 @@ bool CircleGatherNdGraphBuilder::validate(const ValidateArgs &args) const return false; const auto &inputs = args.op.inputs; - auto &indices_tensor = args.reader.tensors()[inputs.at(1)]; + auto indices_tensor = args.reader.tensors()[inputs.at(1)]; + assert(indices_tensor != nullptr); - if (!(indices_tensor->type == circle::TensorType::TensorType_INT32 || - indices_tensor->type == circle::TensorType::TensorType_INT64)) + if (!(indices_tensor->type() == circle::TensorType::TensorType_INT32 || + indices_tensor->type() == circle::TensorType::TensorType_INT64)) { return false; } diff --git a/compiler/luci/import/src/Nodes/CircleGreater.cpp 
b/compiler/luci/import/src/Nodes/CircleGreater.cpp index f9c00346c..7f031b0ba 100644 --- a/compiler/luci/import/src/Nodes/CircleGreater.cpp +++ b/compiler/luci/import/src/Nodes/CircleGreater.cpp @@ -37,17 +37,19 @@ bool CircleGreaterGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); + const auto tensors = args.reader.tensors(); - if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type) + assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr); + if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type()) return false; // NOTE: real models do have output dtype NOT BOOL - if (tensors[outputs[0]]->type != circle::TensorType_BOOL) + assert(tensors[outputs[0]] != nullptr); + if (tensors[outputs[0]]->type() != circle::TensorType_BOOL) { if (settings->get(luci::UserSettings::Key::DisableValidation)) { - const circle::TensorT &output_tensor = *tensors[outputs[0]]; + const auto output_tensor = tensors[outputs[0]]; auto name = tensor_name(output_tensor); WARN(l) << "Warning: import Greater(" << name << ") output dtype is not boolean"; } diff --git a/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp b/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp index e20038fd9..ac4ce62f5 100644 --- a/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp +++ b/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp @@ -30,14 +30,16 @@ bool CircleGreaterEqualGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); + const auto tensors = args.reader.tensors(); - if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type) + assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr); + if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type()) { return false; } - return 
tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL; + assert(tensors[outputs[0]] != nullptr); + return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL; } CircleNode *CircleGreaterEqualGraphBuilder::build_node(const circle::OperatorT &, diff --git a/compiler/luci/import/src/Nodes/CircleIf.cpp b/compiler/luci/import/src/Nodes/CircleIf.cpp index ffdbf0b79..e8a50ff32 100644 --- a/compiler/luci/import/src/Nodes/CircleIf.cpp +++ b/compiler/luci/import/src/Nodes/CircleIf.cpp @@ -42,12 +42,13 @@ bool CircleIfGraphBuilder::validate(const ValidateArgs &args) const return false; // input 0 should be BOOL type - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - if (tensor->type != circle::TensorType_BOOL) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); + if (tensor->type() != circle::TensorType_BOOL) return false; - const auto &shape = tensor->shape; + const auto shape = wrap(tensor->shape()); if (shape.size() != 1 && shape.size() != 0) return false; diff --git a/compiler/luci/import/src/Nodes/CircleLess.cpp b/compiler/luci/import/src/Nodes/CircleLess.cpp index f9b99bebe..5c5ae51e1 100644 --- a/compiler/luci/import/src/Nodes/CircleLess.cpp +++ b/compiler/luci/import/src/Nodes/CircleLess.cpp @@ -30,10 +30,11 @@ bool CircleLessGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); - switch (tensor->type) + switch (tensor->type()) { case circle::TensorType_FLOAT32: case circle::TensorType_FLOAT64: @@ -48,12 +49,14 @@ bool CircleLessGraphBuilder::validate(const ValidateArgs &args) const return false; } - if (tensors[inputs.at(1)]->type != 
tensor->type) + assert(tensors[inputs.at(1)] != nullptr); + if (tensors[inputs.at(1)]->type() != tensor->type()) { return false; } - return tensors[outputs[0]]->type == circle::TensorType_BOOL; + assert(tensors[outputs[0]] != nullptr); + return tensors[outputs[0]]->type() == circle::TensorType_BOOL; } CircleNode *CircleLessGraphBuilder::build_node(const circle::OperatorT &, diff --git a/compiler/luci/import/src/Nodes/CircleLessEqual.cpp b/compiler/luci/import/src/Nodes/CircleLessEqual.cpp index bb1712137..8a2aea8db 100644 --- a/compiler/luci/import/src/Nodes/CircleLessEqual.cpp +++ b/compiler/luci/import/src/Nodes/CircleLessEqual.cpp @@ -30,14 +30,16 @@ bool CircleLessEqualGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); + const auto tensors = args.reader.tensors(); - if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type) + assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr); + if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type()) { return false; } - return tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL; + assert(tensors[outputs[0]] != nullptr); + return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL; } CircleNode *CircleLessEqualGraphBuilder::build_node(const circle::OperatorT &, diff --git a/compiler/luci/import/src/Nodes/CircleLog.cpp b/compiler/luci/import/src/Nodes/CircleLog.cpp index 26b575070..f41926829 100644 --- a/compiler/luci/import/src/Nodes/CircleLog.cpp +++ b/compiler/luci/import/src/Nodes/CircleLog.cpp @@ -32,9 +32,10 @@ bool CircleLogGraphBuilder::validate(const ValidateArgs &args) const // input type check // Must be one of bfloat16, half, float32, float64, complex64, complex128. // Currently circle supports half(float16), float32, float64, complex64. 
- const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - switch (tensor->type) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); + switch (tensor->type()) { case circle::TensorType_FLOAT16: case circle::TensorType_FLOAT32: diff --git a/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp b/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp index b13fc2735..b61fb6f3e 100644 --- a/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp +++ b/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp @@ -30,11 +30,12 @@ bool CircleLogicalAndGraphBuilder::validate(const ValidateArgs &args) const // Only BOOL type is allowed for inputs const auto &inputs = args.op.inputs; - const auto &tensors = args.reader.tensors(); + const auto tensors = args.reader.tensors(); for (auto input : inputs) { - const auto &tensor = tensors.at(input); - if (tensor->type != circle::TensorType::TensorType_BOOL) + const auto tensor = tensors.at(input); + assert(tensor != nullptr); + if (tensor->type() != circle::TensorType::TensorType_BOOL) return false; } diff --git a/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp b/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp index f68218349..43e9ed39f 100644 --- a/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp +++ b/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp @@ -30,9 +30,10 @@ bool CircleLogicalNotGraphBuilder::validate(const ValidateArgs &args) const // Only BOOL type is allowed for the input const auto &inputs = args.op.inputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - if (tensor->type != circle::TensorType::TensorType_BOOL) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); + if (tensor->type() != circle::TensorType::TensorType_BOOL) return false; return true; diff --git 
a/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp b/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp index 8c9023dd3..6354e7dc1 100644 --- a/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp +++ b/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp @@ -30,11 +30,12 @@ bool CircleLogicalOrGraphBuilder::validate(const ValidateArgs &args) const // Only BOOL type is allowed for inputs const auto &inputs = args.op.inputs; - const auto &tensors = args.reader.tensors(); + const auto tensors = args.reader.tensors(); for (auto input : inputs) { - const auto &tensor = tensors.at(input); - if (tensor->type != circle::TensorType::TensorType_BOOL) + const auto tensor = tensors.at(input); + assert(tensor != nullptr); + if (tensor->type() != circle::TensorType::TensorType_BOOL) return false; } diff --git a/compiler/luci/import/src/Nodes/CircleLogistic.cpp b/compiler/luci/import/src/Nodes/CircleLogistic.cpp index 0f92a9bb4..b0d08e039 100644 --- a/compiler/luci/import/src/Nodes/CircleLogistic.cpp +++ b/compiler/luci/import/src/Nodes/CircleLogistic.cpp @@ -30,8 +30,9 @@ bool CircleLogisticGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); - if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type) + const auto tensors = args.reader.tensors(); + assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr); + if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type()) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp b/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp index 590a07f2d..384b98586 100644 --- a/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp +++ b/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp @@ -30,10 +30,11 @@ bool CircleMatrixDiagGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = 
args.op.outputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); - if (tensors[outputs[0]]->type != tensor->type) + assert(tensors[outputs[0]] != nullptr && tensor != nullptr); + if (tensors[outputs[0]]->type() != tensor->type()) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp b/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp index edd7d2ae2..64870c057 100644 --- a/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp +++ b/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp @@ -30,10 +30,11 @@ bool CircleMatrixSetDiagGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); - if (tensors[outputs[0]]->type != tensor->type) + assert(tensors[outputs[0]] != nullptr && tensor != nullptr); + if (tensors[outputs[0]]->type() != tensor->type()) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp index d3d69506b..e86f2ba81 100644 --- a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp +++ b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp @@ -35,20 +35,26 @@ bool CircleNonMaxSuppressionV4GraphBuilder::validate(const ValidateArgs &args) c if (outputs.size() != 2) return false; - const auto &tensors = args.reader.tensors(); - const auto &boxes_tensor = tensors.at(inputs[0]); - if (boxes_tensor->shape.size() != 2) + const auto tensors = args.reader.tensors(); + const auto boxes_tensor = tensors.at(inputs[0]); + assert(boxes_tensor != nullptr); + const auto boxes_tensor_shape = 
wrap(boxes_tensor->shape()); + if (boxes_tensor_shape.size() != 2) return false; - if (boxes_tensor->shape.at(1) != 4) + if (boxes_tensor_shape.at(1) != 4) return false; - if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0)) + assert(tensors.at(inputs[1]) != nullptr); + if (boxes_tensor_shape.at(0) != wrap(tensors.at(inputs[1])->shape()).at(0)) return false; - if (tensors.at(inputs[2])->type != circle::TensorType_INT32) + assert(tensors.at(inputs[2]) != nullptr); + if (tensors.at(inputs[2])->type() != circle::TensorType_INT32) return false; - if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32) + assert(tensors.at(inputs[3]) != nullptr); + if (tensors.at(inputs[3])->type() != circle::TensorType_FLOAT32) return false; - if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32) + assert(tensors.at(inputs[4]) != nullptr); + if (tensors.at(inputs[4])->type() != circle::TensorType_FLOAT32) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp index d797d4cb7..a60eed4e4 100644 --- a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp +++ b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp @@ -35,22 +35,29 @@ bool CircleNonMaxSuppressionV5GraphBuilder::validate(const ValidateArgs &args) c if (outputs.size() != 3) return false; - const auto &tensors = args.reader.tensors(); - const auto &boxes_tensor = tensors.at(inputs[0]); - if (boxes_tensor->shape.size() != 2) + const auto tensors = args.reader.tensors(); + const auto boxes_tensor = tensors.at(inputs[0]); + assert(boxes_tensor != nullptr); + const auto boxes_tensor_shape = wrap(boxes_tensor->shape()); + if (boxes_tensor_shape.size() != 2) return false; - if (boxes_tensor->shape.at(1) != 4) + if (boxes_tensor_shape.at(1) != 4) return false; - if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0)) + assert(tensors.at(inputs[1]) != nullptr); + if 
(boxes_tensor_shape.at(0) != wrap(tensors.at(inputs[1])->shape()).at(0)) return false; - if (tensors.at(inputs[2])->type != circle::TensorType_INT32) + assert(tensors.at(inputs[2]) != nullptr); + if (tensors.at(inputs[2])->type() != circle::TensorType_INT32) return false; - if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32) + assert(tensors.at(inputs[3]) != nullptr); + if (tensors.at(inputs[3])->type() != circle::TensorType_FLOAT32) return false; - if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32) + assert(tensors.at(inputs[4]) != nullptr); + if (tensors.at(inputs[4])->type() != circle::TensorType_FLOAT32) return false; - if (tensors.at(inputs[5])->type != circle::TensorType_FLOAT32) + assert(tensors.at(inputs[5]) != nullptr); + if (tensors.at(inputs[5])->type() != circle::TensorType_FLOAT32) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleNotEqual.cpp b/compiler/luci/import/src/Nodes/CircleNotEqual.cpp index a0b8f9e4f..3f5c1e033 100644 --- a/compiler/luci/import/src/Nodes/CircleNotEqual.cpp +++ b/compiler/luci/import/src/Nodes/CircleNotEqual.cpp @@ -30,14 +30,16 @@ bool CircleNotEqualGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); + const auto tensors = args.reader.tensors(); - if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type) + assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr); + if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type()) { return false; } - return tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL; + assert(tensors[outputs[0]] != nullptr); + return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL; } CircleNode *CircleNotEqualGraphBuilder::build_node(const circle::OperatorT &, diff --git a/compiler/luci/import/src/Nodes/CircleOneHot.cpp b/compiler/luci/import/src/Nodes/CircleOneHot.cpp 
index 3952cc21a..6e5f8e16f 100644 --- a/compiler/luci/import/src/Nodes/CircleOneHot.cpp +++ b/compiler/luci/import/src/Nodes/CircleOneHot.cpp @@ -32,21 +32,25 @@ bool CircleOneHotGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto *options = args.op.builtin_options.AsOneHotOptions(); - const auto &tensors = args.reader.tensors(); - const auto &indices = tensors.at(inputs.at(0)); - const auto &depth = tensors.at(inputs.at(1)); - const auto &on_value = tensors.at(inputs.at(2)); - const auto &off_value = tensors.at(inputs.at(3)); + const auto tensors = args.reader.tensors(); + const auto indices = tensors.at(inputs.at(0)); + const auto depth = tensors.at(inputs.at(1)); + const auto on_value = tensors.at(inputs.at(2)); + const auto off_value = tensors.at(inputs.at(3)); + assert(indices != nullptr); + assert(depth != nullptr); + assert(on_value != nullptr); + assert(off_value != nullptr); - if (options->axis < -1 || options->axis > static_cast<int32_t>(indices->shape.size())) + if (options->axis < -1 || options->axis > static_cast<int32_t>(wrap(indices->shape()).size())) return false; - if (depth->shape.size() != 0) + if (wrap(depth->shape()).size() != 0) return false; - if (on_value->shape.size() != 0) + if (wrap(on_value->shape()).size() != 0) return false; - if (off_value->shape.size() != 0) + if (wrap(off_value->shape()).size() != 0) return false; - if (on_value->type != off_value->type) + if (on_value->type() != off_value->type()) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleReduceAny.cpp b/compiler/luci/import/src/Nodes/CircleReduceAny.cpp index 13205dd7a..ebe2368e0 100644 --- a/compiler/luci/import/src/Nodes/CircleReduceAny.cpp +++ b/compiler/luci/import/src/Nodes/CircleReduceAny.cpp @@ -28,17 +28,20 @@ bool CircleReduceAnyGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = 
args.reader.tensors(); - const auto &tensor_0 = tensors.at(inputs.at(0)); - const auto &tensor_1 = tensors.at(inputs.at(1)); - const auto &tensor_o = tensors.at(outputs[0]); + const auto tensors = args.reader.tensors(); + const auto tensor_0 = tensors.at(inputs.at(0)); + const auto tensor_1 = tensors.at(inputs.at(1)); + const auto tensor_o = tensors.at(outputs[0]); + assert(tensor_0 != nullptr); + assert(tensor_1 != nullptr); + assert(tensor_o != nullptr); - if (tensor_0->type != circle::TensorType_BOOL) + if (tensor_0->type() != circle::TensorType_BOOL) return false; - if (tensor_o->type != circle::TensorType_BOOL) + if (tensor_o->type() != circle::TensorType_BOOL) return false; - switch (tensor_1->type) + switch (tensor_1->type()) { case circle::TensorType_INT32: case circle::TensorType_INT64: diff --git a/compiler/luci/import/src/Nodes/CircleReduceProd.cpp b/compiler/luci/import/src/Nodes/CircleReduceProd.cpp index 3549c1a18..3b874b7c9 100644 --- a/compiler/luci/import/src/Nodes/CircleReduceProd.cpp +++ b/compiler/luci/import/src/Nodes/CircleReduceProd.cpp @@ -27,13 +27,14 @@ bool CircleReduceProdGraphBuilder::validate(const ValidateArgs &args) const return false; const auto &inputs = args.op.inputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor_1 = tensors.at(inputs.at(1)); + const auto tensors = args.reader.tensors(); + const auto tensor_1 = tensors.at(inputs.at(1)); + assert(tensor_1 != nullptr); // TODO check input types // Check for reduction_indices types - switch (tensor_1->type) + switch (tensor_1->type()) { case circle::TensorType_INT32: case circle::TensorType_INT64: diff --git a/compiler/luci/import/src/Nodes/CircleReshape.cpp b/compiler/luci/import/src/Nodes/CircleReshape.cpp index 401dff0fc..3421620ce 100644 --- a/compiler/luci/import/src/Nodes/CircleReshape.cpp +++ b/compiler/luci/import/src/Nodes/CircleReshape.cpp @@ -34,12 +34,13 @@ bool CircleReshapeGraphBuilder::validate(const ValidateArgs &args) const if 
(args.op.inputs.size() == 2) { const auto &inputs = args.op.inputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor_in = tensors.at(inputs.at(1)); + const auto tensors = args.reader.tensors(); + const auto tensor_in = tensors.at(inputs.at(1)); + assert(tensor_in != nullptr); // NOTE fix this if there is any other case // TensorFlow lite and circle only supports S32 - if (tensor_in->type != circle::TensorType::TensorType_INT32) + if (tensor_in->type() != circle::TensorType::TensorType_INT32) return false; } diff --git a/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp b/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp index 2fbb7a87c..c9cc792bb 100644 --- a/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp +++ b/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp @@ -30,12 +30,15 @@ bool CircleReverseSequenceGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor_in = tensors.at(inputs.at(0)); - const auto &tensor_lengths = tensors.at(inputs.at(1)); - const auto &tensor_out = tensors.at(outputs[0]); + const auto tensors = args.reader.tensors(); + const auto tensor_in = tensors.at(inputs.at(0)); + const auto tensor_lengths = tensors.at(inputs.at(1)); + const auto tensor_out = tensors.at(outputs[0]); + assert(tensor_in != nullptr); + assert(tensor_lengths != nullptr); + assert(tensor_out != nullptr); - switch (tensor_lengths->type) + switch (tensor_lengths->type()) { case circle::TensorType_INT32: case circle::TensorType_INT64: @@ -44,7 +47,7 @@ bool CircleReverseSequenceGraphBuilder::validate(const ValidateArgs &args) const return false; } - if (tensor_in->type != tensor_out->type) + if (tensor_in->type() != tensor_out->type()) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleReverseV2.cpp b/compiler/luci/import/src/Nodes/CircleReverseV2.cpp index 
ca7653201..c19a0fdd2 100644 --- a/compiler/luci/import/src/Nodes/CircleReverseV2.cpp +++ b/compiler/luci/import/src/Nodes/CircleReverseV2.cpp @@ -30,12 +30,15 @@ bool CircleReverseV2GraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor_in = tensors.at(inputs.at(0)); - const auto &tensor_axis = tensors.at(inputs.at(1)); - const auto &tensor_out = tensors.at(outputs[0]); + const auto tensors = args.reader.tensors(); + const auto tensor_in = tensors.at(inputs.at(0)); + const auto tensor_axis = tensors.at(inputs.at(1)); + const auto tensor_out = tensors.at(outputs[0]); + assert(tensor_in != nullptr); + assert(tensor_axis != nullptr); + assert(tensor_out != nullptr); - switch (tensor_axis->type) + switch (tensor_axis->type()) { case circle::TensorType_INT32: case circle::TensorType_INT64: @@ -44,7 +47,7 @@ bool CircleReverseV2GraphBuilder::validate(const ValidateArgs &args) const return false; } - if (tensor_out->type != tensor_in->type) + if (tensor_out->type() != tensor_in->type()) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleRound.cpp b/compiler/luci/import/src/Nodes/CircleRound.cpp index d13e0fafe..08cfae6c2 100644 --- a/compiler/luci/import/src/Nodes/CircleRound.cpp +++ b/compiler/luci/import/src/Nodes/CircleRound.cpp @@ -33,11 +33,13 @@ bool CircleRoundGraphBuilder::validate(const ValidateArgs &args) const // Must be one of the following types // bfloat16, half (float16), float32, float64, complex64, complex128 // Currently, circle supports float16, float32, complex64 - const auto &tensors = args.reader.tensors(); - const auto &tensor_in = tensors.at(inputs.at(0)); - const auto &tensor_out = tensors.at(outputs[0]); + const auto tensors = args.reader.tensors(); + const auto tensor_in = tensors.at(inputs.at(0)); + const auto tensor_out = tensors.at(outputs[0]); + assert(tensor_in != nullptr); 
+ assert(tensor_out != nullptr); - switch (tensor_in->type) + switch (tensor_in->type()) { case circle::TensorType_FLOAT16: case circle::TensorType_FLOAT32: @@ -49,7 +51,7 @@ bool CircleRoundGraphBuilder::validate(const ValidateArgs &args) const return false; } - if (tensor_out->type != tensor_in->type) + if (tensor_out->type() != tensor_in->type()) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleRsqrt.cpp b/compiler/luci/import/src/Nodes/CircleRsqrt.cpp index a9ca90832..e3bc68f8b 100644 --- a/compiler/luci/import/src/Nodes/CircleRsqrt.cpp +++ b/compiler/luci/import/src/Nodes/CircleRsqrt.cpp @@ -32,9 +32,10 @@ bool CircleRsqrtGraphBuilder::validate(const ValidateArgs &args) const // Must be one of the following types // bfloat16, half (float16), float32, float64, complex64, complex128 // Currently, circle supports float16, float32, complex64 - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - switch (tensor->type) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); + switch (tensor->type()) { case circle::TensorType_UINT8: case circle::TensorType_INT16: diff --git a/compiler/luci/import/src/Nodes/CircleSVDF.cpp b/compiler/luci/import/src/Nodes/CircleSVDF.cpp new file mode 100644 index 000000000..83a025177 --- /dev/null +++ b/compiler/luci/import/src/Nodes/CircleSVDF.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Import/Nodes/CircleSVDF.h" + +#include <luci/IR/Nodes/CircleSVDF.h> + +#include <loco.h> + +namespace luci +{ + +bool CircleSVDFBuilder::validate(const ValidateArgs &args) const +{ + const auto &inputs = args.op.inputs; + if (!(inputs.size() == 4 || inputs.size() == 5)) + return false; + + return true; +} + +CircleNode *CircleSVDFBuilder::build_node(const circle::OperatorT &op, + const std::vector<CircleNode *> &inputs, + loco::Graph *graph) const +{ + auto *node = graph->nodes()->create<CircleSVDF>(); + node->input(inputs.at(0)); + node->weight_feature(inputs.at(1)); + node->weight_time(inputs.at(2)); + if (inputs.size() == 4) + { + auto *bias = graph->nodes()->create<CircleOutputExclude>(); + // CircleOutputExclude doesn't need a type, but since all nodes must have a type, + // a dummy type is inserted. + bias->dtype(inputs.at(0)->dtype()); + node->bias(bias); + + node->input_activation_state(inputs.at(3)); + } + else + { + node->bias(inputs.at(3)); + node->input_activation_state(inputs.at(4)); + } + + const auto *options = op.builtin_options.AsSVDFOptions(); + node->svdf_rank(options->rank); + node->fusedActivationFunction(luci_actfunc(options->fused_activation_function)); + node->asymmetric_quantize_inputs(options->asymmetric_quantize_inputs); + + return node; +} + +} // namespace luci diff --git a/compiler/luci/import/src/Nodes/CircleScatterNd.cpp b/compiler/luci/import/src/Nodes/CircleScatterNd.cpp index f8c175110..ebe252527 100644 --- a/compiler/luci/import/src/Nodes/CircleScatterNd.cpp +++ b/compiler/luci/import/src/Nodes/CircleScatterNd.cpp @@ -30,14 +30,15 @@ bool CircleScatterNdGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; // indices must have the same type as shape - const auto &tensors = args.reader.tensors(); + const auto tensors = args.reader.tensors(); - if 
(tensors[inputs.at(0)]->type != tensors[inputs.at(2)]->type) + assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(2)] != nullptr); + if (tensors[inputs.at(0)]->type() != tensors[inputs.at(2)]->type()) return false; // indices must be either int32 or int64 - if (tensors[inputs.at(0)]->type != circle::TensorType_INT32 && - tensors[inputs.at(0)]->type != circle::TensorType_INT64) + if (tensors[inputs.at(0)]->type() != circle::TensorType_INT32 && + tensors[inputs.at(0)]->type() != circle::TensorType_INT64) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp b/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp index bfa333e8d..01d1aab44 100644 --- a/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp +++ b/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp @@ -30,12 +30,15 @@ bool CircleSegmentSumGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor_in = tensors.at(inputs.at(0)); - const auto &tensor_out = tensors.at(outputs[0]); - const auto &tensor_ids = tensors.at(inputs.at(1)); + const auto tensors = args.reader.tensors(); + const auto tensor_in = tensors.at(inputs.at(0)); + const auto tensor_out = tensors.at(outputs[0]); + const auto tensor_ids = tensors.at(inputs.at(1)); + assert(tensor_in != nullptr); + assert(tensor_out != nullptr); + assert(tensor_ids != nullptr); - switch (tensor_ids->type) + switch (tensor_ids->type()) { case circle::TensorType_INT32: case circle::TensorType_INT64: @@ -44,7 +47,7 @@ bool CircleSegmentSumGraphBuilder::validate(const ValidateArgs &args) const return false; } - if (tensor_out->type != tensor_in->type) + if (tensor_out->type() != tensor_in->type()) { return false; } diff --git a/compiler/luci/import/src/Nodes/CircleSelect.cpp b/compiler/luci/import/src/Nodes/CircleSelect.cpp index 36a5fa8a8..002f62f6c 100644 --- 
a/compiler/luci/import/src/Nodes/CircleSelect.cpp +++ b/compiler/luci/import/src/Nodes/CircleSelect.cpp @@ -29,9 +29,10 @@ bool CircleSelectGraphBuilder::validate(const ValidateArgs &args) const return false; const auto &inputs = args.op.inputs; - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - if (tensor->type != circle::TensorType_BOOL) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); + if (tensor->type() != circle::TensorType_BOOL) return false; // TODO check dtypes for input 1, 2 diff --git a/compiler/luci/import/src/Nodes/CircleSelectV2.cpp b/compiler/luci/import/src/Nodes/CircleSelectV2.cpp index 556c8fa33..062fdc143 100644 --- a/compiler/luci/import/src/Nodes/CircleSelectV2.cpp +++ b/compiler/luci/import/src/Nodes/CircleSelectV2.cpp @@ -29,14 +29,16 @@ bool CircleSelectV2GraphBuilder::validate(const ValidateArgs &args) const return false; const auto &inputs = args.op.inputs; - const auto &tensors = args.reader.tensors(); - const auto &condition = tensors.at(inputs.at(0)); - if (condition->type != circle::TensorType_BOOL) + const auto tensors = args.reader.tensors(); + const auto condition = tensors.at(inputs.at(0)); + assert(condition != nullptr); + if (condition->type() != circle::TensorType_BOOL) return false; - const auto &t = tensors.at(inputs.at(1)); - const auto &e = tensors.at(inputs.at(2)); - if (t->type != e->type) + const auto t = tensors.at(inputs.at(1)); + const auto e = tensors.at(inputs.at(2)); + assert(t != nullptr && e != nullptr); + if (t->type() != e->type()) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleSin.cpp b/compiler/luci/import/src/Nodes/CircleSin.cpp index 22f461123..51ebf0355 100644 --- a/compiler/luci/import/src/Nodes/CircleSin.cpp +++ b/compiler/luci/import/src/Nodes/CircleSin.cpp @@ -30,9 +30,10 @@ bool CircleSinGraphBuilder::validate(const ValidateArgs &args) const const auto 
&inputs = args.op.inputs; // input type check - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - switch (tensor->type) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); + switch (tensor->type()) { case circle::TensorType_FLOAT16: case circle::TensorType_FLOAT32: diff --git a/compiler/luci/import/src/Nodes/CircleSquare.cpp b/compiler/luci/import/src/Nodes/CircleSquare.cpp index 7ff2b84e6..bec84b4c0 100644 --- a/compiler/luci/import/src/Nodes/CircleSquare.cpp +++ b/compiler/luci/import/src/Nodes/CircleSquare.cpp @@ -29,13 +29,13 @@ bool CircleSquareGraphBuilder::validate(const ValidateArgs &args) const return false; const auto &inputs = args.op.inputs; - // Must be one of the following types - // bfloat16, half (float16), float32, float64, complex64, complex128 - // Currently, circle supports float16, float32, complex64 - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - switch (tensor->type) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); + switch (tensor->type()) { + case circle::TensorType_UINT8: + case circle::TensorType_INT16: case circle::TensorType_INT32: case circle::TensorType_INT64: case circle::TensorType_FLOAT16: diff --git a/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp b/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp index 33440d5ab..1983465d3 100644 --- a/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp +++ b/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp @@ -32,9 +32,10 @@ bool CircleSquaredDifferenceGraphBuilder::validate(const ValidateArgs &args) con const auto &outputs = args.op.outputs; // Inputs must be one of the following types // bfloat16, half(float16), float32, float64, int32, int64, complex64, complex128 - const auto &tensors = args.reader.tensors(); - const 
auto &tensor = tensors.at(inputs.at(0)); - switch (tensor->type) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); + switch (tensor->type()) { case circle::TensorType_FLOAT16: case circle::TensorType_FLOAT32: @@ -53,11 +54,13 @@ bool CircleSquaredDifferenceGraphBuilder::validate(const ValidateArgs &args) con } // Input types must match - if (tensors.at(inputs.at(0))->type != tensors.at(inputs.at(1))->type) + assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(inputs.at(1)) != nullptr); + if (tensors.at(inputs.at(0))->type() != tensors.at(inputs.at(1))->type()) return false; // Input and output types must match - if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type) + assert(tensors.at(outputs[0]) != nullptr); + if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type()) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleTanh.cpp b/compiler/luci/import/src/Nodes/CircleTanh.cpp index 95625a0e4..80a0e887f 100644 --- a/compiler/luci/import/src/Nodes/CircleTanh.cpp +++ b/compiler/luci/import/src/Nodes/CircleTanh.cpp @@ -30,8 +30,9 @@ bool CircleTanhGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); - if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type) + const auto tensors = args.reader.tensors(); + assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr); + if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type()) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleTile.cpp b/compiler/luci/import/src/Nodes/CircleTile.cpp index 6da44130c..c41a6ba3f 100644 --- a/compiler/luci/import/src/Nodes/CircleTile.cpp +++ b/compiler/luci/import/src/Nodes/CircleTile.cpp @@ -32,9 +32,10 @@ bool CircleTileGraphBuilder::validate(const ValidateArgs &args) 
const auto outputs = args.op.outputs; // Multiples (inputs.at(1)) must be one of the following types // int32, int64 - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(1)); - switch (tensor->type) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(1)); + assert(tensor != nullptr); + switch (tensor->type()) { case circle::TensorType_INT32: case circle::TensorType_INT64: @@ -44,7 +45,8 @@ bool CircleTileGraphBuilder::validate(const ValidateArgs &args) const } // Type of input and output must be the same - if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type) + assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr); + if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type()) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleTopKV2.cpp b/compiler/luci/import/src/Nodes/CircleTopKV2.cpp index 49f858798..9f9173738 100644 --- a/compiler/luci/import/src/Nodes/CircleTopKV2.cpp +++ b/compiler/luci/import/src/Nodes/CircleTopKV2.cpp @@ -35,9 +35,10 @@ bool CircleTopKV2GraphBuilder::validate(const ValidateArgs &args) const if (outputs.size() != 2) return false; - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(1)); - if (tensor->type != circle::TensorType_INT32) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(1)); + assert(tensor != nullptr); + if (tensor->type() != circle::TensorType_INT32) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp index 5a60e2f54..041983dac 100644 --- a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp +++ b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp @@ -31,11 +31,13 @@ bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const return false; const auto &inputs = args.op.inputs; - 
const auto &tensors = args.reader.tensors(); - const auto &filter_tensor = tensors.at(inputs.at(1)); - const auto &filter_shape = filter_tensor.get()->shape; - const auto &ifm_tensor = tensors.at(inputs.at(2)); - const auto &ifm_shape = ifm_tensor.get()->shape; + const auto tensors = args.reader.tensors(); + const auto filter_tensor = tensors.at(inputs.at(1)); + assert(filter_tensor != nullptr); + const auto filter_shape = wrap(filter_tensor->shape()); + const auto ifm_tensor = tensors.at(inputs.at(2)); + assert(ifm_tensor != nullptr); + const auto ifm_shape = wrap(ifm_tensor->shape()); // ifm and filters must be 4-D tensor if (ifm_shape.size() != 4) @@ -45,7 +47,7 @@ bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const // input shape : [batch, height, width, in_channels] // filters shape : [output_channels, height, weight, in_channels] - if (ifm_tensor.get()->shape.at(3) != filter_tensor.get()->shape.at(3)) + if (ifm_shape.at(3) != filter_shape.at(3)) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleUnpack.cpp b/compiler/luci/import/src/Nodes/CircleUnpack.cpp index 9bfc76b57..6b3401609 100644 --- a/compiler/luci/import/src/Nodes/CircleUnpack.cpp +++ b/compiler/luci/import/src/Nodes/CircleUnpack.cpp @@ -46,8 +46,8 @@ bool CircleUnpackGraphBuilder::validate(const ValidateArgs &args) const { if (settings->get(luci::UserSettings::Key::DisableValidation)) { - const auto &tensors = args.reader.tensors(); - const circle::TensorT &output_tensor = *tensors[outputs[0]]; + const auto tensors = args.reader.tensors(); + const auto output_tensor = tensors[outputs[0]]; auto name = tensor_name(output_tensor); WARN(l) << "Warning: import Unpack(" << name << ") 'num' is not same as outputs used"; } @@ -58,9 +58,10 @@ bool CircleUnpackGraphBuilder::validate(const ValidateArgs &args) const if (options->num < 0) return false; - const auto &tensors = args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - const 
auto &shape = tensor->shape; + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); + const auto shape = wrap(tensor->shape()); auto shape_size = static_cast<int32_t>(shape.size()); if (shape_size > 0) { diff --git a/compiler/luci/import/src/Nodes/CircleVariable.cpp b/compiler/luci/import/src/Nodes/CircleVariable.cpp new file mode 100644 index 000000000..23ae9e7be --- /dev/null +++ b/compiler/luci/import/src/Nodes/CircleVariable.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Import/Nodes/CircleVariable.h" + +#include <luci/IR/Nodes/CircleVariable.h> +#include <luci/Log.h> + +#include <cassert> +#include <ostream> +#include <string> +#include <vector> + +namespace +{ + +std::ostream &operator<<(std::ostream &os, const luci::VectorWrapper<int32_t> &vect) +{ + uint32_t seq = 0; + for (const auto &v : vect) + { + if (seq) + os << ", "; + os << v; + seq++; + } + return os; +} + +} // namespace + +namespace luci +{ + +CircleVariable *create_circlevariable(GraphBuilderContext *context, int32_t tensor_index) +{ + LOGGER(l); + + auto graph = context->graph(); + auto reader = context->reader(); + const auto tensors = reader->tensors(); + const auto variable_tensor = tensors[tensor_index]; + assert(variable_tensor != nullptr); + + if (not variable_tensor->is_variable()) + { + // not a variable + return nullptr; + } + { + // check if there is no buffer as we don't support this for now + // TODO use buffer when this is enabled in Kernel + assert(reader->buffers()[variable_tensor->buffer()] != nullptr); + assert(reader->buffers()[variable_tensor->buffer()]->data() == nullptr); + } + + auto variable_node = graph->nodes()->create<CircleVariable>(); + copy_tensor_attributes(variable_tensor, variable_node); + variable_node->shape_status(luci::ShapeStatus::VALID); + + INFO(l) << "[luci] NodeFinder variable node(" << tensor_index << ") -> " << variable_node << " " + << wrap(variable_tensor->shape()) << std::endl; + + return variable_node; +} + +} // namespace luci diff --git a/compiler/luci/import/src/Nodes/CircleWhere.cpp b/compiler/luci/import/src/Nodes/CircleWhere.cpp index 8e4f1a0c4..bc6199ace 100644 --- a/compiler/luci/import/src/Nodes/CircleWhere.cpp +++ b/compiler/luci/import/src/Nodes/CircleWhere.cpp @@ -30,14 +30,16 @@ bool CircleWhereGraphBuilder::validate(const ValidateArgs &args) const const auto &inputs = args.op.inputs; const auto &outputs = args.op.outputs; - const auto &tensors = args.reader.tensors(); - const auto 
&tensor_condition = tensors.at(inputs.at(0)); - const auto &tensor_out = tensors.at(outputs[0]); + const auto tensors = args.reader.tensors(); + const auto tensor_condition = tensors.at(inputs.at(0)); + const auto tensor_out = tensors.at(outputs[0]); + assert(tensor_condition != nullptr); + assert(tensor_out != nullptr); - if (tensor_condition->type != circle::TensorType_BOOL) + if (tensor_condition->type() != circle::TensorType_BOOL) return false; - if (tensor_out->type != circle::TensorType_INT64) + if (tensor_out->type() != circle::TensorType_INT64) return false; return true; diff --git a/compiler/luci/import/src/Nodes/CircleWhile.cpp b/compiler/luci/import/src/Nodes/CircleWhile.cpp index 26147562f..27a392b2a 100644 --- a/compiler/luci/import/src/Nodes/CircleWhile.cpp +++ b/compiler/luci/import/src/Nodes/CircleWhile.cpp @@ -67,8 +67,8 @@ CircleNode *CircleWhileGraphBuilder::build(const circle::OperatorT &op, const std::vector<int32_t> &inputs = op.inputs; const std::vector<int32_t> &outputs = op.outputs; - const auto &tensors = context->reader()->tensors(); - const auto &opcodes = context->reader()->opcodes(); + const auto tensors = context->reader()->tensors(); + const auto opcodes = context->reader()->opcodes(); std::vector<CircleNode *> input_nodes; for (const int32_t input_tensor_index : inputs) @@ -96,9 +96,11 @@ CircleNode *CircleWhileGraphBuilder::build(const circle::OperatorT &op, assert(outputs.size() > 0); { // Lets use name of output 0 as While name - const circle::TensorT &output_tensor = *tensors[outputs[0]]; + const auto output_tensor = tensors[outputs[0]]; + assert(output_tensor != nullptr); node->name(tensor_name(output_tensor)); - node->op_version(opcodes[op.opcode_index].get()->version); + assert(opcodes[op.opcode_index] != nullptr); + node->op_version(opcodes[op.opcode_index]->version()); // NOTE We don't set quantization for While itself but to virtual outputs } @@ -106,7 +108,8 @@ CircleNode *CircleWhileGraphBuilder::build(const 
circle::OperatorT &op, // Create virtual outputs of While for (uint32_t n = 0; n < output_count; ++n) { - const circle::TensorT &output_tensor = *tensors[outputs[n]]; + const auto output_tensor = tensors[outputs[n]]; + assert(output_tensor != nullptr); auto *nodeout = graph->nodes()->create<CircleWhileOut>(); diff --git a/compiler/luci/import/src/ValidateHelpers.cpp b/compiler/luci/import/src/ValidateHelpers.cpp index 27306ba90..fc027704b 100644 --- a/compiler/luci/import/src/ValidateHelpers.cpp +++ b/compiler/luci/import/src/ValidateHelpers.cpp @@ -26,9 +26,10 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args) return false; // input 1 and 2 should have INT32/INT64 type - const auto &tensors = args.reader.tensors(); - const auto &tensor_1 = tensors.at(inputs.at(1)); - switch (tensor_1->type) + const auto tensors = args.reader.tensors(); + const auto tensor_1 = tensors.at(inputs.at(1)); + assert(tensor_1 != nullptr); + switch (tensor_1->type()) { case circle::TensorType_INT32: case circle::TensorType_INT64: @@ -36,8 +37,9 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args) default: return false; } - const auto &tensor_2 = tensors.at(inputs.at(2)); - switch (tensor_2->type) + const auto tensor_2 = tensors.at(inputs.at(2)); + assert(tensor_2 != nullptr); + switch (tensor_2->type()) { case circle::TensorType_INT32: case circle::TensorType_INT64: @@ -47,8 +49,9 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args) } // Only support input shape dimension 3 and 4 only - const auto &tensor_0 = tensors.at(inputs.at(0)); - const auto t_0_s = tensor_0->shape.size(); + const auto tensor_0 = tensors.at(inputs.at(0)); + assert(tensor_0 != nullptr); + const auto t_0_s = wrap(tensor_0->shape()).size(); if (t_0_s != 3 && t_0_s != 4) return false; @@ -68,10 +71,10 @@ bool validate_minmax(const GraphBuilderBase::ValidateArgs &args) if (outputs.size() != 1) return false; - const auto &tensors = 
args.reader.tensors(); - const auto &tensor = tensors.at(inputs.at(0)); - - switch (tensor->type) + const auto tensors = args.reader.tensors(); + const auto tensor = tensors.at(inputs.at(0)); + assert(tensor != nullptr); + switch (tensor->type()) { case circle::TensorType_FLOAT16: case circle::TensorType_FLOAT32: @@ -84,10 +87,12 @@ bool validate_minmax(const GraphBuilderBase::ValidateArgs &args) return false; } - if (tensors[inputs.at(1)]->type != tensor->type) + assert(tensors[inputs.at(1)] != nullptr); + if (tensors[inputs.at(1)]->type() != tensor->type()) return false; - if (tensors[outputs[0]]->type != tensor->type) + assert(tensors[outputs[0]] != nullptr); + if (tensors[outputs[0]]->type() != tensor->type()) return false; return true; @@ -104,10 +109,10 @@ bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args) if (outputs.size() != 1) return false; - const auto &tensors = args.reader.tensors(); - const auto &tensor_axis = tensors.at(inputs.at(1)); - - switch (tensor_axis->type) + const auto tensors = args.reader.tensors(); + const auto tensor_axis = tensors.at(inputs.at(1)); + assert(tensor_axis != nullptr); + switch (tensor_axis->type()) { case circle::TensorType_INT32: case circle::TensorType_INT64: diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h index a313f9d5b..d89ea03cc 100644 --- a/compiler/luci/lang/include/luci/IR/CircleNodes.h +++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h @@ -29,7 +29,6 @@ #include "Nodes/CircleCast.h" #include "Nodes/CircleCeil.h" #include "Nodes/CircleConcatenation.h" -#include "Nodes/CircleConst.h" #include "Nodes/CircleConv2D.h" #include "Nodes/CircleCos.h" #include "Nodes/CircleCustom.h" @@ -119,6 +118,7 @@ #include "Nodes/CircleStridedSlice.h" #include "Nodes/CircleSub.h" #include "Nodes/CircleSum.h" +#include "Nodes/CircleSVDF.h" #include "Nodes/CircleTanh.h" #include "Nodes/CircleTile.h" #include "Nodes/CircleTopKV2.h" @@ -135,18 
+135,21 @@ #include "Nodes/CircleBCQGather.h" #include "Nodes/CircleInstanceNorm.h" // Virtual nodes +#include "Nodes/CircleConst.h" #include "Nodes/CircleInput.h" #include "Nodes/CircleOutput.h" +#include "Nodes/CircleVariable.h" +// Multi-output virtual nodes #include "Nodes/CircleBidirectionalSequenceLSTMOut.h" #include "Nodes/CircleCustomOut.h" #include "Nodes/CircleIfOut.h" #include "Nodes/CircleNonMaxSuppressionV4Out.h" #include "Nodes/CircleNonMaxSuppressionV5Out.h" -#include "Nodes/CircleUnpackOut.h" -#include "Nodes/CircleUniqueOut.h" #include "Nodes/CircleSplitOut.h" #include "Nodes/CircleSplitVOut.h" #include "Nodes/CircleTopKV2Out.h" +#include "Nodes/CircleUniqueOut.h" +#include "Nodes/CircleUnpackOut.h" #include "Nodes/CircleWhileOut.h" #include <loco/IR/Graph.h> diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst index 914aa16e4..1472008df 100644 --- a/compiler/luci/lang/include/luci/IR/CircleNodes.lst +++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst @@ -116,6 +116,7 @@ CIRCLE_NODE(SQUEEZE, CircleSqueeze) CIRCLE_NODE(STRIDED_SLICE, CircleStridedSlice) CIRCLE_NODE(SUB, CircleSub) CIRCLE_NODE(SUM, CircleSum) +CIRCLE_NODE(SVDF, CircleSVDF) CIRCLE_NODE(TANH, CircleTanh) CIRCLE_NODE(TILE, CircleTile) CIRCLE_NODE(TOPK_V2, CircleTopKV2) @@ -132,12 +133,14 @@ CIRCLE_NODE(BCQ_FULLY_CONNECTED, CircleBCQFullyConnected) CIRCLE_NODE(BCQ_GATHER, CircleBCQGather) CIRCLE_NODE(INSTANCE_NORM, CircleInstanceNorm) // Virtual node(s) -CIRCLE_VNODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT, CircleBidirectionalSequenceLSTMOut) CIRCLE_VNODE(CIRCLECONST, CircleConst) CIRCLE_VNODE(CIRCLEINPUT, CircleInput) CIRCLE_VNODE(CIRCLEOUTPUT, CircleOutput) CIRCLE_VNODE(CIRCLEOUTPUTDUMMY, CircleOutputDummy) CIRCLE_VNODE(CIRCLEOUTPUTEXCLUDE, CircleOutputExclude) +CIRCLE_VNODE(CIRCLEVARIABLE, CircleVariable) +// Multi-output virtual nodes +CIRCLE_VNODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT, 
CircleBidirectionalSequenceLSTMOut) CIRCLE_VNODE(CIRCLECUSTOMOUT, CircleCustomOut) CIRCLE_VNODE(CIRCLEIFOUT, CircleIfOut) CIRCLE_VNODE(CIRCLENONMAXSUPPRESSIONV4OUT, CircleNonMaxSuppressionV4Out) diff --git a/compiler/luci/lang/include/luci/IR/CircleQuantParam.h b/compiler/luci/lang/include/luci/IR/CircleQuantParam.h index 694437303..8afc80a76 100644 --- a/compiler/luci/lang/include/luci/IR/CircleQuantParam.h +++ b/compiler/luci/lang/include/luci/IR/CircleQuantParam.h @@ -32,6 +32,10 @@ struct CircleQuantParam int32_t quantized_dimension{0}; }; +struct CircleNode; + +void copy_quantparam(const luci::CircleNode *src, luci::CircleNode *dst); + } // namespace luci #endif // __LUCI_IR_CIRCLEQUANTPARAM_H__ diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h index 2862cadb2..dc5aeb267 100644 --- a/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h @@ -58,8 +58,12 @@ public: WeightsFormat weights_format(void) const { return _weights_format; } void weights_format(WeightsFormat weights_format) { _weights_format = weights_format; } + bool keep_num_dims(void) const { return _keep_num_dims; } + void keep_num_dims(bool keep_num_dims) { _keep_num_dims = keep_num_dims; } + private: WeightsFormat _weights_format{WeightsFormat::DEFAULT}; + bool _keep_num_dims{false}; }; } // namespace luci diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h new file mode 100644 index 000000000..839d11e04 --- /dev/null +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IR_CIRCLE_SVDF_H__ +#define __LUCI_IR_CIRCLE_SVDF_H__ + +#include "luci/IR/CircleNodeDecl.h" +#include "luci/IR/CircleOpcode.h" + +#include "luci/IR/LuciNodeMixins.h" + +namespace luci +{ + +/** + * @brief SVDF in Circle + */ +class CircleSVDF final : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::SVDF>>, + public CircleNodeMixin<CircleNodeTrait::FusedActFunc> +{ +public: + CircleSVDF() = default; + +public: + loco::Node *input(void) const { return at(0)->node(); } + void input(loco::Node *node) { at(0)->node(node); } + + loco::Node *weight_feature(void) const { return at(1)->node(); } + void weight_feature(loco::Node *node) { at(1)->node(node); } + + loco::Node *weight_time(void) const { return at(2)->node(); } + void weight_time(loco::Node *node) { at(2)->node(node); } + + loco::Node *bias(void) const { return at(3)->node(); } + void bias(loco::Node *node) { at(3)->node(node); } + + loco::Node *input_activation_state(void) const { return at(4)->node(); } + void input_activation_state(loco::Node *node) { at(4)->node(node); } + +public: + bool asymmetric_quantize_inputs() const { return _asymmetric_quantize_inputs; } + void asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) + { + _asymmetric_quantize_inputs = asymmetric_quantize_inputs; + } + + int32_t svdf_rank() const { return _rank; } + void svdf_rank(int32_t svdf_rank) { _rank = svdf_rank; } + +private: + bool _asymmetric_quantize_inputs = false; + int32_t _rank = 0; +}; + +} // namespace luci + +#endif // __LUCI_IR_CIRCLE_SVDF_H__ diff --git 
a/compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h new file mode 100644 index 000000000..8c15b66c9 --- /dev/null +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IR_CIRCLE_VARIABLE_H__ +#define __LUCI_IR_CIRCLE_VARIABLE_H__ + +#include "luci/IR/CircleNodeDecl.h" +#include "luci/IR/CircleOpcode.h" + +#include "luci/IR/CircleNodeMixins.h" + +namespace luci +{ + +/** + * @brief Virtual CircleVariable in Circle for 'variable' Tensor + */ +class CircleVariable final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEVARIABLE>> +{ +public: + CircleVariable() = default; +}; + +} // namespace luci + +#endif // __LUCI_IR_CIRCLE_VARIABLE_H__ diff --git a/compiler/luci/lang/src/CircleQuantParam.cpp b/compiler/luci/lang/src/CircleQuantParam.cpp new file mode 100644 index 000000000..89671d3c3 --- /dev/null +++ b/compiler/luci/lang/src/CircleQuantParam.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/CircleQuantParam.h" +#include "luci/IR/CircleNode.h" + +#include <memory> + +namespace luci +{ + +/** + * @brief copy CircleQuantParam of src to dst + */ +void copy_quantparam(const luci::CircleNode *src, luci::CircleNode *dst) +{ + auto q = src->quantparam(); + if (q == nullptr) + dst->quantparam(nullptr); + else + { + auto qparam = std::make_unique<luci::CircleQuantParam>(); + qparam->scale = q->scale; + qparam->zerop = q->zerop; + qparam->min = q->min; + qparam->max = q->max; + qparam->quantized_dimension = q->quantized_dimension; + + dst->quantparam(std::move(qparam)); + } +} + +} // namespace luci diff --git a/compiler/luci/lang/src/CircleQuantParam.test.cpp b/compiler/luci/lang/src/CircleQuantParam.test.cpp new file mode 100644 index 000000000..520ca05cc --- /dev/null +++ b/compiler/luci/lang/src/CircleQuantParam.test.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// NOTE any node will do for testing +#include "luci/IR/Nodes/CircleAdd.h" + +#include <loco/IR/Graph.h> + +#include <gtest/gtest.h> + +namespace +{ + +luci::CircleAdd *build_simple_add_graph(loco::Graph *g) +{ + auto node = g->nodes()->create<luci::CircleAdd>(); + + node->name("name"); + node->dtype(loco::DataType::FLOAT32); + node->rank(1); + node->dim(0).set(3); + node->shape_status(luci::ShapeStatus::VALID); + node->fusedActivationFunction(luci::FusedActFunc::NONE); + + auto qparam = std::make_unique<luci::CircleQuantParam>(); + qparam->scale = {1.0}; + qparam->zerop = {0}; + qparam->min = {0.0}; + qparam->max = {1.0}; + qparam->quantized_dimension = 0; + node->quantparam(std::move(qparam)); + + return node; +} + +} // namespace + +TEST(CircleNodeCloneTest, copy_quantparam) +{ + auto g = loco::make_graph(); + auto node = build_simple_add_graph(g.get()); + + auto copy = g->nodes()->create<luci::CircleAdd>(); + luci::copy_quantparam(node, copy); + + const auto *qparam_node = node->quantparam(); + const auto *qparam_copy = copy->quantparam(); + ASSERT_EQ(qparam_node->scale, qparam_copy->scale); + ASSERT_EQ(qparam_node->zerop, qparam_copy->zerop); + ASSERT_EQ(qparam_node->quantized_dimension, qparam_copy->quantized_dimension); +} + +TEST(CircleNodeCloneTest, copy_quantparam_NEG) +{ + auto g = loco::make_graph(); + auto node = build_simple_add_graph(g.get()); + + node->quantparam(nullptr); + + auto copy = g->nodes()->create<luci::CircleAdd>(); + luci::copy_quantparam(node, copy); + + const auto *qparam_copy = copy->quantparam(); + ASSERT_EQ(qparam_copy, nullptr); +} diff --git a/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp b/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp index bb0e3c51b..15a780085 100644 --- a/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp +++ b/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp @@ -32,6 +32,7 @@ TEST(CircleFullyConnectedTest, constructor) ASSERT_EQ(nullptr, fc_node.weights()); 
ASSERT_EQ(nullptr, fc_node.bias()); ASSERT_EQ(luci::FusedActFunc::UNDEFINED, fc_node.fusedActivationFunction()); + ASSERT_EQ(false, fc_node.keep_num_dims()); } TEST(CircleFullyConnectedTest, input_NEG) diff --git a/compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp new file mode 100644 index 000000000..833ae0732 --- /dev/null +++ b/compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/IR/Nodes/CircleSVDF.h" + +#include "luci/IR/CircleDialect.h" +#include "luci/IR/CircleNodeVisitor.h" + +#include <gtest/gtest.h> + +TEST(CircleSVDFTest, constructor) +{ + luci::CircleSVDF svdf_node; + + ASSERT_EQ(luci::CircleDialect::get(), svdf_node.dialect()); + ASSERT_EQ(luci::CircleOpcode::SVDF, svdf_node.opcode()); + + ASSERT_EQ(nullptr, svdf_node.input()); + ASSERT_EQ(nullptr, svdf_node.weight_feature()); + ASSERT_EQ(nullptr, svdf_node.weight_time()); + ASSERT_EQ(nullptr, svdf_node.bias()); + ASSERT_EQ(nullptr, svdf_node.input_activation_state()); + + ASSERT_EQ(false, svdf_node.asymmetric_quantize_inputs()); + ASSERT_EQ(0, svdf_node.svdf_rank()); +} + +TEST(CircleSVDFTest, input_NEG) +{ + luci::CircleSVDF svdf_node; + luci::CircleSVDF node; + + svdf_node.input(&node); + svdf_node.weight_feature(&node); + svdf_node.weight_time(&node); + svdf_node.bias(&node); + svdf_node.input_activation_state(&node); + + ASSERT_NE(nullptr, svdf_node.input()); + ASSERT_NE(nullptr, svdf_node.weight_feature()); + ASSERT_NE(nullptr, svdf_node.weight_time()); + ASSERT_NE(nullptr, svdf_node.bias()); + ASSERT_NE(nullptr, svdf_node.input_activation_state()); + + svdf_node.input(nullptr); + svdf_node.weight_feature(nullptr); + svdf_node.weight_time(nullptr); + svdf_node.bias(nullptr); + svdf_node.input_activation_state(nullptr); + + ASSERT_EQ(nullptr, svdf_node.input()); + ASSERT_EQ(nullptr, svdf_node.weight_feature()); + ASSERT_EQ(nullptr, svdf_node.weight_time()); + ASSERT_EQ(nullptr, svdf_node.bias()); + ASSERT_EQ(nullptr, svdf_node.input_activation_state()); +} + +TEST(CircleSVDFTest, arity_NEG) +{ + luci::CircleSVDF svdf_node; + + ASSERT_NO_THROW(svdf_node.arg(4)); + ASSERT_THROW(svdf_node.arg(5), std::out_of_range); +} + +TEST(CircleSVDFTest, visit_mutable_NEG) +{ + struct TestVisitor final : public luci::CircleNodeMutableVisitor<void> + { + }; + + luci::CircleSVDF svdf_node; + + TestVisitor tv; + ASSERT_THROW(svdf_node.accept(&tv), std::exception); +} + 
+TEST(CircleSVDFTest, visit_NEG) +{ + struct TestVisitor final : public luci::CircleNodeVisitor<void> + { + }; + + luci::CircleSVDF svdf_node; + + TestVisitor tv; + ASSERT_THROW(svdf_node.accept(&tv), std::exception); +} diff --git a/compiler/luci/lang/src/Nodes/CircleVariable.test.cpp b/compiler/luci/lang/src/Nodes/CircleVariable.test.cpp new file mode 100644 index 000000000..e1864f8da --- /dev/null +++ b/compiler/luci/lang/src/Nodes/CircleVariable.test.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/IR/Nodes/CircleVariable.h" + +#include "luci/IR/CircleDialect.h" +#include "luci/IR/CircleNodeVisitor.h" + +#include <gtest/gtest.h> + +TEST(CircleVariableTest, constructor) +{ + luci::CircleVariable var_node; + + ASSERT_EQ(luci::CircleDialect::get(), var_node.dialect()); + ASSERT_EQ(luci::CircleOpcode::CIRCLEVARIABLE, var_node.opcode()); +} + +TEST(CircleVariableTest, arity_NEG) +{ + luci::CircleVariable var_node; + + ASSERT_THROW(var_node.arg(0), std::out_of_range); +} + +TEST(CircleVariableTest, visit_mutable_NEG) +{ + struct TestVisitor final : public luci::CircleNodeMutableVisitor<void> + { + }; + + luci::CircleVariable var_node; + + TestVisitor tv; + ASSERT_THROW(var_node.accept(&tv), std::exception); +} + +TEST(CircleVariableTest, visit_NEG) +{ + struct TestVisitor final : public luci::CircleNodeVisitor<void> + { + }; + + luci::CircleVariable var_node; + + TestVisitor tv; + ASSERT_THROW(var_node.accept(&tv), std::exception); +} diff --git a/compiler/luci/logex/CMakeLists.txt b/compiler/luci/logex/CMakeLists.txt index aed9fb79b..b8a2111dd 100644 --- a/compiler/luci/logex/CMakeLists.txt +++ b/compiler/luci/logex/CMakeLists.txt @@ -1,5 +1,7 @@ # TODO Find how to test logging-ex utility file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) if (NOT LUCI_LIBRARY_TYPE) set(LUCI_LIBRARY_TYPE "SHARED") @@ -13,7 +15,17 @@ target_link_libraries(luci_logex PRIVATE luci_log) target_link_libraries(luci_logex PRIVATE luci_lang) target_link_libraries(luci_logex PRIVATE hermes_std) target_link_libraries(luci_logex PRIVATE nncc_common) -target_link_libraries(luci_logex PRIVATE pepper_str) install(TARGETS luci_logex DESTINATION lib) install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN "*.h") + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(luci_logex_test ${TESTS}) +target_include_directories(luci_logex_test 
PRIVATE src) +target_link_libraries(luci_logex_test luci_logex) +target_link_libraries(luci_logex_test luci_lang) diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp new file mode 100644 index 000000000..eff0830b4 --- /dev/null +++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License") + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CircleNodeSummaryBuilder.h" +#include "CircleNodeSummaryBuilders.h" + +#include <luci/IR/CircleDialect.h> + +#include <memory> + +namespace +{ + +std::string circle_opname(luci::CircleOpcode opcode) +{ + static const std::string prefix{"circle."}; + + switch (opcode) + { +#define CIRCLE_NODE(OPCODE, CLASS) \ + case luci::CircleOpcode::OPCODE: \ + return prefix + #OPCODE; +#define CIRCLE_VNODE CIRCLE_NODE +#include <luci/IR/CircleNodes.lst> +#undef CIRCLE_VNODE +#undef CIRCLE_NODE + default: + break; + }; + + return prefix + "Invalid"; +} + +} // namespace + +namespace luci +{ + +bool CircleNodeSummaryBuilder::build(const loco::Node *node, const locop::SymbolTable *tbl, + locop::NodeSummary &s) +{ + if (node->dialect() != luci::CircleDialect::get()) + return false; + + auto ptr_to_str = [](const void *ptr) { + std::stringstream ss; + ss << ptr; + return ss.str(); + }; + + auto circle_node = loco::must_cast<const luci::CircleNode *>(node); + if (const auto builder = create_builder(circle_node)) + { + if (!builder->validate(circle_node)) + { + s.state(locop::NodeDesc::State::Invalid); + return false; + } + + auto input_names = builder->get_input_names(circle_node); + assert(node->arity() == input_names.size()); + for (uint32_t i = 0; i < node->arity(); ++i) + s.args().append(input_names.at(i), tbl->lookup(node->arg(i))); + + builder->build_attributes(circle_node, s); + builder->update_status(s); + + s.opname(circle_opname(circle_node->opcode())); + s.comments().append("[" + circle_node->name() + "] = " + ptr_to_str(node)); + + return true; + } + else + { + // When SummaryBuilder is not implemented, return false + return false; + } +} + +bool CircleNodeSummaryBuilder::validate(const luci::CircleNode *) { return true; } + +std::vector<std::string> CircleNodeSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + // Return empty names for default + return std::vector<std::string>(); +} + +void CircleNodeSummaryBuilder::build_attributes(const 
luci::CircleNode *, locop::NodeSummary &) +{ + // Do nothing for default +} + +void CircleNodeSummaryBuilder::update_status(locop::NodeSummary &s) +{ + s.state(locop::NodeDesc::State::Complete); +} + +std::unique_ptr<CircleNodeSummaryBuilder> +CircleNodeSummaryBuilder::create_builder(const luci::CircleNode *node) +{ + switch (node->opcode()) + { +#define CIRCLE_NODE(OPCODE, CLASS) \ + case luci::CircleOpcode::OPCODE: \ + { \ + return std::make_unique<CLASS>(); \ + } + + CIRCLE_NODE(ABS, CircleAbsSummaryBuilder) + CIRCLE_NODE(ADD, CircleAddSummaryBuilder) + CIRCLE_NODE(ADD_N, CircleAddNSummaryBuilder) + CIRCLE_NODE(ARG_MAX, CircleArgMaxSummaryBuilder) + CIRCLE_NODE(ARG_MIN, CircleArgMinSummaryBuilder) + CIRCLE_NODE(AVERAGE_POOL_2D, CircleAveragePool2DSummaryBuilder) + CIRCLE_NODE(BATCH_MATMUL, CircleBatchMatMulSummaryBuilder) + CIRCLE_NODE(BATCH_TO_SPACE_ND, CircleBatchToSpaceNDSummaryBuilder) + CIRCLE_NODE(BCQ_FULLY_CONNECTED, CircleBCQFullyConnectedSummaryBuilder) + CIRCLE_NODE(BCQ_GATHER, CircleBCQGatherSummaryBuilder) + CIRCLE_NODE(BIDIRECTIONAL_SEQUENCE_LSTM, CircleBidirectionalSequenceLSTMSummaryBuilder) + CIRCLE_NODE(CAST, CircleCastSummaryBuilder) + CIRCLE_NODE(CEIL, CircleCeilSummaryBuilder) + CIRCLE_NODE(CONCATENATION, CircleConcatenationSummaryBuilder) + CIRCLE_NODE(CIRCLECONST, CircleConstSummaryBuilder) + CIRCLE_NODE(CONV_2D, CircleConv2DSummaryBuilder) + CIRCLE_NODE(COS, CircleCosSummaryBuilder) + CIRCLE_NODE(CUSTOM, CircleCustomSummaryBuilder) + CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceSummaryBuilder) + CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DSummaryBuilder) + CIRCLE_NODE(DEQUANTIZE, CircleDequantizeSummaryBuilder) + CIRCLE_NODE(DIV, CircleDivSummaryBuilder) + CIRCLE_NODE(ELU, CircleEluSummaryBuilder) + CIRCLE_NODE(EQUAL, CircleEqualSummaryBuilder) + CIRCLE_NODE(EXP, CircleExpSummaryBuilder) + CIRCLE_NODE(EXPAND_DIMS, CircleExpandDimsSummaryBuilder) + CIRCLE_NODE(FAKE_QUANT, CircleFakeQuantSummaryBuilder) + CIRCLE_NODE(FILL, 
CircleFillSummaryBuilder) + CIRCLE_NODE(FLOOR, CircleFloorSummaryBuilder) + CIRCLE_NODE(FLOOR_DIV, CircleFloorDivSummaryBuilder) + CIRCLE_NODE(FLOOR_MOD, CircleFloorModSummaryBuilder) + CIRCLE_NODE(FULLY_CONNECTED, CircleFullyConnectedSummaryBuilder) + CIRCLE_NODE(GATHER, CircleGatherSummaryBuilder) + CIRCLE_NODE(GATHER_ND, CircleGatherNdSummaryBuilder) + CIRCLE_NODE(GREATER, CircleGreaterSummaryBuilder) + CIRCLE_NODE(GREATER_EQUAL, CircleGreaterEqualSummaryBuilder) + CIRCLE_NODE(IF, CircleIfSummaryBuilder) + CIRCLE_NODE(INSTANCE_NORM, CircleInstanceNormSummaryBuilder) + CIRCLE_NODE(L2_NORMALIZATION, CircleL2NormalizeSummaryBuilder) + CIRCLE_NODE(L2_POOL_2D, CircleL2Pool2DSummaryBuilder) + CIRCLE_NODE(LEAKY_RELU, CircleLeakyReluSummaryBuilder) + CIRCLE_NODE(LESS, CircleLessSummaryBuilder) + CIRCLE_NODE(LESS_EQUAL, CircleLessEqualSummaryBuilder) + CIRCLE_NODE(LOCAL_RESPONSE_NORMALIZATION, CircleLocalResponseNormalizationSummaryBuilder) + CIRCLE_NODE(LOG, CircleLogSummaryBuilder) + CIRCLE_NODE(LOGICAL_AND, CircleLogicalAndSummaryBuilder) + CIRCLE_NODE(LOGICAL_NOT, CircleLogicalNotSummaryBuilder) + CIRCLE_NODE(LOGICAL_OR, CircleLogicalOrSummaryBuilder) + CIRCLE_NODE(LOGISTIC, CircleLogisticSummaryBuilder) + CIRCLE_NODE(LOG_SOFTMAX, CircleLogSoftmaxSummaryBuilder) + CIRCLE_NODE(MATRIX_DIAG, CircleMatrixDiagSummaryBuilder) + CIRCLE_NODE(MATRIX_SET_DIAG, CircleMatrixSetDiagSummaryBuilder) + CIRCLE_NODE(MAXIMUM, CircleMaximumSummaryBuilder) + CIRCLE_NODE(MAX_POOL_2D, CircleMaxPool2DSummaryBuilder) + CIRCLE_NODE(MEAN, CircleMeanSummaryBuilder) + CIRCLE_NODE(MINIMUM, CircleMinimumSummaryBuilder) + CIRCLE_NODE(MIRROR_PAD, CircleMirrorPadSummaryBuilder) + CIRCLE_NODE(MUL, CircleMulSummaryBuilder) + CIRCLE_NODE(NEG, CircleNegSummaryBuilder) + CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4SummaryBuilder) + CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5SummaryBuilder) + CIRCLE_NODE(NOT_EQUAL, CircleNotEqualSummaryBuilder) + CIRCLE_NODE(ONE_HOT, 
CircleOneHotSummaryBuilder) + CIRCLE_NODE(PACK, CirclePackSummaryBuilder) + CIRCLE_NODE(PAD, CirclePadSummaryBuilder) + CIRCLE_NODE(PADV2, CirclePadV2SummaryBuilder) + CIRCLE_NODE(POW, CirclePowSummaryBuilder) + CIRCLE_NODE(PRELU, CirclePReluSummaryBuilder) + CIRCLE_NODE(QUANTIZE, CircleQuantizeSummaryBuilder) + CIRCLE_NODE(RANGE, CircleRangeSummaryBuilder) + CIRCLE_NODE(RANK, CircleRankSummaryBuilder) + CIRCLE_NODE(REDUCE_ANY, CircleReduceAnySummaryBuilder) + CIRCLE_NODE(REDUCE_MAX, CircleReduceMaxSummaryBuilder) + CIRCLE_NODE(REDUCE_MIN, CircleReduceMinSummaryBuilder) + CIRCLE_NODE(REDUCE_PROD, CircleReduceProdSummaryBuilder) + CIRCLE_NODE(RELU, CircleReluSummaryBuilder) + CIRCLE_NODE(RELU6, CircleRelu6SummaryBuilder) + CIRCLE_NODE(RELU_N1_TO_1, CircleReluN1To1SummaryBuilder) + CIRCLE_NODE(RESHAPE, CircleReshapeSummaryBuilder) + CIRCLE_NODE(RESIZE_BILINEAR, CircleResizeBilinearSummaryBuilder) + CIRCLE_NODE(RESIZE_NEAREST_NEIGHBOR, CircleResizeNearestNeighborSummaryBuilder) + CIRCLE_NODE(REVERSE_SEQUENCE, CircleReverseSequenceSummaryBuilder) + CIRCLE_NODE(REVERSE_V2, CircleReverseV2SummaryBuilder) + CIRCLE_NODE(ROUND, CircleRoundSummaryBuilder) + CIRCLE_NODE(RSQRT, CircleRsqrtSummaryBuilder) + CIRCLE_NODE(SCATTER_ND, CircleScatterNdSummaryBuilder) + CIRCLE_NODE(SEGMENT_SUM, CircleSegmentSumSummaryBuilder) + CIRCLE_NODE(SELECT, CircleSelectSummaryBuilder) + CIRCLE_NODE(SELECT_V2, CircleSelectV2SummaryBuilder) + CIRCLE_NODE(SHAPE, CircleShapeSummaryBuilder) + CIRCLE_NODE(SIN, CircleSinSummaryBuilder) + CIRCLE_NODE(SLICE, CircleSliceSummaryBuilder) + CIRCLE_NODE(SOFTMAX, CircleSoftmaxSummaryBuilder) + CIRCLE_NODE(SPACE_TO_BATCH_ND, CircleSpaceToBatchNDSummaryBuilder) + CIRCLE_NODE(SPACE_TO_DEPTH, CircleSpaceToDepthSummaryBuilder) + CIRCLE_NODE(SPARSE_TO_DENSE, CircleSparseToDenseSummaryBuilder) + CIRCLE_NODE(SPLIT, CircleSplitSummaryBuilder) + CIRCLE_NODE(SPLIT_V, CircleSplitVSummaryBuilder) + CIRCLE_NODE(SQRT, CircleSqrtSummaryBuilder) + CIRCLE_NODE(SQUARE, 
CircleSquareSummaryBuilder) + CIRCLE_NODE(SQUARED_DIFFERENCE, CircleSquaredDifferenceSummaryBuilder) + CIRCLE_NODE(SQUEEZE, CircleSqueezeSummaryBuilder) + CIRCLE_NODE(STRIDED_SLICE, CircleStridedSliceSummaryBuilder) + CIRCLE_NODE(SUB, CircleSubSummaryBuilder) + CIRCLE_NODE(SUM, CircleSumSummaryBuilder) + CIRCLE_NODE(SVDF, CircleSVDFSummaryBuilder) + CIRCLE_NODE(TANH, CircleTanhSummaryBuilder) + CIRCLE_NODE(TILE, CircleTileSummaryBuilder) + CIRCLE_NODE(TOPK_V2, CircleTopKV2SummaryBuilder) + CIRCLE_NODE(TRANSPOSE, CircleTransposeSummaryBuilder) + CIRCLE_NODE(TRANSPOSE_CONV, CircleTransposeConvSummaryBuilder) + CIRCLE_NODE(UNIDIRECTIONAL_SEQUENCE_LSTM, CircleUnidirectionalSequenceLSTMSummaryBuilder) + CIRCLE_NODE(UNIQUE, CircleUniqueSummaryBuilder) + CIRCLE_NODE(UNPACK, CircleUnpackSummaryBuilder) + CIRCLE_NODE(WHERE, CircleWhereSummaryBuilder) + CIRCLE_NODE(WHILE, CircleWhileSummaryBuilder) + CIRCLE_NODE(ZEROS_LIKE, CircleZerosLikeSummaryBuilder) + + CIRCLE_NODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT, + CircleBidirectionalSequenceLSTMOutSummaryBuilder) + CIRCLE_NODE(CIRCLECUSTOMOUT, CircleCustomOutSummaryBuilder) + CIRCLE_NODE(CIRCLEIFOUT, CircleIfOutSummaryBuilder) + CIRCLE_NODE(CIRCLEINPUT, CircleInputSummaryBuilder) + CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, CircleNonMaxSuppressionV4OutSummaryBuilder) + CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, CircleNonMaxSuppressionV5OutSummaryBuilder) + CIRCLE_NODE(CIRCLEOUTPUT, CircleOutputSummaryBuilder) + CIRCLE_NODE(CIRCLEOUTPUTDUMMY, CircleOutputDummySummaryBuilder) + CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, CircleOutputExcludeSummaryBuilder) + CIRCLE_NODE(CIRCLESPLITOUT, CircleSplitOutSummaryBuilder) + CIRCLE_NODE(CIRCLESPLITVOUT, CircleSplitVOutSummaryBuilder) + CIRCLE_NODE(CIRCLETOPKV2OUT, CircleTopKV2OutSummaryBuilder) + CIRCLE_NODE(CIRCLEUNIQUEOUT, CircleUniqueOutSummaryBuilder) + CIRCLE_NODE(CIRCLEUNPACKOUT, CircleUnpackOutSummaryBuilder) + CIRCLE_NODE(CIRCLEVARIABLE, CircleVariableSummaryBuilder) + 
CIRCLE_NODE(CIRCLEWHILEOUT, CircleWhileOutSummaryBuilder) + + default: + return nullptr; + +#undef CIRCLE_NODE + } +} + +} // namespace luci diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.h b/compiler/luci/logex/src/CircleNodeSummaryBuilder.h new file mode 100644 index 000000000..e21d77310 --- /dev/null +++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__ +#define __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__ + +#include <luci/IR/CircleNode.h> +#include <locop/NodeSummary.h> +#include <locop/SymbolTable.h> + +#include <memory> +#include <sstream> +#include <vector> + +namespace luci +{ + +class CircleNodeSummaryBuilder +{ +public: + bool build(const loco::Node *node, const locop::SymbolTable *tbl, locop::NodeSummary &s); + +private: + /** + * @brief Template methods for building node summary. + * Default behavior is building a node which has no input. 
+ */ + virtual bool validate(const luci::CircleNode *node); + virtual std::vector<std::string> get_input_names(const luci::CircleNode *node); + virtual void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); + virtual void update_status(locop::NodeSummary &s); + +private: + std::unique_ptr<CircleNodeSummaryBuilder> create_builder(const luci::CircleNode *node); +}; + +} // namespace luci + +#endif // __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__ diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp new file mode 100644 index 000000000..89ea213e0 --- /dev/null +++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CircleNodeSummaryBuilder.h" + +#include <luci/IR/CircleNodes.h> +#include <locop/NodeSummary.h> +#include <locop/SymbolTable.h> + +#include <gtest/gtest.h> + +namespace +{ + +class MockSymbolTable : public locop::SymbolTable +{ + std::string lookup(const loco::Node *) const override + { + return "Do nothing because it is mocking Symbol Table!"; + } +}; + +class CircleNodeSummaryBuilderTest : public ::testing::Test +{ +protected: + bool mock_build(const loco::Node *node) + { + return luci::CircleNodeSummaryBuilder().build(node, &_tbl, _s); + } + +protected: + MockSymbolTable _tbl; + locop::NodeSummary _s; +}; + +} // namespace + +TEST_F(CircleNodeSummaryBuilderTest, Add_validate) +{ + luci::CircleAdd node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, Add_validate_fused_NEG) +{ + luci::CircleAdd node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate) +{ + luci::CircleAveragePool2D node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + node.padding(luci::Padding::SAME); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate_fused_NEG) +{ + luci::CircleAveragePool2D node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + node.padding(luci::Padding::SAME); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate_padding_NEG) +{ + luci::CircleAveragePool2D node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + node.padding(luci::Padding::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, BCQFullyConnected_validate) +{ + luci::CircleBCQFullyConnected node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, 
BCQFullyConnected_validate_fused_NEG) +{ + luci::CircleBCQFullyConnected node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, Concatenation_validate) +{ + luci::CircleConcatenation node(2); + node.fusedActivationFunction(luci::FusedActFunc::RELU); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, Concatenation_validate_fused_NEG) +{ + luci::CircleConcatenation node(2); + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate) +{ + luci::CircleConv2D node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + node.padding(luci::Padding::SAME); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate_fused_NEG) +{ + luci::CircleConv2D node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + node.padding(luci::Padding::SAME); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate_padding_NEG) +{ + luci::CircleConv2D node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + node.padding(luci::Padding::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate) +{ + luci::CircleDepthwiseConv2D node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + node.padding(luci::Padding::SAME); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate_fused_NEG) +{ + luci::CircleDepthwiseConv2D node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + node.padding(luci::Padding::SAME); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate_padding_NEG) +{ + luci::CircleDepthwiseConv2D node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + node.padding(luci::Padding::UNDEFINED); + 
EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, FullyConnected_validate) +{ + luci::CircleFullyConnected node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, FullyConnected_validate_fused_NEG) +{ + luci::CircleFullyConnected node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, InstanceNorm_validate) +{ + luci::CircleInstanceNorm node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, InstanceNorm_validate_fused_NEG) +{ + luci::CircleInstanceNorm node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, L2Normalize_validate) +{ + luci::CircleL2Normalize node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, L2Normalize_validate_fused_NEG) +{ + luci::CircleL2Normalize node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate) +{ + luci::CircleL2Pool2D node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + node.padding(luci::Padding::SAME); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate_fused_NEG) +{ + luci::CircleL2Pool2D node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + node.padding(luci::Padding::SAME); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate_padding_NEG) +{ + luci::CircleL2Pool2D node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + node.padding(luci::Padding::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, 
MaxPool2D_validate) +{ + luci::CircleMaxPool2D node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + node.padding(luci::Padding::SAME); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, MaxPool2D_validate_fused_NEG) +{ + luci::CircleMaxPool2D node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + node.padding(luci::Padding::SAME); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, MaxPool2D_validate_padding_NEG) +{ + luci::CircleMaxPool2D node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + node.padding(luci::Padding::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, MirrorPad_validate) +{ + luci::CircleMirrorPad node; + node.mode(luci::MirrorPadMode::REFLECT); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, MirrorPad_validate_mirror_padding_NEG) +{ + luci::CircleMirrorPad node; + node.mode(luci::MirrorPadMode::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, Mul_validate) +{ + luci::CircleMul node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, Mul_validate_fused_NEG) +{ + luci::CircleMul node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, SVDF_validate) +{ + luci::CircleSVDF node; + node.fusedActivationFunction(luci::FusedActFunc::RELU); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, SVDF_validate_fused_NEG) +{ + luci::CircleSVDF node; + node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, TransposeConv_validate) +{ + luci::CircleTransposeConv node; + node.padding(luci::Padding::SAME); + EXPECT_TRUE(mock_build(&node)); +} + +TEST_F(CircleNodeSummaryBuilderTest, 
TransposeConv_validate_padding_NEG) +{ + luci::CircleTransposeConv node; + node.padding(luci::Padding::UNDEFINED); + EXPECT_FALSE(mock_build(&node)); +} diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp new file mode 100644 index 000000000..6df9270e3 --- /dev/null +++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp @@ -0,0 +1,1128 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "CircleNodeSummaryBuilders.h" + +#include <luci/IR/CircleNode.h> +#include <luci/IR/CircleNodes.h> +#include <loco/IR/Node.h> + +#include <string> +#include <vector> + +namespace +{ + +std::string to_str(loco::DataType type) +{ + switch (type) + { + case loco::DataType::U8: + return "UINT8"; + case loco::DataType::U16: + return "UINT16"; + case loco::DataType::U32: + return "UINT32"; + case loco::DataType::U64: + return "UINT64"; + + case loco::DataType::S8: + return "INT8"; + case loco::DataType::S16: + return "INT16"; + case loco::DataType::S32: + return "INT32"; + case loco::DataType::S64: + return "INT64"; + + case loco::DataType::FLOAT16: + return "FLOAT16"; + case loco::DataType::FLOAT32: + return "FLOAT32"; + case loco::DataType::FLOAT64: + return "FLOAT64"; + + case loco::DataType::BOOL: + return "BOOL"; + + default: + return "Error"; + } +} + +std::string to_str(bool value) { return value ? 
"true" : "false"; } + +std::string to_str(luci::FusedActFunc fused) +{ + switch (fused) + { + case luci::FusedActFunc::NONE: + return "NONE"; + case luci::FusedActFunc::RELU: + return "RELU"; + case luci::FusedActFunc::RELU_N1_TO_1: + return "RELU_N1_TO_1"; + case luci::FusedActFunc::RELU6: + return "RELU6"; + case luci::FusedActFunc::TANH: + return "TANH"; + case luci::FusedActFunc::SIGN_BIT: + return "SIGN_BIT"; + default: + return "Error"; + } +} + +std::string to_str(luci::Padding padding) +{ + switch (padding) + { + case luci::Padding::SAME: + return "SAME"; + case luci::Padding::VALID: + return "VALID"; + default: + return "Error"; + } +} + +std::string to_str(const luci::Stride *stride) +{ + return std::to_string(stride->h()) + "," + std::to_string(stride->w()); +} + +std::string to_str(const luci::Filter *filter) +{ + return std::to_string(filter->h()) + "," + std::to_string(filter->w()); +} + +std::string to_str(luci::MirrorPadMode mode) +{ + switch (mode) + { + case luci::MirrorPadMode::REFLECT: + return "REFLECT"; + case luci::MirrorPadMode::SYMMETRIC: + return "SYMMETRIC"; + default: + return "Error"; + } +} + +} // namespace + +namespace luci +{ + +std::vector<std::string> CircleNodeWithXSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"x"}; +} + +std::vector<std::string> +CircleNodeWithINPUTSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input"}; +} + +std::vector<std::string> CircleNodeWithXYSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"x", "y"}; +} + +std::vector<std::string> +CircleNodeWithFEATURESSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"features"}; +} + +} // namespace luci + +namespace luci +{ + +bool CircleAddSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto add = loco::must_cast<const luci::CircleAdd *>(node); + if (add->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + + return true; +} + +void 
CircleAddSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s) +{ + auto add = loco::must_cast<const luci::CircleAdd *>(node); + s.args().append("fused_activation_function", to_str(add->fusedActivationFunction())); +} + +std::vector<std::string> CircleAddNSummaryBuilder::get_input_names(const luci::CircleNode *node) +{ + return std::vector<std::string>(node->arity(), "inputs"); +} + +std::vector<std::string> CircleArgMaxSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "dimension"}; +} + +void CircleArgMaxSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto argmax = loco::must_cast<const luci::CircleArgMax *>(node); + s.args().append("output_type", to_str(argmax->output_type())); +} + +std::vector<std::string> CircleArgMinSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "dimension"}; +} + +void CircleArgMinSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto argmin = loco::must_cast<const luci::CircleArgMin *>(node); + s.args().append("output_type", to_str(argmin->output_type())); +} + +bool CircleAveragePool2DSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto avgpool = loco::must_cast<const luci::CircleAveragePool2D *>(node); + if (avgpool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + if (avgpool->padding() == luci::Padding::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> +CircleAveragePool2DSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"value"}; +} + +void CircleAveragePool2DSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto avgpool = loco::must_cast<const luci::CircleAveragePool2D *>(node); + s.args().append("filter(h,w)", to_str(avgpool->filter())); + s.args().append("stride(h,w)", to_str(avgpool->stride())); + s.args().append("padding", 
to_str(avgpool->padding())); + s.args().append("fused_activation_function", to_str(avgpool->fusedActivationFunction())); +} + +void CircleBatchMatMulSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto batchmatmul = loco::must_cast<const luci::CircleBatchMatMul *>(node); + s.args().append("adj_x", to_str(batchmatmul->adj_x())); + s.args().append("adj_y", to_str(batchmatmul->adj_y())); +} + +std::vector<std::string> +CircleBatchToSpaceNDSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "block_shape", "crops"}; +} + +bool CircleBCQFullyConnectedSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto bcq_fc = loco::must_cast<const luci::CircleBCQFullyConnected *>(node); + if (bcq_fc->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> +CircleBCQFullyConnectedSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "weights_scales", "weights_binary", "bias", "weights_clusters"}; +} + +void CircleBCQFullyConnectedSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto bcq_fc = loco::must_cast<const luci::CircleBCQFullyConnected *>(node); + s.args().append("fused_activation_function", to_str(bcq_fc->fusedActivationFunction())); + s.args().append("weights_hidden_size", std::to_string(bcq_fc->weights_hidden_size())); +} + +std::vector<std::string> CircleBCQGatherSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input_scales", "input_binary", "indices", "input_clusters"}; +} + +void CircleBCQGatherSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto bcq_gather = loco::must_cast<const luci::CircleBCQGather *>(node); + s.args().append("axis", std::to_string(bcq_gather->axis())); + s.args().append("input_hidden_size", std::to_string(bcq_gather->input_hidden_size())); +} + 
+std::vector<std::string> +CircleBidirectionalSequenceLSTMSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", + "fw_input_to_input_weights", + "fw_input_to_forget_weights", + "fw_input_to_cell_weights", + "fw_input_to_output_weights", + "fw_recurrent_to_input_weights", + "fw_recurrent_to_forget_weights", + "fw_recurrent_to_cell_weights", + "fw_recurrent_to_output_weights", + "fw_cell_to_input_weights", + "fw_cell_to_forget_weights", + "fw_cell_to_output_weights", + "fw_input_gate_bias", + "fw_forget_gate_bias", + "fw_cell_gate_bias", + "fw_output_gate_bias", + "fw_projection_weights", + "fw_projection_bias", + "bw_input_to_input_weights", + "bw_input_to_forget_weights", + "bw_input_to_cell_weights", + "bw_input_to_output_weights", + "bw_recurrent_to_input_weights", + "bw_recurrent_to_forget_weights", + "bw_recurrent_to_cell_weights", + "bw_recurrent_to_output_weights", + "bw_cell_to_input_weights", + "bw_cell_to_forget_weights", + "bw_cell_to_output_weights", + "bw_input_gate_bias", + "bw_forget_gate_bias", + "bw_cell_gate_bias", + "bw_output_gate_bias", + "bw_projection_weights", + "bw_projection_bias", + "fw_activation_state", + "fw_cell_state", + "bw_activation_state", + "bw_cell_state", + "auxillary_input", + "fw_auxillary_input_to_input_weights", + "fw_auxillary_input_to_forget_weights", + "fw_auxillary_input_to_cell_weights", + "fw_auxillary_input_to_output_weights", + "bw_auxillary_input_to_input_weights", + "bw_auxillary_input_to_forget_weights", + "bw_auxillary_input_to_cell_weights", + "bw_auxillary_input_to_output_weights"}; +} + +void CircleBidirectionalSequenceLSTMSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto lstm = loco::must_cast<const luci::CircleBidirectionalSequenceLSTM *>(node); + s.args().append("cell_clip", to_str(lstm->cell_clip())); + s.args().append("proj_clip", to_str(lstm->proj_clip())); + s.args().append("merge_outputs", to_str(lstm->merge_outputs())); + 
s.args().append("time_major", to_str(lstm->time_major())); + s.args().append("asymmetric_quantize_inputs", to_str(lstm->asymmetric_quantize_inputs())); +} + +std::vector<std::string> CircleCastSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"x"}; +} + +void CircleCastSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s) +{ + auto cast = loco::must_cast<const luci::CircleCast *>(node); + s.args().append("in_data_type", to_str(cast->in_data_type())); + s.args().append("out_data_type", to_str(cast->out_data_type())); +} + +bool CircleConcatenationSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto concat = loco::must_cast<const luci::CircleConcatenation *>(node); + if (concat->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> +CircleConcatenationSummaryBuilder::get_input_names(const luci::CircleNode *node) +{ + return std::vector<std::string>(node->arity(), "values"); +} + +void CircleConcatenationSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto concat = loco::must_cast<const luci::CircleConcatenation *>(node); + s.args().append("axis", std::to_string(concat->axis())); + s.args().append("fused_activation_function", to_str(concat->fusedActivationFunction())); +} + +void CircleConstSummaryBuilder::update_status(locop::NodeSummary &s) +{ + s.state(locop::NodeDesc::State::PartiallyKnown); +} + +bool CircleConv2DSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto conv2d = loco::must_cast<const luci::CircleConv2D *>(node); + if (conv2d->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + if (conv2d->padding() == luci::Padding::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> CircleConv2DSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "filter", "bias"}; +} + +void 
CircleConv2DSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto conv2d = loco::must_cast<const luci::CircleConv2D *>(node); + s.args().append("stride(h,w)", to_str(conv2d->stride())); + s.args().append("dilation(h,w)", to_str(conv2d->dilation())); + s.args().append("padding", to_str(conv2d->padding())); + s.args().append("fused_activation_function", to_str(conv2d->fusedActivationFunction())); +} + +std::vector<std::string> CircleCustomSummaryBuilder::get_input_names(const luci::CircleNode *node) +{ + auto input_names = std::vector<std::string>(); + for (uint32_t i = 0; i < node->arity(); ++i) + input_names.push_back("input" + std::to_string(i)); + return input_names; +} + +void CircleCustomSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto custom = loco::must_cast<const luci::CircleCustom *>(node); + s.args().append("custom_code", custom->custom_code()); +} + +void CircleDepthToSpaceSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto depth_to_space = loco::must_cast<const luci::CircleDepthToSpace *>(node); + s.args().append("block_size", std::to_string(depth_to_space->block_size())); +} + +bool CircleDepthwiseConv2DSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto dw_conv2d = loco::must_cast<const luci::CircleDepthwiseConv2D *>(node); + if (dw_conv2d->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + if (dw_conv2d->padding() == luci::Padding::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> +CircleDepthwiseConv2DSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "filter", "bias"}; +} + +void CircleDepthwiseConv2DSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto dw_conv2d = loco::must_cast<const luci::CircleDepthwiseConv2D *>(node); + s.args().append("stride(h,w)", 
to_str(dw_conv2d->stride())); + s.args().append("dilation(h,w)", to_str(dw_conv2d->dilation())); + s.args().append("padding", to_str(dw_conv2d->padding())); + s.args().append("depthMultiplier", std::to_string(dw_conv2d->depthMultiplier())); + s.args().append("fused_activation_function", to_str(dw_conv2d->fusedActivationFunction())); +} + +std::vector<std::string> CircleExpandDimsSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "axis"}; +} + +std::vector<std::string> CircleFakeQuantSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"inputs"}; +} + +void CircleFakeQuantSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto fake_quant = loco::must_cast<const luci::CircleFakeQuant *>(node); + s.args().append("min", std::to_string(fake_quant->min())); + s.args().append("max", std::to_string(fake_quant->max())); + s.args().append("num_bits", std::to_string(fake_quant->num_bits())); + s.args().append("narrow_range", to_str(fake_quant->narrow_range())); +} + +std::vector<std::string> CircleFillSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"dims", "value"}; +} + +bool CircleFullyConnectedSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto fc = loco::must_cast<const luci::CircleFullyConnected *>(node); + if (fc->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> +CircleFullyConnectedSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "weights", "bias"}; +} + +void CircleFullyConnectedSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto fc = loco::must_cast<const luci::CircleFullyConnected *>(node); + s.args().append("fused_activation_function", to_str(fc->fusedActivationFunction())); +} + +std::vector<std::string> CircleGatherSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return 
{"params", "indices"}; +} + +void CircleGatherSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto gather = loco::must_cast<const luci::CircleGather *>(node); + s.args().append("axis", std::to_string(gather->axis())); +} + +std::vector<std::string> CircleGatherNdSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"params", "indices"}; +} + +std::vector<std::string> CircleIfSummaryBuilder::get_input_names(const luci::CircleNode *node) +{ + auto circle_if = loco::must_cast<const luci::CircleIf *>(node); + + auto input_names = std::vector<std::string>(); + input_names.push_back("cond"); + for (uint32_t i = 0; i < circle_if->input_count(); ++i) + input_names.push_back("input"); + + return input_names; +} + +void CircleIfSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s) +{ + auto circle_if = loco::must_cast<const luci::CircleIf *>(node); + + if (circle_if->then_graph() != nullptr) + s.args().append("then_graph", circle_if->then_graph()->name()); + else + s.args().append("then_branch", std::to_string(circle_if->then_branch())); + + if (circle_if->else_graph() != nullptr) + s.args().append("else_graph", circle_if->else_graph()->name()); + else + s.args().append("else_branch", std::to_string(circle_if->else_branch())); +} + +bool CircleInstanceNormSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto instnorm = loco::must_cast<const luci::CircleInstanceNorm *>(node); + if (instnorm->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> CircleInstanceNormSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "gamma", "beta"}; +} + +void CircleInstanceNormSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto instnorm = loco::must_cast<const luci::CircleInstanceNorm *>(node); + s.args().append("epsilon", 
std::to_string(instnorm->epsilon())); + s.args().append("fused_activation_function", to_str(instnorm->fusedActivationFunction())); +} + +bool CircleL2NormalizeSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto l2norm = loco::must_cast<const luci::CircleL2Normalize *>(node); + if (l2norm->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> CircleL2NormalizeSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"x"}; +} + +void CircleL2NormalizeSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto l2norm = loco::must_cast<const luci::CircleL2Normalize *>(node); + s.args().append("fused_activation_function", to_str(l2norm->fusedActivationFunction())); +} + +bool CircleL2Pool2DSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto l2pool = loco::must_cast<const luci::CircleL2Pool2D *>(node); + if (l2pool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + if (l2pool->padding() == luci::Padding::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> CircleL2Pool2DSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"value"}; +} + +void CircleL2Pool2DSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto l2pool = loco::must_cast<const luci::CircleL2Pool2D *>(node); + s.args().append("filter(h,w)", to_str(l2pool->filter())); + s.args().append("stride(h,w)", to_str(l2pool->stride())); + s.args().append("padding", to_str(l2pool->padding())); + s.args().append("fused_activation_function", to_str(l2pool->fusedActivationFunction())); +} + +void CircleLeakyReluSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto leaky_relu = loco::must_cast<const luci::CircleLeakyRelu *>(node); + s.args().append("alpha", std::to_string(leaky_relu->alpha())); +} + +void 
CircleLocalResponseNormalizationSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto lrn = loco::must_cast<const luci::CircleLocalResponseNormalization *>(node); + s.args().append("radius", std::to_string(lrn->radius())); + s.args().append("bias", std::to_string(lrn->bias())); + s.args().append("alpha", std::to_string(lrn->alpha())); + s.args().append("beta", std::to_string(lrn->beta())); +} + +std::vector<std::string> CircleLogSoftmaxSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"logits"}; +} + +std::vector<std::string> CircleMatrixDiagSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"diagonal"}; +} + +std::vector<std::string> +CircleMatrixSetDiagSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "diagonal"}; +} + +bool CircleMaxPool2DSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto maxpool = loco::must_cast<const luci::CircleMaxPool2D *>(node); + if (maxpool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + if (maxpool->padding() == luci::Padding::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> CircleMaxPool2DSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"value"}; +} + +void CircleMaxPool2DSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto maxpool = loco::must_cast<const luci::CircleMaxPool2D *>(node); + s.args().append("filter(h,w)", to_str(maxpool->filter())); + s.args().append("stride(h,w)", to_str(maxpool->stride())); + s.args().append("padding", to_str(maxpool->padding())); + s.args().append("fused_activation_function", to_str(maxpool->fusedActivationFunction())); +} + +bool CircleMirrorPadSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto mirror_pad = loco::must_cast<const luci::CircleMirrorPad *>(node); + if (mirror_pad->mode() == luci::MirrorPadMode::UNDEFINED) + return false; + + 
return true; +} + +std::vector<std::string> CircleMirrorPadSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "paddings"}; +} + +void CircleMirrorPadSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto mirror_pad = loco::must_cast<const luci::CircleMirrorPad *>(node); + s.args().append("mode", to_str(mirror_pad->mode())); +} + +bool CircleMulSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto mul = loco::must_cast<const luci::CircleMul *>(node); + if (mul->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + + return true; +} + +void CircleMulSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s) +{ + auto mul = loco::must_cast<const luci::CircleMul *>(node); + s.args().append("fused_activation_function", to_str(mul->fusedActivationFunction())); +} + +std::vector<std::string> +CircleNonMaxSuppressionV4SummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"boxes", "scores", "max_output_size", "iou_threshold", "score_threshold"}; +} + +std::vector<std::string> +CircleNonMaxSuppressionV5SummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"boxes", "scores", "max_output_size", + "iou_threshold", "score_threshold", "soft_nms_sigma"}; +} + +std::vector<std::string> CircleOneHotSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"indices", "depth", "on_value", "off_value"}; +} + +void CircleOneHotSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto onehot = loco::must_cast<const luci::CircleOneHot *>(node); + s.args().append("axis", std::to_string(onehot->axis())); +} + +std::vector<std::string> CirclePackSummaryBuilder::get_input_names(const luci::CircleNode *node) +{ + return std::vector<std::string>(node->arity(), "values"); +} + +void CirclePackSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s) 
+{ + auto pack = loco::must_cast<const luci::CirclePack *>(node); + s.args().append("values_count", std::to_string(pack->values_count())); + s.args().append("axis", std::to_string(pack->axis())); +} + +std::vector<std::string> CirclePadSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "paddings"}; +} + +std::vector<std::string> CirclePadV2SummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "paddings", "constant_values"}; +} + +std::vector<std::string> CirclePReluSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "alpha"}; +} + +std::vector<std::string> CircleRangeSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"start", "limit", "delta"}; +} + +std::vector<std::string> CircleReshapeSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"tensor", "shape"}; +} + +void CircleReshapeSummaryBuilder::update_status(locop::NodeSummary &s) +{ + s.state(locop::NodeDesc::State::PartiallyKnown); +} + +std::vector<std::string> +CircleResizeBilinearSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "size"}; +} + +void CircleResizeBilinearSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto resize_bilinear = loco::must_cast<const luci::CircleResizeBilinear *>(node); + s.args().append("align_corners", to_str(resize_bilinear->align_corners())); + s.args().append("half_pixel_centers", to_str(resize_bilinear->half_pixel_centers())); +} + +std::vector<std::string> +CircleResizeNearestNeighborSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "size"}; +} + +void CircleResizeNearestNeighborSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto resize_nn = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(node); + s.args().append("align_corners", to_str(resize_nn->align_corners())); +} + +std::vector<std::string> 
+CircleReverseSequenceSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "seq_lengths"}; +} + +void CircleReverseSequenceSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto reverse_seq = loco::must_cast<const luci::CircleReverseSequence *>(node); + s.args().append("seq_axis", std::to_string(reverse_seq->seq_axis())); + s.args().append("batch_axis", std::to_string(reverse_seq->batch_axis())); +} + +std::vector<std::string> CircleReverseV2SummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"tensor", "axis"}; +} + +std::vector<std::string> CircleScatterNdSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"indices", "updates", "shape"}; +} + +std::vector<std::string> CircleSegmentSumSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "segment_ids"}; +} + +std::vector<std::string> CircleSelectSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"condition", "t", "e"}; +} + +std::vector<std::string> CircleSelectV2SummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"condition", "t", "e"}; +} + +void CircleShapeSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto shape = loco::must_cast<const luci::CircleShape *>(node); + s.args().append("out_type", to_str(shape->out_type())); +} + +std::vector<std::string> CircleSliceSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "begin", "size"}; +} + +std::vector<std::string> CircleSoftmaxSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"logits"}; +} + +void CircleSoftmaxSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto softmax = loco::must_cast<const luci::CircleSoftmax *>(node); + s.args().append("beta", to_str(softmax->beta())); +} + +std::vector<std::string> 
+CircleSpaceToBatchNDSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "block_shape", "paddings"}; +} + +void CircleSpaceToDepthSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto space_to_depth = loco::must_cast<const luci::CircleSpaceToDepth *>(node); + s.args().append("block_size", to_str(space_to_depth->block_size())); +} + +std::vector<std::string> +CircleSparseToDenseSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"indices", "output_shape", "values", "default_value"}; +} + +void CircleSparseToDenseSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto sparse_to_dense = loco::must_cast<const luci::CircleSparseToDense *>(node); + s.args().append("validate_indices", to_str(sparse_to_dense->validate_indices())); +} + +std::vector<std::string> CircleSplitSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"split_dim", "input"}; +} + +void CircleSplitSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto split = loco::must_cast<const luci::CircleSplit *>(node); + s.args().append("num_split", std::to_string(split->num_split())); +} + +std::vector<std::string> CircleSplitVSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "size_splits", "split_dim"}; +} + +void CircleSplitVSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto split_v = loco::must_cast<const luci::CircleSplitV *>(node); + s.args().append("num_split", std::to_string(split_v->num_split())); +} + +void CircleSqueezeSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto squeeze = loco::must_cast<const luci::CircleSqueeze *>(node); + + std::string squeeze_dims = "("; + for (size_t i = 0; i < squeeze->squeeze_dims().size(); ++i) + { + if (i != 0) + squeeze_dims += ", "; + squeeze_dims += 
std::to_string(squeeze->squeeze_dims().at(i)); + } + squeeze_dims += ")"; + + s.args().append("squeeze_dims", squeeze_dims); +} + +std::vector<std::string> CircleStridedSliceSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "begin", "end", "strides"}; +} + +void CircleStridedSliceSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto strided_slice = loco::must_cast<const luci::CircleStridedSlice *>(node); + s.args().append("begin_mask", std::to_string(strided_slice->begin_mask())); + s.args().append("end_mask", std::to_string(strided_slice->end_mask())); + s.args().append("ellipsis_mask", std::to_string(strided_slice->ellipsis_mask())); + s.args().append("new_axis_mask", std::to_string(strided_slice->new_axis_mask())); + s.args().append("shrink_axis_mask", std::to_string(strided_slice->shrink_axis_mask())); +} + +bool CircleSVDFSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto svdf = loco::must_cast<const luci::CircleSVDF *>(node); + if (svdf->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> CircleSVDFSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "weight_feature", "weight_time", "bias", "State"}; +} + +void CircleSVDFSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s) +{ + auto svdf = loco::must_cast<const luci::CircleSVDF *>(node); + s.args().append("rank", to_str(svdf->svdf_rank())); + s.args().append("asymmetric_quantize_inputs", to_str(svdf->asymmetric_quantize_inputs())); + s.args().append("fused_activation_function", to_str(svdf->fusedActivationFunction())); +} + +std::vector<std::string> CircleTileSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "multiples"}; +} + +std::vector<std::string> CircleTopKV2SummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "k"}; +} + 
+std::vector<std::string> CircleTransposeSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"a", "perm"}; +} + +bool CircleTransposeConvSummaryBuilder::validate(const luci::CircleNode *node) +{ + auto transpose_conv = loco::must_cast<const luci::CircleTransposeConv *>(node); + if (transpose_conv->padding() == luci::Padding::UNDEFINED) + return false; + + return true; +} + +std::vector<std::string> +CircleTransposeConvSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"inputSizes", "filter", "outBackProp", "bias"}; +} + +void CircleTransposeConvSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto transpose_conv = loco::must_cast<const luci::CircleTransposeConv *>(node); + s.args().append("stride(h,w)", to_str(transpose_conv->stride())); + s.args().append("padding", to_str(transpose_conv->padding())); +} + +std::vector<std::string> +CircleUnidirectionalSequenceLSTMSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", + "input_to_input_weights", + "input_to_forget_weights", + "input_to_cell_weights", + "input_to_output_weights", + "recurrent_to_input_weights", + "recurrent_to_forget_weights", + "recurrent_to_cell_weights", + "recurrent_to_output_weights", + "cell_to_input_weights", + "cell_to_forget_weights", + "cell_to_output_weights", + "input_gate_bias", + "forget_gate_bias", + "cell_gate_bias", + "output_gate_bias", + "projection_weights", + "projection_bias", + "activation_state", + "cell_state", + "input_layer_norm_coefficients", + "forget_layer_norm_coefficients", + "cell_layer_norm_coefficients", + "output_layer_norm_coefficients"}; +} + +void CircleUnidirectionalSequenceLSTMSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto lstm = loco::must_cast<const luci::CircleUnidirectionalSequenceLSTM *>(node); + s.args().append("cell_clip", to_str(lstm->cell_clip())); + s.args().append("proj_clip", 
to_str(lstm->proj_clip())); + s.args().append("time_major", to_str(lstm->time_major())); + s.args().append("asymmetric_quantize_inputs", to_str(lstm->asymmetric_quantize_inputs())); +} + +void CircleUniqueSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto unique = loco::must_cast<const luci::CircleUnique *>(node); + s.args().append("idx_out_type", to_str(unique->idx_out_type())); +} + +std::vector<std::string> CircleUnpackSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"value"}; +} + +void CircleUnpackSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto unpack = loco::must_cast<const luci::CircleUnpack *>(node); + s.args().append("num", std::to_string(unpack->num())); + s.args().append("axis", std::to_string(unpack->axis())); +} +std::vector<std::string> CircleWhereSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"condition"}; +} + +std::vector<std::string> CircleWhileSummaryBuilder::get_input_names(const luci::CircleNode *node) +{ + auto circle_while = loco::must_cast<const luci::CircleWhile *>(node); + + auto input_names = std::vector<std::string>(); + for (uint32_t i = 0; i < circle_while->input_count(); ++i) + input_names.push_back("input"); + + return input_names; +} + +void CircleWhileSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto circle_while = loco::must_cast<const luci::CircleWhile *>(node); + + if (circle_while->cond_graph() != nullptr) + s.args().append("then_graph", circle_while->cond_graph()->name()); + else + s.args().append("then_branch", std::to_string(circle_while->cond_branch())); + + if (circle_while->body_graph() != nullptr) + s.args().append("else_graph", circle_while->body_graph()->name()); + else + s.args().append("else_branch", std::to_string(circle_while->body_branch())); +} + +std::vector<std::string> CircleOutputSummaryBuilder::get_input_names(const 
luci::CircleNode *) +{ + return {"from"}; +} + +std::vector<std::string> CircleTopKV2OutSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"topkv2"}; +} + +std::vector<std::string> CircleUniqueOutSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"unique"}; +} + +std::vector<std::string> CircleUnpackOutSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"unpack"}; +} + +std::vector<std::string> CircleWhileOutSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"while"}; +} + +} // namespace luci diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h new file mode 100644 index 000000000..6cd24b7f1 --- /dev/null +++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h @@ -0,0 +1,821 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__ +#define __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__ + +#include "CircleNodeSummaryBuilder.h" + +#include <luci/IR/CircleNode.h> + +#include <string> +#include <vector> + +namespace luci +{ + +class CircleNodeWithXSummaryBuilder : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleNodeWithINPUTSummaryBuilder : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleNodeWithXYSummaryBuilder : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleNodeWithFEATURESSummaryBuilder : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +template <class REDUCER_NODE> +class CircleNodeWithReducerSummaryBuilder : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *) + { + return {"input", "reduction_indices"}; + } + + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s) + { + auto mean = loco::must_cast<const REDUCER_NODE *>(node); + s.args().append("keep_dims", mean->keep_dims() ? 
"true" : "false"); + } +}; + +} // namespace luci + +namespace luci +{ + +class CircleAbsSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleAddSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleAddNSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *node); +}; + +class CircleArgMaxSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleArgMinSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleAveragePool2DSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleBatchMatMulSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +private: + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleBatchToSpaceNDSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleBCQFullyConnectedSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class 
CircleBCQGatherSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleBidirectionalSequenceLSTMSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleCastSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleCeilSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleConcatenationSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *node); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleConstSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + void update_status(locop::NodeSummary &s); +}; + +class CircleConv2DSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleCosSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleCustomSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *node); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleDepthToSpaceSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +private: + void 
build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleDepthwiseConv2DSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleDequantizeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +}; + +class CircleDivSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleEluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder +{ +}; + +class CircleEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleExpSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleExpandDimsSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleFakeQuantSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleFillSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleFloorSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleFloorDivSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleFloorModSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleFullyConnectedSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleGatherSummaryBuilder final : 
public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleGatherNdSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleGreaterSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleGreaterEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleIfSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *node); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleInstanceNormSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleL2NormalizeSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleL2Pool2DSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleLeakyReluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder +{ +private: + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleLessSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleLessEqualSummaryBuilder final : public 
CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleLocalResponseNormalizationSummaryBuilder final + : public CircleNodeWithINPUTSummaryBuilder +{ +private: + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleLogSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleLogicalAndSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleLogicalNotSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleLogicalOrSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleLogisticSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleLogSoftmaxSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleMatrixDiagSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleMatrixSetDiagSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleMaximumSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleMaxPool2DSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleMeanSummaryBuilder final : public CircleNodeWithReducerSummaryBuilder<luci::CircleMean> +{ +}; + +class CircleMinimumSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleMirrorPadSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + void 
build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleMulSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleNegSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleNonMaxSuppressionV4SummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleNonMaxSuppressionV5SummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleNotEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleOneHotSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CirclePackSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *node); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CirclePadSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CirclePadV2SummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CirclePowSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CirclePReluSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleQuantizeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +}; + +class 
CircleRangeSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleRankSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +}; + +class CircleReduceAnySummaryBuilder final + : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceAny> +{ +}; + +class CircleReduceMaxSummaryBuilder final + : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceMax> +{ +}; + +class CircleReduceMinSummaryBuilder final + : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceMin> +{ +}; + +class CircleReduceProdSummaryBuilder final + : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceProd> +{ +}; + +class CircleReluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder +{ +}; + +class CircleRelu6SummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder +{ +}; + +class CircleReluN1To1SummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder +{ +}; + +class CircleReshapeSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void update_status(locop::NodeSummary &s); +}; + +class CircleResizeBilinearSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleResizeNearestNeighborSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleReverseSequenceSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class 
CircleReverseV2SummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleRoundSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleRsqrtSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleScatterNdSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleSegmentSumSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleSelectSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleSelectV2SummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleShapeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +private: + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleSinSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleSliceSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleSoftmaxSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleSpaceToBatchNDSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleSpaceToDepthSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +private: + void build_attributes(const luci::CircleNode *node, 
locop::NodeSummary &s); +}; + +class CircleSparseToDenseSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleSplitSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleSplitVSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleSqrtSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleSquareSummaryBuilder final : public CircleNodeWithXSummaryBuilder +{ +}; + +class CircleSquaredDifferenceSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleSqueezeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +private: + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleStridedSliceSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleSubSummaryBuilder final : public CircleNodeWithXYSummaryBuilder +{ +}; + +class CircleSumSummaryBuilder final : public CircleNodeWithReducerSummaryBuilder<luci::CircleSum> +{ +}; + +class CircleSVDFSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleTanhSummaryBuilder final : public 
CircleNodeWithXSummaryBuilder +{ +}; + +class CircleTileSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleTopKV2SummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleTransposeSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleTransposeConvSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + bool validate(const luci::CircleNode *node); + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleUnidirectionalSequenceLSTMSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleUniqueSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +private: + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleUnpackSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleWhereSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleWhileSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *node); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + +class CircleZerosLikeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder 
+{ +}; + +class CircleBidirectionalSequenceLSTMOutSummaryBuilder final + : public CircleNodeWithINPUTSummaryBuilder +{ +}; + +class CircleCustomOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +}; + +class CircleIfOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +}; + +class CircleInputSummaryBuilder final : public CircleNodeSummaryBuilder +{ +}; + +class CircleNonMaxSuppressionV4OutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +}; + +class CircleNonMaxSuppressionV5OutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +}; + +class CircleOutputSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleOutputDummySummaryBuilder final : public CircleNodeSummaryBuilder +{ +}; + +class CircleOutputExcludeSummaryBuilder final : public CircleNodeSummaryBuilder +{ +}; + +class CircleSplitOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +}; + +class CircleSplitVOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +}; + +class CircleTopKV2OutSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleUniqueOutSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleUnpackOutSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +class CircleVariableSummaryBuilder final : public CircleNodeSummaryBuilder +{ +}; + +class CircleWhileOutSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector<std::string> get_input_names(const luci::CircleNode *); +}; + +} // namespace luci + +#endif // __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__ diff --git 
a/compiler/luci/logex/src/FormattedGraph.cpp b/compiler/luci/logex/src/FormattedGraph.cpp index 0588ed79e..d3b2170b0 100644 --- a/compiler/luci/logex/src/FormattedGraph.cpp +++ b/compiler/luci/logex/src/FormattedGraph.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "CircleNodeSummaryBuilder.h" #include "luci/FormattedGraph.h" #include <luci/IR/CircleDialect.h> @@ -25,2179 +26,6 @@ #include <sstream> #include <vector> -using namespace luci; -/** - * @brief dump std::vector<int64_t> values to stream - */ -std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &vi64) -{ - for (auto vi : vi64) - { - os << vi << " "; - } - return os; -} - -// For TF lite -namespace -{ - -const char *to_str(loco::DataType type) -{ - switch (type) - { - case loco::DataType::U8: - return "UINT8"; - case loco::DataType::U16: - return "UINT16"; - case loco::DataType::U32: - return "UINT32"; - case loco::DataType::U64: - return "UINT64"; - - case loco::DataType::S8: - return "INT8"; - case loco::DataType::S16: - return "INT16"; - case loco::DataType::S32: - return "INT32"; - case loco::DataType::S64: - return "INT64"; - - case loco::DataType::FLOAT16: - return "FLOAT16"; - case loco::DataType::FLOAT32: - return "FLOAT32"; - case loco::DataType::FLOAT64: - return "FLOAT64"; - - case loco::DataType::BOOL: - return "BOOL"; - - default: - return "Error"; - } -} - -const char *to_str(bool value) { return value ? 
"true" : "false"; } - -const char *to_str(luci::FusedActFunc fused) -{ - switch (fused) - { - case luci::FusedActFunc::NONE: - return "NONE"; - case luci::FusedActFunc::RELU: - return "RELU"; - case luci::FusedActFunc::RELU_N1_TO_1: - return "RELU_N1_TO_1"; - case luci::FusedActFunc::RELU6: - return "RELU6"; - case luci::FusedActFunc::TANH: - return "TANH"; - case luci::FusedActFunc::SIGN_BIT: - return "SIGN_BIT"; - default: - return "Error"; - } -} - -const char *to_str(luci::Padding padding) -{ - switch (padding) - { - case luci::Padding::SAME: - return "SAME"; - case luci::Padding::VALID: - return "VALID"; - default: - return "Error"; - } -} - -const char *to_str(luci::MirrorPadMode mode) -{ - switch (mode) - { - case luci::MirrorPadMode::REFLECT: - return "REFLECT"; - case luci::MirrorPadMode::SYMMETRIC: - return "SYMMETRIC"; - default: - return "Error"; - } -} - -std::string to_str(const luci::Stride *stride) -{ - return pepper::str(stride->h(), ",", stride->w()); -} - -std::string to_str(const luci::Filter *filter) -{ - return pepper::str(filter->h(), ",", filter->w()); -} - -std::string circle_opname(uint32_t opnum) -{ - static const std::string prefix{"circle."}; - - switch (static_cast<luci::CircleOpcode>(opnum)) - { -#define CIRCLE_NODE(OPCODE, CLASS) \ - case luci::CircleOpcode::OPCODE: \ - return prefix + #OPCODE; -#define CIRCLE_VNODE CIRCLE_NODE -#include <luci/IR/CircleNodes.lst> -#undef CIRCLE_VNODE -#undef CIRCLE_NODE - default: - break; - }; - - return prefix + "Invalid"; -} - -// CircleNodeSummaryBuilder with default implementation -class CircleNodeSummaryBuilderBase : public locop::NodeSummaryBuilder -{ -public: - CircleNodeSummaryBuilderBase(const locop::SymbolTable *tbl) : _tbl{tbl} - { - // DO NOTHING - } - -public: - bool build(const loco::Node *, locop::NodeSummary &s) const final; - -protected: -#define CIRCLE_NODE(OPCODE, CLASS) \ - virtual bool summary(const CLASS *, locop::NodeSummary &) const { return false; } -#define CIRCLE_VNODE 
CIRCLE_NODE -#include <luci/IR/CircleNodes.lst> -#undef CIRCLE_VNODE -#undef CIRCLE_NODE - -protected: - const locop::SymbolTable *tbl(void) const { return _tbl; } - -private: - const locop::SymbolTable *_tbl; -}; - -template <class CIRCLENODE> -bool use_x(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s) -{ - s.args().append("x", tbl->lookup(node->x())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -template <class CIRCLENODE> -bool use_input(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -template <class CIRCLENODE> -bool use_features(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s) -{ - s.args().append("features", tbl->lookup(node->features())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -template <class CIRCLENODE> -bool use_xy(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s) -{ - s.args().append("x", tbl->lookup(node->x())); - s.args().append("y", tbl->lookup(node->y())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -template <class CIRCLENODE> -bool use_xy_act(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s) -{ - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - - s.args().append("x", tbl->lookup(node->x())); - s.args().append("y", tbl->lookup(node->y())); - s.args().append("fused_activation_function", to_str(node->fusedActivationFunction())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -template <class CIRCLENODE> -bool use_reducer(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("reduction_indices", tbl->lookup(node->reduction_indices())); - 
s.args().append("keep_dims", node->keep_dims() ? "true" : "false"); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -template <class CIRCLENODE> -bool use_ido(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("dimension", tbl->lookup(node->dimension())); - s.args().append("output_type", to_str(node->output_type())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAddN *node, - locop::NodeSummary &s) -{ - for (uint32_t i = 0; i < node->arity(); ++i) - s.args().append("inputs", tbl->lookup(node->inputs(i))); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAveragePool2D *node, - locop::NodeSummary &s) -{ - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - - s.args().append("value", tbl->lookup(node->value())); - s.args().append("filter(h,w)", to_str(node->filter())); - s.args().append("stride(h,w)", to_str(node->stride())); - s.args().append("padding", to_str(node->padding())); - s.args().append("fused", to_str(node->fusedActivationFunction())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchMatMul *node, - locop::NodeSummary &s) -{ - s.args().append("x", tbl->lookup(node->x())); - s.args().append("y", tbl->lookup(node->y())); - s.args().append("adj_x", to_str(node->adj_x())); - s.args().append("adj_y", to_str(node->adj_y())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchToSpaceND *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("block_shape", tbl->lookup(node->block_shape())); - 
s.args().append("crops", tbl->lookup(node->crops())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBidirectionalSequenceLSTM *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - - s.args().append("fw_input_to_input_weights", tbl->lookup(node->fw_input_to_input_weights())); - s.args().append("fw_input_to_forget_weights", tbl->lookup(node->fw_input_to_forget_weights())); - s.args().append("fw_input_to_cell_weights", tbl->lookup(node->fw_input_to_cell_weights())); - s.args().append("fw_input_to_output_weights", tbl->lookup(node->fw_input_to_output_weights())); - - s.args().append("fw_recurrent_to_input_weights", - tbl->lookup(node->fw_recurrent_to_input_weights())); - s.args().append("fw_recurrent_to_forget_weights", - tbl->lookup(node->fw_recurrent_to_forget_weights())); - s.args().append("fw_recurrent_to_cell_weights", - tbl->lookup(node->fw_recurrent_to_cell_weights())); - s.args().append("fw_recurrent_to_output_weights", - tbl->lookup(node->fw_recurrent_to_output_weights())); - - s.args().append("fw_cell_to_input_weights", tbl->lookup(node->fw_cell_to_input_weights())); - s.args().append("fw_cell_to_forget_weights", tbl->lookup(node->fw_cell_to_forget_weights())); - s.args().append("fw_cell_to_output_weights", tbl->lookup(node->fw_cell_to_output_weights())); - - s.args().append("fw_input_gate_bias", tbl->lookup(node->fw_input_gate_bias())); - s.args().append("fw_forget_gate_bias", tbl->lookup(node->fw_forget_gate_bias())); - s.args().append("fw_cell_gate_bias", tbl->lookup(node->fw_cell_gate_bias())); - s.args().append("fw_output_gate_bias", tbl->lookup(node->fw_output_gate_bias())); - - s.args().append("fw_projection_weights", tbl->lookup(node->fw_projection_weights())); - s.args().append("fw_projection_bias", tbl->lookup(node->fw_projection_bias())); - - s.args().append("bw_input_to_input_weights", 
tbl->lookup(node->bw_input_to_input_weights())); - s.args().append("bw_input_to_forget_weights", tbl->lookup(node->bw_input_to_forget_weights())); - s.args().append("bw_input_to_cell_weights", tbl->lookup(node->bw_input_to_cell_weights())); - s.args().append("bw_input_to_output_weights", tbl->lookup(node->bw_input_to_output_weights())); - - s.args().append("bw_recurrent_to_input_weights", - tbl->lookup(node->bw_recurrent_to_input_weights())); - s.args().append("bw_recurrent_to_forget_weights", - tbl->lookup(node->bw_recurrent_to_forget_weights())); - s.args().append("bw_recurrent_to_cell_weights", - tbl->lookup(node->bw_recurrent_to_cell_weights())); - s.args().append("bw_recurrent_to_output_weights", - tbl->lookup(node->bw_recurrent_to_output_weights())); - - s.args().append("bw_cell_to_input_weights", tbl->lookup(node->bw_cell_to_input_weights())); - s.args().append("bw_cell_to_forget_weights", tbl->lookup(node->bw_cell_to_forget_weights())); - s.args().append("bw_cell_to_output_weights", tbl->lookup(node->bw_cell_to_output_weights())); - - s.args().append("bw_input_gate_bias", tbl->lookup(node->bw_input_gate_bias())); - s.args().append("bw_forget_gate_bias", tbl->lookup(node->bw_forget_gate_bias())); - s.args().append("bw_cell_gate_bias", tbl->lookup(node->bw_cell_gate_bias())); - s.args().append("bw_output_gate_bias", tbl->lookup(node->bw_output_gate_bias())); - - s.args().append("bw_projection_weights", tbl->lookup(node->bw_projection_weights())); - s.args().append("bw_projection_bias", tbl->lookup(node->bw_projection_bias())); - - s.args().append("fw_activation_state", tbl->lookup(node->fw_activation_state())); - s.args().append("fw_cell_state", tbl->lookup(node->fw_cell_state())); - s.args().append("bw_activation_state", tbl->lookup(node->bw_activation_state())); - s.args().append("bw_cell_state", tbl->lookup(node->bw_cell_state())); - - s.args().append("auxillary_input", tbl->lookup(node->auxillary_input())); - 
s.args().append("fw_auxillary_input_to_input_weights", - tbl->lookup(node->fw_auxillary_input_to_input_weights())); - s.args().append("fw_auxillary_input_to_forget_weights", - tbl->lookup(node->fw_auxillary_input_to_forget_weights())); - s.args().append("fw_auxillary_input_to_cell_weights", - tbl->lookup(node->fw_auxillary_input_to_cell_weights())); - s.args().append("fw_auxillary_input_to_output_weights", - tbl->lookup(node->fw_auxillary_input_to_output_weights())); - s.args().append("bw_auxillary_input_to_input_weights", - tbl->lookup(node->bw_auxillary_input_to_input_weights())); - s.args().append("bw_auxillary_input_to_forget_weights", - tbl->lookup(node->bw_auxillary_input_to_forget_weights())); - s.args().append("bw_auxillary_input_to_cell_weights", - tbl->lookup(node->bw_auxillary_input_to_cell_weights())); - s.args().append("bw_auxillary_input_to_output_weights", - tbl->lookup(node->bw_auxillary_input_to_output_weights())); - - s.args().append("cell_clip", to_str(node->cell_clip())); - s.args().append("proj_clip", to_str(node->proj_clip())); - s.args().append("merge_outputs", to_str(node->merge_outputs())); - s.args().append("time_major", to_str(node->time_major())); - s.args().append("asymmetric_quantize_inputs", to_str(node->asymmetric_quantize_inputs())); - - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCast *node, - locop::NodeSummary &s) -{ - s.args().append("x", tbl->lookup(node->x())); - s.args().append("in_data_type", to_str(node->in_data_type())); - s.args().append("out_data_type", to_str(node->out_data_type())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConcatenation *node, - locop::NodeSummary &s) -{ - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - - for (uint32_t i = 0; i < node->numValues(); ++i) - s.args().append("values", 
tbl->lookup(node->values(i))); - s.args().append("axis", pepper::str(node->axis())); - s.args().append("fused", to_str(node->fusedActivationFunction())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConv2D *node, - locop::NodeSummary &s) -{ - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - assert(node->padding() != luci::Padding::UNDEFINED); - - s.args().append("input", tbl->lookup(node->input())); - s.args().append("filter", tbl->lookup(node->filter())); - s.args().append("bias", tbl->lookup(node->bias())); - s.args().append("stride(h,w)", to_str(node->stride())); - s.args().append("dilation(h,w)", to_str(node->dilation())); - s.args().append("padding", to_str(node->padding())); - s.args().append("fused", to_str(node->fusedActivationFunction())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCustom *node, - locop::NodeSummary &s) -{ - for (uint32_t i = 0; i < node->numInputs(); i++) - { - s.args().append("input" + std::to_string(i), tbl->lookup(node->inputs(i))); - } - s.args().append("custom_code", node->custom_code()); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthToSpace *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("block_size", std::to_string(node->block_size())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthwiseConv2D *node, - locop::NodeSummary &s) -{ - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - assert(node->padding() != luci::Padding::UNDEFINED); - - s.args().append("input", tbl->lookup(node->input())); - s.args().append("filter", tbl->lookup(node->filter())); - 
s.args().append("bias", tbl->lookup(node->bias())); - s.args().append("stride(h,w)", to_str(node->stride())); - s.args().append("dilation(h,w)", to_str(node->dilation())); - s.args().append("padding", to_str(node->padding())); - s.args().append("depthMultiplier", std::to_string(node->depthMultiplier())); - s.args().append("fused", to_str(node->fusedActivationFunction())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleExpandDims *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("axis", tbl->lookup(node->axis())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFakeQuant *node, - locop::NodeSummary &s) -{ - s.args().append("inputs", tbl->lookup(node->inputs())); - s.args().append("min", pepper::str(node->min())); - s.args().append("max", pepper::str(node->max())); - s.args().append("num_bits", pepper::str(node->num_bits())); - s.args().append("narrow_range", node->narrow_range() ? 
"true" : "false"); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFill *node, - locop::NodeSummary &s) -{ - s.args().append("dims", tbl->lookup(node->dims())); - s.args().append("value", tbl->lookup(node->value())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFullyConnected *node, - locop::NodeSummary &s) -{ - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - - s.args().append("input", tbl->lookup(node->input())); - s.args().append("weights", tbl->lookup(node->weights())); - s.args().append("bias", tbl->lookup(node->bias())); - s.args().append("fused", to_str(node->fusedActivationFunction())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGather *node, - locop::NodeSummary &s) -{ - s.args().append("params", tbl->lookup(node->params())); - s.args().append("indices", tbl->lookup(node->indices())); - s.args().append("axis", pepper::str(node->axis())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGatherNd *node, - locop::NodeSummary &s) -{ - s.args().append("params", tbl->lookup(node->params())); - s.args().append("indices", tbl->lookup(node->indices())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleIf *node, locop::NodeSummary &s) -{ - s.args().append("cond", tbl->lookup(node->cond())); - for (uint32_t i = 0; i < node->input_count(); ++i) - s.args().append("input", tbl->lookup(node->input(i))); - - if (node->then_graph() != nullptr) - s.args().append("then_graph", node->then_graph()->name()); - else - s.args().append("then_branch", pepper::str(node->then_branch())); - - if (node->else_graph() != 
nullptr) - s.args().append("else_graph", node->else_graph()->name()); - else - s.args().append("else_branch", pepper::str(node->else_branch())); - - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Normalize *node, - locop::NodeSummary &s) -{ - s.args().append("x", tbl->lookup(node->x())); - s.args().append("fused_activation_function", to_str(node->fusedActivationFunction())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Pool2D *node, - locop::NodeSummary &s) -{ - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - - s.args().append("value", tbl->lookup(node->value())); - s.args().append("filter(h,w)", to_str(node->filter())); - s.args().append("stride(h,w)", to_str(node->stride())); - s.args().append("padding", to_str(node->padding())); - s.args().append("fused", to_str(node->fusedActivationFunction())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLeakyRelu *node, - locop::NodeSummary &s) -{ - s.args().append("features", tbl->lookup(node->features())); - s.args().append("alpha", std::to_string(node->alpha())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLocalResponseNormalization *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("radius", pepper::str(node->radius())); - s.args().append("bias", pepper::str(node->bias())); - s.args().append("alpha", pepper::str(node->alpha())); - s.args().append("beta", pepper::str(node->beta())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLogSoftmax *node, - locop::NodeSummary &s) -{ - 
s.args().append("logits", tbl->lookup(node->logits())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixDiag *node, - locop::NodeSummary &s) -{ - s.args().append("diagonal", tbl->lookup(node->diagonal())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixSetDiag *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("diagonal", tbl->lookup(node->diagonal())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMaxPool2D *node, - locop::NodeSummary &s) -{ - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - - s.args().append("value", tbl->lookup(node->value())); - s.args().append("filter(h,w)", to_str(node->filter())); - s.args().append("stride(h,w)", to_str(node->stride())); - s.args().append("padding", to_str(node->padding())); - s.args().append("fused", to_str(node->fusedActivationFunction())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMirrorPad *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("paddings", tbl->lookup(node->paddings())); - s.args().append("mode", to_str(node->mode())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV4 *node, - locop::NodeSummary &s) -{ - s.args().append("boxes", tbl->lookup(node->boxes())); - s.args().append("scores", tbl->lookup(node->scores())); - s.args().append("max_output_size", tbl->lookup(node->max_output_size())); - s.args().append("iou_threshold", tbl->lookup(node->iou_threshold())); - s.args().append("score_threshold", 
tbl->lookup(node->score_threshold())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV5 *node, - locop::NodeSummary &s) -{ - s.args().append("boxes", tbl->lookup(node->boxes())); - s.args().append("scores", tbl->lookup(node->scores())); - s.args().append("max_output_size", tbl->lookup(node->max_output_size())); - s.args().append("iou_threshold", tbl->lookup(node->iou_threshold())); - s.args().append("score_threshold", tbl->lookup(node->score_threshold())); - s.args().append("soft_nms_sigma", tbl->lookup(node->soft_nms_sigma())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOneHot *node, - locop::NodeSummary &s) -{ - s.args().append("indices", tbl->lookup(node->indices())); - s.args().append("depth", tbl->lookup(node->depth())); - s.args().append("on_value", tbl->lookup(node->on_value())); - s.args().append("off_value", tbl->lookup(node->off_value())); - s.args().append("axis", pepper::str(node->axis())); - - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePack *node, - locop::NodeSummary &s) -{ - for (uint32_t i = 0; i < node->values_count(); ++i) - s.args().append("values", tbl->lookup(node->values(i))); - s.args().append("values_count", pepper::str(node->values_count())); - s.args().append("axis", pepper::str(node->axis())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePad *node, locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("paddings", tbl->lookup(node->paddings())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePadV2 *node, - locop::NodeSummary &s) -{ 
- s.args().append("input", tbl->lookup(node->input())); - s.args().append("paddings", tbl->lookup(node->paddings())); - s.args().append("constant_values", tbl->lookup(node->constant_values())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePRelu *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("alpha", tbl->lookup(node->alpha())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleRange *node, - locop::NodeSummary &s) -{ - s.args().append("start", tbl->lookup(node->start())); - s.args().append("limit", tbl->lookup(node->limit())); - s.args().append("delta", tbl->lookup(node->delta())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReshape *node, - locop::NodeSummary &s) -{ - s.args().append("tensor", tbl->lookup(node->tensor())); - s.args().append("shape", tbl->lookup(node->shape())); - // TODO Show newShape info - s.state(locop::NodeSummary::State::PartiallyKnown); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeBilinear *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("size", tbl->lookup(node->size())); - s.args().append("align_corners", node->align_corners() ? "true" : "false"); - s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false"); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeNearestNeighbor *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("size", tbl->lookup(node->size())); - s.args().append("align_corners", node->align_corners() ? 
"true" : "false"); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseSequence *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("seq_lengths", tbl->lookup(node->seq_lengths())); - s.args().append("seq_axis", std::to_string(node->seq_axis())); - s.args().append("batch_axis", std::to_string(node->batch_axis())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseV2 *node, - locop::NodeSummary &s) -{ - s.args().append("tensor", tbl->lookup(node->tensor())); - s.args().append("axis", tbl->lookup(node->axis())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleScatterNd *node, - locop::NodeSummary &s) -{ - s.args().append("indices", tbl->lookup(node->indices())); - s.args().append("updates", tbl->lookup(node->updates())); - s.args().append("shape", tbl->lookup(node->shape())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSegmentSum *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("segment_ids", tbl->lookup(node->segment_ids())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelect *node, - locop::NodeSummary &s) -{ - s.args().append("condition", tbl->lookup(node->condition())); - s.args().append("t", tbl->lookup(node->t())); - s.args().append("e", tbl->lookup(node->e())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelectV2 *node, - locop::NodeSummary &s) -{ - s.args().append("condition", 
tbl->lookup(node->condition())); - s.args().append("t", tbl->lookup(node->t())); - s.args().append("e", tbl->lookup(node->e())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleShape *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("out_type", to_str(node->out_type())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSlice *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("begin", tbl->lookup(node->begin())); - s.args().append("size", tbl->lookup(node->size())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSoftmax *node, - locop::NodeSummary &s) -{ - s.args().append("logits", tbl->lookup(node->logits())); - s.args().append("beta", pepper::str(node->beta())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToBatchND *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("block_shape", tbl->lookup(node->block_shape())); - s.args().append("paddings", tbl->lookup(node->paddings())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToDepth *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("block_size", pepper::str(node->block_size())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSparseToDense *node, - locop::NodeSummary &s) -{ - s.args().append("indices", tbl->lookup(node->indices())); - 
s.args().append("output_shape", tbl->lookup(node->output_shape())); - s.args().append("values", tbl->lookup(node->values())); - s.args().append("default_value", tbl->lookup(node->default_value())); - s.args().append("Validate_indices", pepper::str(node->validate_indices())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplit *node, - locop::NodeSummary &s) -{ - s.args().append("split_dim", tbl->lookup(node->split_dim())); - s.args().append("input", tbl->lookup(node->input())); - s.args().append("num_split", pepper::str(node->num_split())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplitV *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("size_splits", tbl->lookup(node->size_splits())); - s.args().append("split_dim", tbl->lookup(node->split_dim())); - s.args().append("num_split", pepper::str(node->num_split())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSqueeze *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - - std::stringstream ss{"("}; - for (size_t i = 0; i < node->squeeze_dims().size(); ++i) - { - if (i != 0) - ss << ", "; - ss << node->squeeze_dims()[i]; - } - ss << ")"; - s.args().append("squeeze_dims", ss.str()); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleStridedSlice *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("begin", tbl->lookup(node->begin())); - s.args().append("end", tbl->lookup(node->end())); - s.args().append("strides", tbl->lookup(node->strides())); - s.args().append("begin_mask", pepper::str(node->begin_mask())); - 
s.args().append("end_mask", pepper::str(node->end_mask())); - s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask())); - s.args().append("new_axis_mask", pepper::str(node->new_axis_mask())); - s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTile *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("multiples", tbl->lookup(node->multiples())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2 *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("k", tbl->lookup(node->k())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTranspose *node, - locop::NodeSummary &s) -{ - s.args().append("a", tbl->lookup(node->a())); - s.args().append("perm", tbl->lookup(node->perm())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTransposeConv *node, - locop::NodeSummary &s) -{ - assert(node->padding() != luci::Padding::UNDEFINED); - - s.args().append("inputSizes", tbl->lookup(node->inputSizes())); - s.args().append("filter", tbl->lookup(node->filter())); - s.args().append("outBackprop", tbl->lookup(node->outBackprop())); - s.args().append("bias", tbl->lookup(node->bias())); - s.args().append("stride(h,w)", to_str(node->stride())); - s.args().append("padding", to_str(node->padding())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnidirectionalSequenceLSTM *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - 
- s.args().append("input_to_input_weights", tbl->lookup(node->input_to_input_weights())); - s.args().append("input_to_forget_weights", tbl->lookup(node->input_to_forget_weights())); - s.args().append("input_to_cell_weights", tbl->lookup(node->input_to_cell_weights())); - s.args().append("input_to_output_weights", tbl->lookup(node->input_to_output_weights())); - - s.args().append("recurrent_to_input_weights", tbl->lookup(node->recurrent_to_input_weights())); - s.args().append("recurrent_to_forget_weights", tbl->lookup(node->recurrent_to_forget_weights())); - s.args().append("recurrent_to_cell_weights", tbl->lookup(node->recurrent_to_cell_weights())); - s.args().append("recurrent_to_output_weights", tbl->lookup(node->recurrent_to_output_weights())); - - s.args().append("cell_to_input_weights", tbl->lookup(node->cell_to_input_weights())); - s.args().append("cell_to_forget_weights", tbl->lookup(node->cell_to_forget_weights())); - s.args().append("cell_to_output_weights", tbl->lookup(node->cell_to_output_weights())); - - s.args().append("input_gate_bias", tbl->lookup(node->input_gate_bias())); - s.args().append("forget_gate_bias", tbl->lookup(node->forget_gate_bias())); - s.args().append("cell_gate_bias", tbl->lookup(node->cell_gate_bias())); - s.args().append("output_gate_bias", tbl->lookup(node->output_gate_bias())); - - s.args().append("projection_weights", tbl->lookup(node->projection_weights())); - s.args().append("projection_bias", tbl->lookup(node->projection_bias())); - - s.args().append("activation_state", tbl->lookup(node->activation_state())); - s.args().append("cell_state", tbl->lookup(node->cell_state())); - - s.args().append("input_layer_norm_coefficients", - tbl->lookup(node->input_layer_norm_coefficients())); - s.args().append("forget_layer_norm_coefficients", - tbl->lookup(node->forget_layer_norm_coefficients())); - s.args().append("cell_layer_norm_coefficients", - tbl->lookup(node->cell_layer_norm_coefficients())); - 
s.args().append("output_layer_norm_coefficients", - tbl->lookup(node->output_layer_norm_coefficients())); - - s.args().append("cell_clip", to_str(node->cell_clip())); - s.args().append("proj_clip", to_str(node->proj_clip())); - s.args().append("time_major", to_str(node->time_major())); - s.args().append("asymmetric_quantize_inputs", to_str(node->asymmetric_quantize_inputs())); - - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnique *node, - locop::NodeSummary &s) -{ - s.args().append("input", tbl->lookup(node->input())); - s.args().append("idx_out_type", to_str(node->idx_out_type())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpack *node, - locop::NodeSummary &s) -{ - s.args().append("value", tbl->lookup(node->value())); - s.args().append("num", pepper::str(node->num())); - s.args().append("axis", pepper::str(node->axis())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhere *node, - locop::NodeSummary &s) -{ - s.args().append("condition", tbl->lookup(node->condition())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhile *node, - locop::NodeSummary &s) -{ - for (uint32_t i = 0; i < node->input_count(); ++i) - s.args().append("input", tbl->lookup(node->input(i))); - - if (node->cond_graph() != nullptr) - s.args().append("cond_graph", node->cond_graph()->name()); - else - s.args().append("cond_branch", pepper::str(node->cond_branch())); - - if (node->body_graph() != nullptr) - s.args().append("body_graph", node->body_graph()->name()); - else - s.args().append("body_branch", pepper::str(node->body_branch())); - - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const 
locop::SymbolTable *tbl, const luci::CircleTopKV2Out *node, - locop::NodeSummary &s) -{ - s.args().append("topkv2", tbl->lookup(node->input())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUniqueOut *node, - locop::NodeSummary &s) -{ - s.args().append("unique", tbl->lookup(node->input())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpackOut *node, - locop::NodeSummary &s) -{ - s.args().append("unpack", tbl->lookup(node->input())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhileOut *node, - locop::NodeSummary &s) -{ - s.args().append("while", tbl->lookup(node->input())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOutput *node, - locop::NodeSummary &s) -{ - s.args().append("from", tbl->lookup(node->from())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *, const luci::CircleOutputDummy *, - locop::NodeSummary &s) -{ - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *, const luci::CircleOutputExclude *, - locop::NodeSummary &s) -{ - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQFullyConnected *node, - locop::NodeSummary &s) -{ - assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED); - s.args().append("input", tbl->lookup(node->input())); - s.args().append("weights_scales", tbl->lookup(node->weights_scales())); - s.args().append("weights_binary", tbl->lookup(node->weights_binary())); - s.args().append("bias", tbl->lookup(node->bias())); - 
s.args().append("weights_clusters", tbl->lookup(node->weights_clusters())); - s.args().append("fused", to_str(node->fusedActivationFunction())); - s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQGather *node, - locop::NodeSummary &s) -{ - s.args().append("input_scales", tbl->lookup(node->input_scales())); - s.args().append("input_binary", tbl->lookup(node->input_binary())); - s.args().append("indices", tbl->lookup(node->indices())); - s.args().append("input_clusters", tbl->lookup(node->input_clusters())); - s.args().append("axis", pepper::str(node->axis())); - s.args().append("input_hidden_size", pepper::str(node->input_hidden_size())); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool summary_node(const locop::SymbolTable *tbl, const luci::CircleInstanceNorm *node, - locop::NodeSummary &s) -{ - auto fused = node->fusedActivationFunction(); - assert(fused != luci::FusedActFunc::UNDEFINED); - - s.args().append("input", tbl->lookup(node->input())); - s.args().append("gamma", tbl->lookup(node->gamma())); - s.args().append("beta", tbl->lookup(node->beta())); - s.args().append("epsilon", pepper::str(node->epsilon())); - s.args().append("fused_activation_function", to_str(fused)); - s.state(locop::NodeSummary::State::Complete); - return true; -} - -// SummaryBuilderLet type -enum class SB -{ - ABC, - DEF, - GHIJ, - KLMN, - OPQR, - STUV, - WXYZ, - CIRC, // circle only - VIRT, // virtual -}; - -template <SB sb> class SummaryBuilderLet; - -#define IMPLEMENT(CLASS) bool summary(const CLASS *, locop::NodeSummary &) const final; - -template <> class SummaryBuilderLet<SB::ABC> final : public CircleNodeSummaryBuilderBase -{ -public: - SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl) - { - // DO NOTHING - } - -private: - IMPLEMENT(luci::CircleAbs) - 
IMPLEMENT(luci::CircleAdd) - IMPLEMENT(luci::CircleAddN) - IMPLEMENT(luci::CircleArgMax) - IMPLEMENT(luci::CircleArgMin) - IMPLEMENT(luci::CircleAveragePool2D) - IMPLEMENT(luci::CircleBatchMatMul) - IMPLEMENT(luci::CircleBatchToSpaceND) - IMPLEMENT(luci::CircleBidirectionalSequenceLSTM) - IMPLEMENT(luci::CircleCast) - IMPLEMENT(luci::CircleCeil) - IMPLEMENT(luci::CircleConcatenation) - IMPLEMENT(luci::CircleConst) - IMPLEMENT(luci::CircleConv2D) - IMPLEMENT(luci::CircleCos) - IMPLEMENT(luci::CircleCustom) -}; - -template <> class SummaryBuilderLet<SB::DEF> final : public CircleNodeSummaryBuilderBase -{ -public: - SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl) - { - // DO NOTHING - } - -private: - IMPLEMENT(luci::CircleDepthToSpace) - IMPLEMENT(luci::CircleDepthwiseConv2D) - IMPLEMENT(luci::CircleDequantize) - IMPLEMENT(luci::CircleDiv) - IMPLEMENT(luci::CircleElu) - IMPLEMENT(luci::CircleEqual) - IMPLEMENT(luci::CircleExp) - IMPLEMENT(luci::CircleExpandDims) - IMPLEMENT(luci::CircleFakeQuant) - IMPLEMENT(luci::CircleFill) - IMPLEMENT(luci::CircleFloor) - IMPLEMENT(luci::CircleFloorDiv) - IMPLEMENT(luci::CircleFloorMod) - IMPLEMENT(luci::CircleFullyConnected) -}; - -template <> class SummaryBuilderLet<SB::GHIJ> final : public CircleNodeSummaryBuilderBase -{ -public: - SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl) - { - // DO NOTHING - } - -private: - IMPLEMENT(luci::CircleGather) - IMPLEMENT(luci::CircleGatherNd) - IMPLEMENT(luci::CircleGreater) - IMPLEMENT(luci::CircleGreaterEqual) - IMPLEMENT(luci::CircleIf) -}; - -template <> class SummaryBuilderLet<SB::KLMN> final : public CircleNodeSummaryBuilderBase -{ -public: - SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl) - { - // DO NOTHING - } - -private: - IMPLEMENT(luci::CircleL2Normalize) - IMPLEMENT(luci::CircleL2Pool2D) - IMPLEMENT(luci::CircleLeakyRelu) - IMPLEMENT(luci::CircleLess) - 
IMPLEMENT(luci::CircleLessEqual) - IMPLEMENT(luci::CircleLocalResponseNormalization) - IMPLEMENT(luci::CircleLog) - IMPLEMENT(luci::CircleLogicalAnd) - IMPLEMENT(luci::CircleLogicalNot) - IMPLEMENT(luci::CircleLogicalOr) - IMPLEMENT(luci::CircleLogistic) - IMPLEMENT(luci::CircleLogSoftmax) - IMPLEMENT(luci::CircleMatrixDiag) - IMPLEMENT(luci::CircleMatrixSetDiag) - IMPLEMENT(luci::CircleMaximum) - IMPLEMENT(luci::CircleMaxPool2D) - IMPLEMENT(luci::CircleMean) - IMPLEMENT(luci::CircleMinimum) - IMPLEMENT(luci::CircleMirrorPad) - IMPLEMENT(luci::CircleMul) - IMPLEMENT(luci::CircleNeg) - IMPLEMENT(luci::CircleNonMaxSuppressionV4) - IMPLEMENT(luci::CircleNonMaxSuppressionV5) - IMPLEMENT(luci::CircleNotEqual) -}; - -template <> class SummaryBuilderLet<SB::OPQR> final : public CircleNodeSummaryBuilderBase -{ -public: - SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl) - { - // DO NOTHING - } - -private: - IMPLEMENT(luci::CircleOneHot) - IMPLEMENT(luci::CirclePack) - IMPLEMENT(luci::CirclePad) - IMPLEMENT(luci::CirclePadV2) - IMPLEMENT(luci::CirclePow) - IMPLEMENT(luci::CirclePRelu) - IMPLEMENT(luci::CircleQuantize) - IMPLEMENT(luci::CircleRange) - IMPLEMENT(luci::CircleRank) - IMPLEMENT(luci::CircleReduceAny) - IMPLEMENT(luci::CircleReduceMax) - IMPLEMENT(luci::CircleReduceMin) - IMPLEMENT(luci::CircleReduceProd) - IMPLEMENT(luci::CircleRelu) - IMPLEMENT(luci::CircleRelu6) - IMPLEMENT(luci::CircleReluN1To1) - IMPLEMENT(luci::CircleReshape) - IMPLEMENT(luci::CircleResizeBilinear) - IMPLEMENT(luci::CircleResizeNearestNeighbor) - IMPLEMENT(luci::CircleReverseSequence) - IMPLEMENT(luci::CircleReverseV2) - IMPLEMENT(luci::CircleRound) - IMPLEMENT(luci::CircleRsqrt) -}; - -template <> class SummaryBuilderLet<SB::STUV> final : public CircleNodeSummaryBuilderBase -{ -public: - SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl) - { - // DO NOTHING - } - -private: - IMPLEMENT(luci::CircleScatterNd) - 
IMPLEMENT(luci::CircleSegmentSum) - IMPLEMENT(luci::CircleSelect) - IMPLEMENT(luci::CircleSelectV2) - IMPLEMENT(luci::CircleShape) - IMPLEMENT(luci::CircleSin) - IMPLEMENT(luci::CircleSlice) - IMPLEMENT(luci::CircleSoftmax) - IMPLEMENT(luci::CircleSpaceToBatchND) - IMPLEMENT(luci::CircleSpaceToDepth) - IMPLEMENT(luci::CircleSparseToDense) - IMPLEMENT(luci::CircleSplit) - IMPLEMENT(luci::CircleSplitV) - IMPLEMENT(luci::CircleSqrt) - IMPLEMENT(luci::CircleSquare) - IMPLEMENT(luci::CircleSquaredDifference) - IMPLEMENT(luci::CircleSqueeze) - IMPLEMENT(luci::CircleStridedSlice) - IMPLEMENT(luci::CircleSub) - IMPLEMENT(luci::CircleSum) - IMPLEMENT(luci::CircleTanh) - IMPLEMENT(luci::CircleTile) - IMPLEMENT(luci::CircleTopKV2) - IMPLEMENT(luci::CircleTranspose) - IMPLEMENT(luci::CircleTransposeConv) - IMPLEMENT(luci::CircleUnidirectionalSequenceLSTM) - IMPLEMENT(luci::CircleUnique) - IMPLEMENT(luci::CircleUnpack) -}; - -template <> class SummaryBuilderLet<SB::WXYZ> final : public CircleNodeSummaryBuilderBase -{ -public: - SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl) - { - // DO NOTHING - } - -private: - IMPLEMENT(luci::CircleWhere) - IMPLEMENT(luci::CircleWhile) - IMPLEMENT(luci::CircleZerosLike) -}; - -template <> class SummaryBuilderLet<SB::CIRC> final : public CircleNodeSummaryBuilderBase -{ -public: - SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl) - { - // DO NOTHING - } - -private: - IMPLEMENT(luci::CircleBCQFullyConnected) - IMPLEMENT(luci::CircleBCQGather) - IMPLEMENT(luci::CircleInstanceNorm) -}; - -template <> class SummaryBuilderLet<SB::VIRT> final : public CircleNodeSummaryBuilderBase -{ -public: - SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl) - { - // DO NOTHING - } - -private: - IMPLEMENT(luci::CircleInput) - IMPLEMENT(luci::CircleOutput) - IMPLEMENT(luci::CircleCustomOut) - IMPLEMENT(luci::CircleIfOut) - 
IMPLEMENT(luci::CircleNonMaxSuppressionV4Out) - IMPLEMENT(luci::CircleNonMaxSuppressionV5Out) - IMPLEMENT(luci::CircleOutputDummy) - IMPLEMENT(luci::CircleOutputExclude) - IMPLEMENT(luci::CircleSplitOut) - IMPLEMENT(luci::CircleSplitVOut) - IMPLEMENT(luci::CircleTopKV2Out) - IMPLEMENT(luci::CircleUniqueOut) - IMPLEMENT(luci::CircleUnpackOut) - IMPLEMENT(luci::CircleWhileOut) -}; - -#undef IMPLEMENT - -bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const -{ - if (node->dialect() != luci::CircleDialect::get()) - return false; - - auto ptr_to_str = [](const void *ptr) { - std::stringstream ss; - ss << ptr; - return ss.str(); - }; - - auto add_comment = [&]() { - auto cnode = loco::must_cast<const luci::CircleNode *>(node); - s.opname(circle_opname(node->opnum())); - s.comments().append("[" + cnode->name() + "] = " + ptr_to_str(node)); - }; - -#define CIRCLE_NODE(OPCODE, CLASS) \ - if (dynamic_cast<const CLASS *>(node)) \ - { \ - if (summary(dynamic_cast<const CLASS *>(node), s)) \ - { \ - add_comment(); \ - return true; \ - } \ - } -#define CIRCLE_VNODE CIRCLE_NODE -#include <luci/IR/CircleNodes.lst> -#undef CIRCLE_VNODE -#undef CIRCLE_NODE - - return false; -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleAbs *node, locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleAdd *node, locop::NodeSummary &s) const -{ - return use_xy_act(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleArgMax *node, - locop::NodeSummary &s) const -{ - return use_ido(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleArgMin *node, - locop::NodeSummary &s) const -{ - return use_ido(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const 
luci::CircleAveragePool2D *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleBatchMatMul *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleBatchToSpaceND *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleBidirectionalSequenceLSTM *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCast *node, locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleConcatenation *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleConst *, locop::NodeSummary &s) const -{ - s.state(locop::NodeSummary::State::PartiallyKnown); - return true; -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleConv2D *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCos *node, locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCustom *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDepthToSpace *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDepthwiseConv2D *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool 
SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDequantize *node, - locop::NodeSummary &s) const -{ - return use_input(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDiv *node, locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleElu *node, locop::NodeSummary &s) const -{ - return use_features(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleEqual *node, locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleExp *node, locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleExpandDims *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFakeQuant *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFill *node, locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFloor *node, locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFloorDiv *node, - locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFloorMod *node, - locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFullyConnected *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGather *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGatherNd 
*node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGreater *node, - locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGreaterEqual *node, - locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleIf *node, locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleL2Normalize *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleL2Pool2D *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLess *node, locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLessEqual *node, - locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLeakyRelu *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLocalResponseNormalization *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLog *node, locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogicalAnd *node, - locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogicalNot *node, - locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogicalOr *node, - 
locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogistic *node, - locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogSoftmax *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMatrixDiag *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMatrixSetDiag *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMaximum *node, - locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMaxPool2D *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMean *node, locop::NodeSummary &s) const -{ - return use_reducer(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMinimum *node, - locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMirrorPad *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMul *node, locop::NodeSummary &s) const -{ - return use_xy_act(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNeg *node, locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNonMaxSuppressionV4 *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNonMaxSuppressionV5 *node, - 
locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNotEqual *node, - locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleOneHot *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePack *node, locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePad *node, locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePadV2 *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePow *node, locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePRelu *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleQuantize *node, - locop::NodeSummary &s) const -{ - return use_input(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRange *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRank *node, locop::NodeSummary &s) const -{ - return use_input(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceAny *node, - locop::NodeSummary &s) const -{ - return use_reducer(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceMax *node, - locop::NodeSummary &s) const -{ - return use_reducer(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceMin *node, - locop::NodeSummary &s) const -{ - 
return use_reducer(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceProd *node, - locop::NodeSummary &s) const -{ - return use_reducer(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRelu *node, locop::NodeSummary &s) const -{ - return use_features(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRelu6 *node, - locop::NodeSummary &s) const -{ - return use_features(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReluN1To1 *node, - locop::NodeSummary &s) const -{ - return use_features(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReshape *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleResizeBilinear *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleResizeNearestNeighbor *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReverseSequence *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReverseV2 *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRound *node, - locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRsqrt *node, - locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleScatterNd *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSegmentSum *node, - 
locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSelect *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSelectV2 *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleShape *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSin *node, locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSlice *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSoftmax *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSpaceToBatchND *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSpaceToDepth *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSparseToDense *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSplit *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSplitV *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSqrt *node, locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSquare *node, - 
locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSquaredDifference *node, - locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSqueeze *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleStridedSlice *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSub *node, locop::NodeSummary &s) const -{ - return use_xy(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSum *node, locop::NodeSummary &s) const -{ - return use_reducer(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTanh *node, locop::NodeSummary &s) const -{ - return use_x(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTile *node, locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTopKV2 *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTranspose *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTransposeConv *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleUnidirectionalSequenceLSTM *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleUnique *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleUnpack *node, - 
locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::WXYZ>::summary(const luci::CircleWhere *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::WXYZ>::summary(const luci::CircleWhile *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::WXYZ>::summary(const luci::CircleZerosLike *node, - locop::NodeSummary &s) const -{ - return use_input(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::CIRC>::summary(const luci::CircleBCQFullyConnected *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::CIRC>::summary(const luci::CircleBCQGather *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::CIRC>::summary(const luci::CircleInstanceNorm *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleInput *, locop::NodeSummary &s) const -{ - s.state(locop::NodeSummary::State::Complete); - return true; -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleOutput *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleCustomOut *node, - locop::NodeSummary &s) const -{ - return use_input(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleIfOut *node, - locop::NodeSummary &s) const -{ - return use_input(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleNonMaxSuppressionV4Out *node, - locop::NodeSummary &s) const -{ - return use_input(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleNonMaxSuppressionV5Out *node, - locop::NodeSummary &s) const -{ - return use_input(tbl(), node, s); -} - -bool 
SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleOutputDummy *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleOutputExclude *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleSplitOut *node, - locop::NodeSummary &s) const -{ - return use_input(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleSplitVOut *node, - locop::NodeSummary &s) const -{ - return use_input(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleTopKV2Out *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleUniqueOut *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleUnpackOut *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleWhileOut *node, - locop::NodeSummary &s) const -{ - return summary_node(tbl(), node, s); -} - -} // namespace - namespace luci { @@ -2208,22 +36,10 @@ bool NodeSummaryBuilder::build(const loco::Node *node, locop::NodeSummary &s) co return true; } -#define BUILD_GRP(GRP) \ - do \ - { \ - if (SummaryBuilderLet<SB::GRP>(_tbl).build(node, s)) \ - return true; \ - } while (false) - - BUILD_GRP(ABC); - BUILD_GRP(DEF); - BUILD_GRP(GHIJ); - BUILD_GRP(KLMN); - BUILD_GRP(OPQR); - BUILD_GRP(STUV); - BUILD_GRP(WXYZ); - BUILD_GRP(CIRC); - BUILD_GRP(VIRT); + if (CircleNodeSummaryBuilder().build(node, _tbl, s)) + { + return true; + } return false; } diff --git a/compiler/luci/partition/CMakeLists.txt b/compiler/luci/partition/CMakeLists.txt index ec8e0b0d6..f28207df2 100644 --- a/compiler/luci/partition/CMakeLists.txt +++ 
b/compiler/luci/partition/CMakeLists.txt @@ -13,7 +13,7 @@ target_link_libraries(luci_partition PUBLIC luci_lang) target_link_libraries(luci_partition PRIVATE luci_service) target_link_libraries(luci_partition PRIVATE luci_log) target_link_libraries(luci_partition PRIVATE luci_logex) -target_link_libraries(luci_partition PRIVATE mio_circle) +target_link_libraries(luci_partition PRIVATE mio_circle04) target_link_libraries(luci_partition PRIVATE nncc_common) target_link_libraries(luci_partition PRIVATE pepper_csv2vec) target_link_libraries(luci_partition PRIVATE oops) diff --git a/compiler/luci/partition/src/ConnectNode.h b/compiler/luci/partition/src/ConnectNode.h index ebbff7a6a..e60567c69 100644 --- a/compiler/luci/partition/src/ConnectNode.h +++ b/compiler/luci/partition/src/ConnectNode.h @@ -161,6 +161,7 @@ public: void visit(const luci::CircleSquaredDifference *) final; void visit(const luci::CircleSqueeze *) final; void visit(const luci::CircleStridedSlice *) final; + void visit(const luci::CircleSVDF *) final; void visit(const luci::CircleSub *) final; void visit(const luci::CircleSum *) final; void visit(const luci::CircleTanh *) final; @@ -197,6 +198,7 @@ public: void visit(const luci::CircleTopKV2Out *) final; void visit(const luci::CircleUniqueOut *) final; void visit(const luci::CircleUnpackOut *) final; + void visit(const luci::CircleVariable *) final; void visit(const luci::CircleWhileOut *) final; public: diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp new file mode 100644 index 000000000..f661a794c --- /dev/null +++ b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConnectNode.h" + +namespace +{ + +void connect(luci::ConnectNode *cn, const luci::CircleSVDF *node) +{ + auto *cloned = loco::must_cast<luci::CircleSVDF *>(cn->find_clone(node)); + + luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input()); + luci::CircleNode *weight_feature = loco::must_cast<luci::CircleNode *>(node->weight_feature()); + luci::CircleNode *weight_time = loco::must_cast<luci::CircleNode *>(node->weight_time()); + luci::CircleNode *bias = loco::must_cast<luci::CircleNode *>(node->bias()); + luci::CircleNode *input_activation_state = + loco::must_cast<luci::CircleNode *>(node->input_activation_state()); + + cloned->input(cn->find_clone(input)); + cloned->weight_feature(cn->find_clone(weight_feature)); + cloned->weight_time(cn->find_clone(weight_time)); + cloned->bias(cn->find_clone(bias)); + cloned->input_activation_state(cn->find_clone(input_activation_state)); +} + +} // namespace + +namespace luci +{ + +void ConnectNode::visit(const luci::CircleSVDF *node) { connect(this, node); } + +} // namespace luci diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp new file mode 100644 index 000000000..5fae5206e --- /dev/null +++ b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConnectNode.h" + +#include "ConnectNode.test.h" + +#include <luci/Service/CircleNodeClone.h> + +#include <gtest/gtest.h> + +namespace +{ + +using namespace luci::test; + +class NodeGraphlet : public NodeGraphletT<luci::CircleSVDF> +{ +public: + NodeGraphlet() = default; + +public: + void init(loco::Graph *g) + { + NodeGraphletT<luci::CircleSVDF>::init(g); + + _node->fusedActivationFunction(luci::FusedActFunc::RELU); + } +}; + +class TestNodeGraph : public TestIsOGraph<5>, public NodeGraphlet +{ +public: + TestNodeGraph() = default; + +public: + void init(const ShapeU32 shape) + { + TestIsOGraph<5>::init({shape, shape, shape, shape, shape}, shape); + NodeGraphlet::init(g()); + + node()->input(input(0)); + node()->weight_feature(input(1)); + node()->weight_time(input(2)); + node()->bias(input(3)); + node()->input_activation_state(input(4)); + + output()->from(node()); + } +}; + +} // namespace + +TEST(ConnectNodeTest, connect_SVDF) +{ + TestNodeGraph tng; + tng.init({2, 3}); + + ConnectionTestHelper cth; + cth.prepare_inputs(&tng); + + auto *node = tng.node(); + ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(node)); + + auto *clone = luci::clone_node(node, cth.graph_clone()); + ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(clone)); + + cth.clone_connect(node, clone); + + ASSERT_EQ(5, clone->arity()); + ASSERT_EQ(cth.inputs(0), clone->arg(0)); + ASSERT_EQ(cth.inputs(1), clone->arg(1)); + ASSERT_EQ(cth.inputs(2), clone->arg(2)); + ASSERT_EQ(cth.inputs(3), clone->arg(3)); + ASSERT_EQ(cth.inputs(4), clone->arg(4)); +} + 
+TEST(ConnectNodeTest, connect_SVDF_NEG) +{ + TestNodeGraph tng; + tng.init({2, 3}); + + ConnectionTestHelper cth; + cth.prepare_inputs_miss(&tng); + + auto *node = tng.node(); + ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(node)); + + auto *clone = luci::clone_node(node, cth.graph_clone()); + ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(clone)); + + EXPECT_ANY_THROW(cth.clone_connect(node, clone)); +} diff --git a/compiler/luci/partition/src/Nodes/CircleVariable.cpp b/compiler/luci/partition/src/Nodes/CircleVariable.cpp new file mode 100644 index 000000000..f7f6f21fd --- /dev/null +++ b/compiler/luci/partition/src/Nodes/CircleVariable.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConnectNode.h" + +namespace luci +{ + +void ConnectNode::visit(const luci::CircleVariable *) +{ + // Nothing to do +} + +} // namespace luci diff --git a/compiler/luci/partition/src/PartitionIRDump.cpp b/compiler/luci/partition/src/PartitionIRDump.cpp index 4f2c26800..0fabfc416 100644 --- a/compiler/luci/partition/src/PartitionIRDump.cpp +++ b/compiler/luci/partition/src/PartitionIRDump.cpp @@ -32,18 +32,18 @@ void dump(std::ostream &os, const PNode *pnode) void dump(std::ostream &os, const PGroup *pgroup) { os << "--- PGroup: " << pgroup->group << std::endl; - os << "Input(s): "; + os << "Input(s): [ "; for (auto &node_in : pgroup->inputs) os << node_in->name() << " "; - os << std::endl; + os << "]" << std::endl; for (auto &pnode : pgroup->pnodes) { dump(os, pnode.get()); } - os << "Output(s): "; + os << "Output(s): [ "; for (auto &node_out : pgroup->outputs) os << node_out->name() << " "; - os << std::endl; + os << "]" << std::endl; } void dump(std::ostream &os, const PGroups *pgroups) @@ -57,7 +57,8 @@ void dump(std::ostream &os, const PGroups *pgroups) { auto node = it->first; auto group = it->second; - os << " Node: " << node << "(" << node->name() << "): " << group << std::endl; + os << " Node: " << node << "(" << luci::opcode_name(node) << "," << node->name() + << "): " << group << std::endl; } } diff --git a/compiler/luci/partition/src/PartitionMerge.cpp b/compiler/luci/partition/src/PartitionMerge.cpp index c517bf93f..4c3971bd8 100644 --- a/compiler/luci/partition/src/PartitionMerge.cpp +++ b/compiler/luci/partition/src/PartitionMerge.cpp @@ -58,9 +58,6 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups) // we need to clone this CircleConst for each graph of the group. 
if (dynamic_cast<const luci::CircleConst *>(input) != nullptr) continue; - // Skip also for OutputExclude - if (dynamic_cast<const luci::CircleOutputExclude *>(input) != nullptr) - continue; auto input_group = pgroups->group_of(input); // NOTE: all the nodes should be registered and return should be valid group. @@ -87,7 +84,7 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups) input_pgroup = pgroup_input; else { - if (input_pgroup != pgroup_input) + if (input_pgroup->group != pgroup_input->group) return false; } } @@ -96,6 +93,48 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups) } /** + * @brief return true if there is only one output and is fed to same group of nodes + * @note pgroups is used to find group of pgroup + * ex) + * /-- pgroup_user_1 (grp_1) + * --- pgroup + * \-- pgroup_user_2 (grp_2) + * + * return false if grp_1 != grp_2 + */ +bool is_output_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups) +{ + assert(pgroups != nullptr); + assert(pgroup != nullptr); + + std::string group; + for (auto &output : pgroup->outputs) + { + // get output_group + auto output_group = pgroups->group_of(output); + assert(not output_group.empty()); + if (output_group.empty()) + output_group = pgroups->default_group; + + // find all PGroup that uses output + for (auto &pgroup_user : pgroups->pgroups) + { + for (auto &user_inputs : pgroup_user->inputs) + { + if (output == user_inputs) + { + // OK, these are connected, check group is same + if (pgroup_user->group != output_group) + return false; + } + } + } + } + + return true; +} + +/** * @brief merge pgroup into pgroup_i * @note output of pgroup_i should be input of pgroup */ @@ -191,6 +230,9 @@ std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups) // skip if there are multiple inputs but inputs differ in group if (!is_input_same(pgroup.get(), d_pgroups.get())) continue; + // skip if pgroup has different group for other users of 
pgroup_i + if (!is_output_same(pgroup_i.get(), d_pgroups.get())) + continue; // TODO add more condition may be needed merge_into(pgroup.get(), pgroup_i.get()); diff --git a/compiler/luci/partition/src/PartitionPGroups.cpp b/compiler/luci/partition/src/PartitionPGroups.cpp index 0080873e6..eaeacf9c4 100644 --- a/compiler/luci/partition/src/PartitionPGroups.cpp +++ b/compiler/luci/partition/src/PartitionPGroups.cpp @@ -46,6 +46,9 @@ public: bool visit(const luci::CircleUniqueOut *) final { return true; } bool visit(const luci::CircleUnpackOut *) final { return true; } bool visit(const luci::CircleWhileOut *) final { return true; } + // For inputs not used + bool visit(const luci::CircleOutputExclude *) final { return true; } + bool visit(const luci::CircleVariable *) final { return true; } // TODO add all virtual nodes // default is false @@ -69,59 +72,80 @@ bool check_allocate_partition(const luci::CircleNode *node) return true; } -class FindGroupToFollow final : public luci::CircleNodeVisitor<const std::string &> +} // namespace + +namespace { -public: - FindGroupToFollow(const luci::PartitionTable &partition, luci::PGroups *pgroups) - : _partition(partition), _pgroups(pgroups) - { - // NOTHING TODO - } -private: - const std::string &groupof(const luci::CircleNode *input) const +std::string group_from_partition(const luci::CircleNode *node, + const luci::PartitionTable &partition) +{ + LOGGER(l); + + auto group = partition.default_group; + + std::string opcodename; // opcodename or opname + + switch (partition.comply) { - auto group = _pgroups->node2group[input]; - assert(not group.empty()); - if (group.empty()) - return _partition.default_group; - return _pgroups->node2group[input]; + case luci::PartitionTable::COMPLY::OPCODE: + { + opcodename = luci::opcode_name(node); + assert(!opcodename.empty()); + + auto it = partition.byopcodes.find(opcodename); + if (it != partition.byopcodes.end()) + group = it->second; + break; + } + case 
luci::PartitionTable::COMPLY::OPNAME: + { + opcodename = node->name(); + assert(!opcodename.empty()); + + auto it = partition.byopnames.find(opcodename); + if (it != partition.byopnames.end()) + group = it->second; + break; + } + + default: + throw std::runtime_error("Unsupported partition.comply"); } + INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group + << std::endl; + + return group; +} + +class IsVirtualInputNode final : public luci::CircleNodeVisitor<bool> +{ public: -#define IMPLEMENT(CLASS) \ - const std::string &visit(const luci::CLASS *node) final \ - { \ - auto input = loco::must_cast<luci::CircleNode *>(node->input()); \ - return groupof(input); \ - } + // TODO check CircleOutputDummy + bool visit(const luci::CircleOutputExclude *) final { return true; } + bool visit(const luci::CircleVariable *) final { return true; } - IMPLEMENT(CircleCustomOut); - IMPLEMENT(CircleIfOut); - IMPLEMENT(CircleNonMaxSuppressionV4Out); - IMPLEMENT(CircleNonMaxSuppressionV5Out); - IMPLEMENT(CircleSplitOut); - IMPLEMENT(CircleSplitVOut); - IMPLEMENT(CircleTopKV2Out); - IMPLEMENT(CircleUniqueOut); - IMPLEMENT(CircleUnpackOut); - IMPLEMENT(CircleWhileOut); - -#undef IMPLEMENT - - // return empty for nothing to do - const std::string &visit(const luci::CircleNode *) final { return _empty_str; } - -private: - const luci::PartitionTable &_partition; - luci::PGroups *_pgroups = nullptr; - std::string _empty_str; + // default is false + bool visit(const luci::CircleNode *) final { return false; } }; -} // namespace - -namespace +class IsMultiOutputNode final : public luci::CircleNodeVisitor<bool> { +public: + bool visit(const luci::CircleCustom *) final { return true; } + bool visit(const luci::CircleIf *) final { return true; } + bool visit(const luci::CircleNonMaxSuppressionV4 *) final { return true; } + bool visit(const luci::CircleNonMaxSuppressionV5 *) final { return true; } + bool visit(const luci::CircleSplit *) final { return true; } + 
bool visit(const luci::CircleSplitV *) final { return true; } + bool visit(const luci::CircleTopKV2 *) final { return true; } + bool visit(const luci::CircleUnique *) final { return true; } + bool visit(const luci::CircleUnpack *) final { return true; } + bool visit(const luci::CircleWhile *) final { return true; } + // default is false + bool visit(const luci::CircleNode *) final { return false; } +}; void append(luci::CircleNode *node, luci::PGroups *pgroups, const std::string &group, uint32_t idx) { @@ -136,17 +160,56 @@ void append(luci::CircleNode *node, luci::PGroups *pgroups, const std::string &g pgroup->pnodes.push_back(std::move(pnode)); + IsVirtualInputNode queryvi; // Set input of PGroup for (uint32_t in = 0; in < node->arity(); ++in) { auto input = loco::must_cast<luci::CircleNode *>(node->arg(in)); - // this input maybe CircleInput in source graph - // --> not confident this is safe - pgroup->inputs.push_back(input); + if (input->accept(&queryvi)) + { + auto pnode = std::make_unique<luci::PNode>(); + pnode->node = input; + pnode->group = group; + pnode->pgroup = pgroup.get(); + + pgroup->pnodes.push_back(std::move(pnode)); + + pgroups->node2group[input] = group; + } + else + { + // this input maybe CircleInput in source graph + // --> not confident this is safe + pgroup->inputs.push_back(input); + } + } + + IsMultiOutputNode query; + if (node->accept(&query)) + { + // Include CircleXXXOut virtual nodes in this group + auto succs = loco::succs(node); + for (auto &succ_node : succs) + { + auto nodeout = loco::must_cast<luci::CircleNode *>(succ_node); + + auto pnode = std::make_unique<luci::PNode>(); + pnode->node = nodeout; + pnode->group = group; + pnode->pgroup = pgroup.get(); + + pgroup->pnodes.push_back(std::move(pnode)); + + pgroups->node2group[nodeout] = group; + + pgroup->outputs.push_back(nodeout); + } + } + else + { + // Set output of PGroup: node itself + pgroup->outputs.push_back(node); } - // Set output of PGroup: node itself or multiple 
virtual outputs - // TODO support multiple virtual outputs - pgroup->outputs.push_back(node); pgroups->node2group[node] = group; pgroups->id2pgroup[pgroup->id] = pgroup.get(); @@ -182,70 +245,9 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source, // check if node is normal node that we are interested if (check_allocate_partition(node)) { - auto group = partition.default_group; - - std::string opcodename; // opcodename or opname - - switch (partition.comply) - { - case luci::PartitionTable::COMPLY::OPCODE: - { - opcodename = luci::opcode_name(node); - assert(!opcodename.empty()); - - auto it = partition.byopcodes.find(opcodename); - if (it != partition.byopcodes.end()) - group = it->second; - break; - } - case luci::PartitionTable::COMPLY::OPNAME: - { - opcodename = node->name(); - assert(!opcodename.empty()); - - auto it = partition.byopnames.find(opcodename); - if (it != partition.byopnames.end()) - group = it->second; - break; - } - - default: - throw std::runtime_error("Unsupported partition.comply"); - } - - INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group - << std::endl; + auto group = group_from_partition(node, partition); append(node, pgroups.get(), group, idx); -#if 0 - auto pgroup = std::make_unique<luci::PGroup>(); - pgroup->group = group; - pgroup->id = idx + 1; - - auto pnode = std::make_unique<luci::PNode>(); - pnode->node = node; - pnode->group = group; - pnode->pgroup = pgroup.get(); - - pgroup->pnodes.push_back(std::move(pnode)); - - // Set input of PGroup - for (uint32_t in = 0; in < node->arity(); ++in) - { - auto input = loco::must_cast<luci::CircleNode *>(node->arg(in)); - // this input maybe CircleInput in source graph - // --> not confident this is safe - pgroup->inputs.push_back(input); - } - // Set output of PGroup: node itself or multiple virtual outputs - // TODO support multiple virtual outputs - pgroup->outputs.push_back(node); - - pgroups->node2group[node] = group; - 
pgroups->id2pgroup[pgroup->id] = pgroup.get(); - - pgroups->pgroups.push_back(std::move(pgroup)); -#endif } else { @@ -255,22 +257,6 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source, } } - // handle for virtual nodes like multiple outputs - // these nodes should follow group of the input - for (uint32_t idx = 0; idx < nodes->size(); ++idx) - { - auto node = loco::must_cast<luci::CircleNode *>(nodes->at(idx)); - - // for virtual nodes like CircleUnpackOut should follow it's input (owner) - // or just set to default - FindGroupToFollow query(partition, pgroups.get()); - const auto &group = node->accept(&query); - if (not group.empty()) - { - append(node, pgroups.get(), group, idx); - } - } - return std::move(pgroups); } diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt index b8b406a38..5237c6d3f 100644 --- a/compiler/luci/pass/CMakeLists.txt +++ b/compiler/luci/pass/CMakeLists.txt @@ -1,4 +1,4 @@ -nnas_find_package(FlatBuffers EXACT 1.12 QUIET) +nnas_find_package(FlatBuffers EXACT 2.0 QUIET) if(NOT FlatBuffers_FOUND) message(STATUS "FlatBuffers NOT FOUND") return() @@ -23,11 +23,11 @@ target_link_libraries(luci_pass PRIVATE luci_log) target_link_libraries(luci_pass PRIVATE luci_service) target_link_libraries(luci_pass PRIVATE luci_logex) target_link_libraries(luci_pass PRIVATE luci_profile) -target_link_libraries(luci_pass PRIVATE mio_tflite260_inc) +target_link_libraries(luci_pass PRIVATE mio_tflite280_inc) target_link_libraries(luci_pass PRIVATE nncc_common) target_link_libraries(luci_pass PRIVATE pepper_csv2vec) target_link_libraries(luci_pass PRIVATE oops) -target_link_libraries(luci_pass PRIVATE flatbuffers-1.12) +target_link_libraries(luci_pass PRIVATE flatbuffers-2.0) install(TARGETS luci_pass DESTINATION lib) install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN "*.h") @@ -43,5 +43,5 @@ target_include_directories(luci_pass_test PRIVATE src) target_link_libraries(luci_pass_test 
luci_pass) target_link_libraries(luci_pass_test luci_lang) target_link_libraries(luci_pass_test luci_testhelper) -target_link_libraries(luci_pass_test flatbuffers-1.12) +target_link_libraries(luci_pass_test flatbuffers-2.0) #target_link_libraries(luci_pass_test oops) diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index 658563ecf..c803898f6 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -47,15 +47,12 @@ public: ResolveCustomOpBatchMatMul, ResolveCustomOpMatMul, ResolveCustomOpMaxPoolWithArgmax, - QuantizeDequantizeWeights, - QuantizeWithMinMax, - Requantize, FoldAddV2, FoldCast, FoldDepthwiseConv2D, FoldDequantize, + FoldGather, FoldSparseToDense, - ForceQuantParam, ForwardReshapeToUnaryOp, SparsifyTensorPass, FusePreActivationBatchNorm, @@ -79,6 +76,7 @@ public: TransformMinReluToRelu6Pass, SubstituteStridedSliceToReshape, SubstituteTransposeToReshape, + RemoveRedundantQuantize, RemoveRedundantReshape, RemoveFakeQuant, RemoveQuantDequantSeq, @@ -86,16 +84,6 @@ public: enum AlgorithmParameters { - // quantize - Quantize_input_model_dtype, - Quantize_output_model_dtype, - Quantize_granularity, // layer-wise or channel-wise - Quantize_tensor_names, - Quantize_scales, - Quantize_zero_points, - Quantize_input_type, - Quantize_output_type, - // sparsify Sparsify_tensor_name, Sparsify_traversal_order, @@ -114,8 +102,6 @@ public: virtual bool query(Algorithm) = 0; virtual void param(AlgorithmParameters, const std::string &) = 0; virtual const std::string param(AlgorithmParameters) const = 0; - virtual void params(AlgorithmParameters, std::vector<std::string> &) = 0; - virtual std::vector<std::string> params(AlgorithmParameters) const = 0; }; public: @@ -127,8 +113,6 @@ public: void optimize(loco::Graph *) const; - void quantize(loco::Graph *) const; - void sparsify(loco::Graph *) const; private: diff --git 
a/compiler/luci/pass/include/luci/CircleQuantizer.h b/compiler/luci/pass/include/luci/CircleQuantizer.h new file mode 100644 index 000000000..4e7074d98 --- /dev/null +++ b/compiler/luci/pass/include/luci/CircleQuantizer.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_CIRCLE_QUANTIZER_H__ +#define __LUCI_CIRCLE_QUANTIZER_H__ + +#include <loco.h> + +#include <string> +#include <vector> + +namespace luci +{ + +class CircleQuantizer final +{ +public: + struct Options + { + struct LayerParam + { + std::string name; + std::string dtype; + std::string granularity; + }; + + enum Algorithm + { + QuantizeDequantizeWeights, + QuantizeWithMinMax, + Requantize, + CopyQuantParam, + ForceQuantParam, + ConvertToFakeQuantizedModel, + }; + + enum AlgorithmParameters + { + // quantize + Quantize_input_model_dtype, + Quantize_output_model_dtype, + Quantize_granularity, // layer-wise or channel-wise + Quantize_tensor_names, + Quantize_scales, + Quantize_zero_points, + Quantize_layer_params, + + // copy_quantparam + Quantize_src_tensor_names, + Quantize_dst_tensor_names, + + Quantize_input_type, + Quantize_output_type, + Quantize_TF_style_maxpool, + }; + + virtual ~Options() = default; + + virtual void enable(Algorithm) = 0; + virtual bool query(Algorithm) = 0; + virtual void param(AlgorithmParameters, const std::string &) = 0; + virtual const std::string 
param(AlgorithmParameters) const = 0; + virtual void params(AlgorithmParameters, std::vector<std::string> &) = 0; + virtual std::vector<std::string> params(AlgorithmParameters) const = 0; + + // Quantization parameters for multiple layers + virtual void layer_params(AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>> &) = 0; + virtual std::vector<std::shared_ptr<LayerParam>> layer_params(AlgorithmParameters) const = 0; + }; + +public: + // TODO maybe caller can provide Options as ctor parameters + Options *options(void); + +public: + void quantize(loco::Graph *) const; + +private: + std::unique_ptr<Options> _options; +}; + +} // namespace luci + +#endif // __LUCI_CIRCLE_QUANTIZER_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h b/compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h new file mode 100644 index 000000000..91dd2300e --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__ +#define __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to convert a quantized model to a fake-quantized fp32 model. 
+ */ +struct ConvertToFakeQuantizedModelPass final : public logo::Pass +{ + ConvertToFakeQuantizedModelPass() {} + + const char *name(void) const final { return "luci::ConvertToFakeQuantizedModelPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h new file mode 100644 index 000000000..18c9cd56a --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_COPY_QUANT_PARAM_PASS_H__ +#define __LUCI_COPY_QUANT_PARAM_PASS_H__ + +#include <loco.h> + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Pass to copy quantparam (scale, zerop) of a tensor to another tensor + */ +class CopyQuantParamPass : public logo::Pass +{ +public: + using TensorVector = std::vector<std::string>; + +public: + CopyQuantParamPass(TensorVector &src_tensors, TensorVector &dst_tensors) + : _src_tensors{src_tensors}, _dst_tensors{dst_tensors} + { + // DO NOTHING + } + virtual const char *name(void) const { return "luci::CopyQuantParamPass"; } + +public: + bool run(loco::Graph *graph); + +private: + TensorVector _src_tensors; + TensorVector _dst_tensors; +}; + +} // namespace luci + +#endif //__LUCI_COPY_QUANT_PARAM_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/FoldGatherPass.h b/compiler/luci/pass/include/luci/Pass/FoldGatherPass.h new file mode 100644 index 000000000..de08c8845 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/FoldGatherPass.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_FOLD_GATHER_PASS_H__ +#define __LUCI_FOLD_GATHER_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to fold Gather to a constant tensor + * + */ +struct FoldGatherPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::FoldGatherPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_FOLD_GATHER_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h b/compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h new file mode 100644 index 000000000..0c489fc30 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__ +#define __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to propagate quantization parameters of an operator's output to input + */ +struct PropagateQParamBackwardPass final : public logo::Pass +{ + PropagateQParamBackwardPass(loco::DataType output) : _output_model_dtype(output) {} + + const char *name(void) const final { return "luci::PropagateQParamBackwardPass"; } + + bool run(loco::Graph *g) final; + +private: + loco::DataType _output_model_dtype; +}; + +} // namespace luci + +#endif // __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h index 7e0c44b8c..952bd9614 100644 --- a/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h +++ b/compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__ -#define __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__ +#ifndef __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__ +#define __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__ #include <logo/Pass.h> @@ -23,15 +23,22 @@ namespace luci { /** - * @brief Class to propagate quantization parameters of an operator's output to input + * @brief Class to propagate quantization parameters of an operator's input to output */ -struct PropagateQuantParamPass final : public logo::Pass +struct PropagateQParamForwardPass final : public logo::Pass { - const char *name(void) const final { return "luci::PropagateQuantParamPass"; } + PropagateQParamForwardPass(bool TF_style_maxpool) : _TF_style_maxpool(TF_style_maxpool) {} + + PropagateQParamForwardPass() {} + + const char *name(void) const final { return "luci::PropagateQParamForwardPass"; } bool run(loco::Graph *g) final; + +private: + bool _TF_style_maxpool = false; }; } // namespace luci -#endif // __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__ +#endif // __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h index 5c9cd427f..30c8db058 100644 --- a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h +++ b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h @@ -17,6 +17,10 @@ #ifndef __LUCI_QUANTIZATION_PARAMETERS_H__ #define __LUCI_QUANTIZATION_PARAMETERS_H__ +#include <loco.h> + +#include <string> + namespace luci { @@ -26,6 +30,13 @@ enum QuantizationGranularity ChannelWise = 1, }; +struct LayerInfo +{ + std::string name; + loco::DataType dtype; + QuantizationGranularity granularity; +}; + } // namespace luci #endif // __LUCI_QUANTIZATION_PARAMETERS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h index 68765ec5b..1825ee1aa 100644 --- 
a/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h +++ b/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h @@ -32,12 +32,30 @@ namespace luci class QuantizeDequantizeWeightsPass : public logo::Pass { public: + struct Context + { + loco::DataType input_model_dtype = loco::DataType::Unknown; + loco::DataType output_model_dtype = loco::DataType::Unknown; + QuantizationGranularity granularity = QuantizationGranularity::ChannelWise; + std::vector<LayerInfo> layers_info; + }; + +public: + QuantizeDequantizeWeightsPass(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)} + { + // DO NOTHING + } + +public: QuantizeDequantizeWeightsPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype, QuantizationGranularity granularity) - : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{ - granularity} { - // DO NOTHING + _ctx = std::make_unique<Context>(); + { + _ctx->input_model_dtype = input_model_dtype; + _ctx->output_model_dtype = output_model_dtype; + _ctx->granularity = granularity; + } } virtual const char *name(void) const { return "luci::QuantizeDequantizeWeightsPass"; } @@ -45,9 +63,7 @@ public: bool run(loco::Graph *graph); private: - loco::DataType _input_model_dtype; - loco::DataType _output_model_dtype; - QuantizationGranularity _granularity; + std::unique_ptr<Context> _ctx; }; } // namespace luci diff --git a/compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h b/compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h new file mode 100644 index 000000000..c852f88e0 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_QUANTIZE_PRE_CHECKER_PASS_H__ +#define __LUCI_QUANTIZE_PRE_CHECKER_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Pass to verify the input model has the form acceptable by quantizer + */ +class QuantizePreCheckerPass : public logo::Pass +{ +public: + const char *name(void) const final { return "luci::QuantizePreCheckerPass"; } + +public: + bool run(loco::Graph *graph) final; +}; + +} // namespace luci + +#endif //__LUCI_QUANTIZE_PRE_CHECKER_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h index 648abad70..ea6db85d1 100644 --- a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h +++ b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h @@ -23,6 +23,8 @@ #include <luci/Pass/QuantizationParameters.h> +#include <vector> + namespace luci { @@ -31,26 +33,41 @@ namespace luci */ class QuantizeWithMinMaxPass : public logo::Pass { +public: + struct Context + { + loco::DataType input_model_dtype = loco::DataType::Unknown; + loco::DataType output_model_dtype = loco::DataType::Unknown; + QuantizationGranularity granularity = QuantizationGranularity::ChannelWise; + loco::DataType input_type = loco::DataType::Unknown; + loco::DataType output_type = loco::DataType::Unknown; + bool TF_style_maxpool = false; + std::vector<LayerInfo> layers_info; + }; + // For backward-compatibility // TODO Remove this constructor public: QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType 
output_model_dtype, QuantizationGranularity granularity) - : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, - _granularity{granularity}, _input_type{output_model_dtype}, _output_type{output_model_dtype} { - // DO NOTHING + _ctx = std::make_unique<Context>(); + { + _ctx->input_model_dtype = input_model_dtype; + _ctx->output_model_dtype = output_model_dtype; + _ctx->granularity = granularity; + _ctx->input_type = output_model_dtype; + _ctx->output_type = output_model_dtype; + _ctx->TF_style_maxpool = false; + } } public: - QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype, - QuantizationGranularity granularity, loco::DataType input_type, - loco::DataType output_type) - : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, - _granularity{granularity}, _input_type{input_type}, _output_type{output_type} + QuantizeWithMinMaxPass(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)} { // DO NOTHING } + virtual const char *name(void) const { return "luci::QuantizeWithMinMaxPass"; } public: @@ -61,11 +78,7 @@ private: void set_output_type(loco::Graph *graph) const; private: - loco::DataType _input_model_dtype; - loco::DataType _output_model_dtype; - QuantizationGranularity _granularity; - loco::DataType _input_type; - loco::DataType _output_type; + std::unique_ptr<Context> _ctx; }; } // namespace luci diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h new file mode 100644 index 000000000..3e76bcdc3 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__ +#define __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to remove redundant quantize operations + */ +struct RemoveRedundantQuantizePass final : public logo::Pass +{ + const char *name(void) const final { return "luci::RemoveRedundantQuantizePass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__ diff --git a/compiler/luci/pass/src/BatchNormPatternFinder.cpp b/compiler/luci/pass/src/BatchNormPatternFinder.cpp index c1a06bfda..e3f126b15 100644 --- a/compiler/luci/pass/src/BatchNormPatternFinder.cpp +++ b/compiler/luci/pass/src/BatchNormPatternFinder.cpp @@ -44,10 +44,26 @@ bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::C return false; } - if (constant->rank() != 1) + uint32_t channel_dim = 0; + + if (constant->rank() == 1) + { + channel_dim = constant->dim(0).value(); + } + else if (constant->rank() == 4) + { + for (uint32_t i = 0; i < 3; i++) + { + if (constant->dim(i).value() != 1) + return false; + } + channel_dim = constant->dim(3).value(); + } + else + { return false; + } - auto channel_dim = constant->dim(0); // Assumption: Layout is channel-last if (!(channel_dim == add->dim(add->rank() - 1))) return false; @@ -90,10 +106,26 @@ bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node, return false; } - if (constant->rank() != 1) + uint32_t channel_dim = 0; + + if (constant->rank() == 1) + { + 
channel_dim = constant->dim(0).value(); + } + else if (constant->rank() == 4) + { + for (uint32_t i = 0; i < 3; i++) + { + if (constant->dim(i).value() != 1) + return false; + } + channel_dim = constant->dim(3).value(); + } + else + { return false; + } - auto channel_dim = constant->dim(0); // Assumption: Layout is channel-last if (!(channel_dim == mul->dim(mul->rank() - 1))) return false; diff --git a/compiler/luci/pass/src/BatchNormPatternFinder.test.cpp b/compiler/luci/pass/src/BatchNormPatternFinder.test.cpp index 08e7fac1c..cc8c5615f 100644 --- a/compiler/luci/pass/src/BatchNormPatternFinder.test.cpp +++ b/compiler/luci/pass/src/BatchNormPatternFinder.test.cpp @@ -50,7 +50,7 @@ public: auto channel_size = *last_it; _add->shape(shape); - _add_beta->shape({channel_size}); + set_beta_shape(channel_size); _add_beta->size<loco::DataType::FLOAT32>(channel_size); for (uint32_t i = 0; i < channel_size; i++) _add_beta->at<loco::DataType::FLOAT32>(i) = i; @@ -63,10 +63,23 @@ public: luci::CircleAdd *add() { return _add; } protected: + virtual void set_beta_shape(uint32_t channel) = 0; + +protected: luci::CircleAdd *_add = nullptr; luci::CircleConst *_add_beta = nullptr; }; +class AddRank1BetaGraphlet : public AddBetaGraphlet +{ + void set_beta_shape(uint32_t channel) final { _add_beta->shape({channel}); } +}; + +class AddRank4BetaGraphlet : public AddBetaGraphlet +{ + void set_beta_shape(uint32_t channel) final { _add_beta->shape({1, 1, 1, channel}); } +}; + /** * @brief Graphlet with Mul and Const as gamma from BatchNorm */ @@ -90,7 +103,7 @@ public: auto channel_size = *last_it; _mul->shape(shape); - _mul_gamma->shape({channel_size}); + set_gamma_shape(channel_size); _mul_gamma->size<loco::DataType::FLOAT32>(channel_size); for (uint32_t i = 0; i < channel_size; i++) _mul_gamma->at<loco::DataType::FLOAT32>(i) = i; @@ -103,14 +116,27 @@ public: luci::CircleMul *mul(void) { return _mul; } protected: + virtual void set_gamma_shape(uint32_t channel) = 0; + +protected: 
luci::CircleMul *_mul = nullptr; luci::CircleConst *_mul_gamma = nullptr; }; +class MulRank1GammaGraphlet : public MulGammaGraphlet +{ + void set_gamma_shape(uint32_t channel) final { _mul_gamma->shape({channel}); } +}; + +class MulRank4GammaGraphlet : public MulGammaGraphlet +{ + void set_gamma_shape(uint32_t channel) final { _mul_gamma->shape({1, 1, 1, channel}); } +}; + /** * @brief Graph of Mul-Add pattern from BatchNorm */ -class MulAddGraph : public TestIOGraph, public AddBetaGraphlet, public MulGammaGraphlet +class MulAddGraph : public TestIOGraph, public AddRank1BetaGraphlet, public MulRank1GammaGraphlet { public: MulAddGraph() = default; @@ -118,8 +144,30 @@ public: void init(const ShapeU32 shape_in, const ShapeU32 shape_out) { TestIOGraph::init(shape_in, shape_out); - MulGammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE); - AddBetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU); + MulRank1GammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE); + AddRank1BetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU); + + // connect network + _mul->x(input()); + _mul->y(_mul_gamma); + _add->x(_mul); + _add->y(_add_beta); + output()->from(_add); + } +}; + +class MulAddRank4Graph : public TestIOGraph, + public AddRank4BetaGraphlet, + public MulRank4GammaGraphlet +{ +public: + MulAddRank4Graph() = default; + + void init(const ShapeU32 shape_in, const ShapeU32 shape_out) + { + TestIOGraph::init(shape_in, shape_out); + MulRank4GammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE); + AddRank4BetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU); // connect network _mul->x(input()); @@ -133,7 +181,7 @@ public: /** * @brief Graph of Add with Const */ -class AddGraph : public TestIOGraph, public AddBetaGraphlet +class AddGraph : public TestIOGraph, public AddRank1BetaGraphlet { public: AddGraph() = default; @@ -141,7 +189,24 @@ public: void init(const ShapeU32 shape_in, const ShapeU32 shape_out) { TestIOGraph::init(shape_in, 
shape_out); - AddBetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU); + AddRank1BetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU); + + // connect network + _add->x(input()); + _add->y(_add_beta); + output()->from(_add); + } +}; + +class AddRank4Graph : public TestIOGraph, public AddRank4BetaGraphlet +{ +public: + AddRank4Graph() = default; + + void init(const ShapeU32 shape_in, const ShapeU32 shape_out) + { + TestIOGraph::init(shape_in, shape_out); + AddRank4BetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU); // connect network _add->x(input()); @@ -160,6 +225,7 @@ public: protected: luci::test::MulAddGraph _mag; + luci::test::MulAddRank4Graph _mag_r4; }; class BatchNormPatternFinderAddTest : public ::testing::Test @@ -169,6 +235,7 @@ public: protected: luci::test::AddGraph _ag; + luci::test::AddRank4Graph _ag_r4; }; TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add) @@ -192,6 +259,19 @@ TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add2) ASSERT_TRUE(res); } +TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add_rank4) +{ + _mag_r4.init({1, 16, 16, 4}, {1, 16, 16, 4}); + + luci::CircleMul *mul = nullptr; + luci::CircleConst *beta = nullptr; + + auto res = luci::is_batchnorm_add(_mag_r4.add(), mul, beta); + ASSERT_TRUE(res); + ASSERT_NE(nullptr, mul); + ASSERT_NE(nullptr, beta); +} + TEST_F(BatchNormPatternFinderAddTest, is_batchnorm_add_NEG) { _ag.init({1, 16, 16, 4}, {1, 16, 16, 4}); @@ -215,3 +295,16 @@ TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_mul) ASSERT_NE(nullptr, pred); ASSERT_NE(nullptr, gamma); } + +TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_mul_rank4) +{ + _mag_r4.init({1, 16, 16, 4}, {1, 16, 16, 4}); + + luci::CircleNode *pred = nullptr; + luci::CircleConst *gamma = nullptr; + + auto res = luci::is_batchnorm_mul(_mag_r4.mul(), pred, gamma); + ASSERT_TRUE(res); + ASSERT_NE(nullptr, pred); + ASSERT_NE(nullptr, gamma); +} diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp 
b/compiler/luci/pass/src/CircleOptimizer.cpp index 75f04b3b5..6dbb22d7c 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -22,9 +22,9 @@ #include "luci/Pass/FoldCastPass.h" #include "luci/Pass/FoldDepthwiseConv2DPass.h" #include "luci/Pass/FoldDequantizePass.h" +#include "luci/Pass/FoldGatherPass.h" #include "luci/Pass/FoldSparseToDensePass.h" #include "luci/Pass/ForwardReshapeToUnaryOpPass.h" -#include "luci/Pass/ForceQuantParamPass.h" #include "luci/Pass/FuseActivationFunctionPass.h" #include "luci/Pass/FuseAddWithFullyConnectedPass.h" #include "luci/Pass/FuseAddWithTConvPass.h" @@ -37,11 +37,11 @@ #include "luci/Pass/FusePreActivationBatchNormPass.h" #include "luci/Pass/FuseTransposeWithMeanPass.h" #include "luci/Pass/MakeBatchNormGammaPositivePass.h" -#include "luci/Pass/PropagateQuantParamPass.h" #include "luci/Pass/RemoveFakeQuantPass.h" #include "luci/Pass/RemoveQuantDequantSeqPass.h" #include "luci/Pass/RemoveRedundantReshapePass.h" #include "luci/Pass/RemoveRedundantTransposePass.h" +#include "luci/Pass/RemoveRedundantQuantizePass.h" #include "luci/Pass/RemoveUnnecessaryReshapePass.h" #include "luci/Pass/RemoveUnnecessarySlicePass.h" #include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h" @@ -52,9 +52,6 @@ #include "luci/Pass/ResolveCustomOpBatchMatMulPass.h" #include "luci/Pass/ResolveCustomOpMatMulPass.h" #include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h" -#include "luci/Pass/RequantizePass.h" -#include "luci/Pass/QuantizeWithMinMaxPass.h" -#include "luci/Pass/QuantizeDequantizeWeightsPass.h" #include "luci/Pass/SparsifyTensorPass.h" #include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h" #include "luci/Pass/SubstitutePackToReshapePass.h" @@ -75,9 +72,6 @@ #include "ModulePhase.h" #include "ProgressReporter.h" -#include "helpers/Strings.h" - -#include "QuantizedModelVerifier.h" #include <luci/IR/CircleNodes.h> #include <logo/Phase.h> @@ -91,37 +85,17 @@ namespace using namespace luci; 
-template <typename T> T lexical_cast(const std::string &str) -{ - std::istringstream ss; - ss.str(str); - T data; - ss >> data; - return data; -} - -template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv) -{ - std::vector<T> result; - std::transform(sv.begin(), sv.end(), std::back_inserter(result), - [](std::string str) -> T { return lexical_cast<T>(str); }); - return result; -} - class OptimizeOptionsImpl final : public luci::CircleOptimizer::Options { public: void enable(Algorithm) final; void param(AlgorithmParameters, const std::string &) final; const std::string param(AlgorithmParameters) const final; - void params(AlgorithmParameters, std::vector<std::string> &) final; - std::vector<std::string> params(AlgorithmParameters) const final; bool query(Algorithm) final; private: std::vector<Algorithm> _algorithms; std::map<AlgorithmParameters, const std::string> _algorithm_params; - std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params; }; void OptimizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); } @@ -144,24 +118,6 @@ const std::string OptimizeOptionsImpl::param(AlgorithmParameters param) const } } -void OptimizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec) -{ - _multiple_params[param] = vec; -} - -std::vector<std::string> OptimizeOptionsImpl::params(AlgorithmParameters param) const -{ - auto param_vec = _multiple_params.find(param); - if (param_vec != _multiple_params.end()) - { - return param_vec->second; - } - else - { - return std::vector<std::string>(); - } -} - bool OptimizeOptionsImpl::query(Algorithm algo) { std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo); @@ -312,6 +268,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<luci::FoldDequantizePass>()); } + if (_options->query(Options::Algorithm::FoldGather)) + { + 
phase.emplace_back(std::make_unique<luci::FoldGatherPass>()); + } if (_options->query(Options::Algorithm::FoldSparseToDense)) { phase.emplace_back(std::make_unique<luci::FoldSparseToDensePass>()); @@ -368,6 +328,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<luci::RemoveRedundantTransposePass>()); } + if (_options->query(Options::Algorithm::RemoveRedundantQuantize)) + { + phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>()); + } if (_options->query(Options::Algorithm::ReplaceMulAddWithDepthwiseConv)) { phase.emplace_back(std::make_unique<luci::ReplaceMulAddWithDepthwiseConvPass>()); @@ -417,174 +381,6 @@ void CircleOptimizer::optimize(loco::Graph *g) const phase_runner.run(phase); } -void CircleOptimizer::quantize(loco::Graph *g) const -{ - // Fake quantization of weights - if (_options->query(Options::Algorithm::QuantizeDequantizeWeights)) - { - static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"}; - static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"}; - static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"}; - - auto input_model_dtype = - _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); - auto output_model_dtype = - _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype); - auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity); - - if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype)) - throw std::runtime_error("Unsupported input type. List of supported input type: " + - to_string(fakeq_supported_input_model_dtype)); - - if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype)) - throw std::runtime_error("Unsupported output type. 
List of supported output type: " + - to_string(fakeq_supported_output_model_dtype)); - - if (!in_array(to_lower_case(granularity), fakeq_supported_granularity)) - throw std::runtime_error("Unsupported granularity. List of supported granularity: " + - to_string(fakeq_supported_granularity)); - - if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise && - str_to_dtype(output_model_dtype) != loco::DataType::U8) - throw std::runtime_error("Layer-wise quantization only supports uint8 dtype."); - - // Clear existing quantparams before doing fake quantization - for (auto node : loco::active_nodes(loco::output_nodes(g))) - { - auto circle_node = loco::must_cast<luci::CircleNode *>(node); - if (circle_node->quantparam() != nullptr) - circle_node->quantparam(nullptr); - } - - luci::QuantizeDequantizeWeightsPass fake_quantizer(str_to_dtype(input_model_dtype), - str_to_dtype(output_model_dtype), - str_to_granularity(granularity)); - fake_quantizer.run(g); - } - - // Actual quantization of weights, bias, and activation - if (_options->query(Options::Algorithm::QuantizeWithMinMax)) - { - static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"}; - static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"}; - static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"}; - static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16"}; - static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16"}; - - auto input_model_dtype = - _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); - auto output_model_dtype = - _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype); - auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity); - auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type); - if (input_type.empty()) - input_type = output_model_dtype; - 
auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type); - if (output_type.empty()) - output_type = output_model_dtype; - - if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype)) - throw std::runtime_error("Unsupported input type. List of supported input types: " + - to_string(qwmm_supported_input_model_dtype)); - - if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype)) - throw std::runtime_error("Unsupported output type. List of supported output types: " + - to_string(qwmm_supported_output_model_dtype)); - - if (!in_array(to_lower_case(granularity), qwmm_supported_granularity)) - throw std::runtime_error("Unsupported granularity. List of supported granularity: " + - to_string(qwmm_supported_granularity)); - - if (!in_array(to_lower_case(input_type), qwmm_supported_input_type)) - throw std::runtime_error("Unsupported input type. List of supported input types: " + - to_string(qwmm_supported_input_type)); - - if (!in_array(to_lower_case(output_type), qwmm_supported_output_type)) - throw std::runtime_error("Unsupported output type. 
List of supported output types: " + - to_string(qwmm_supported_output_type)); - - if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise && - str_to_dtype(output_model_dtype) != loco::DataType::U8) - throw std::runtime_error("Layer-wise quantization only supports uint8 dtype."); - - luci::QuantizeWithMinMaxPass quantizer( - str_to_dtype(input_model_dtype), str_to_dtype(output_model_dtype), - str_to_granularity(granularity), str_to_dtype(input_type), str_to_dtype(output_type)); - quantizer.run(g); - - // Post-quantization optimizations - logo::Phase phase; - - phase.emplace_back(std::make_unique<luci::PropagateQuantParamPass>()); - - phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>()); - phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>()); - phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>()); - - ProgressReporter prog(g, logo::PhaseStrategy::Saturate); - logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g}; - phase_runner.attach(&prog); - phase_runner.run(phase); - - // Verify the type/granularity of the quantized model - luci::QuantizedModelVerifier verifier(str_to_dtype(output_model_dtype), - str_to_granularity(granularity)); - verifier.verify(g); - } - - // Requantize - if (_options->query(Options::Algorithm::Requantize)) - { - static const std::vector<std::string> rq_supported_input_model_dtype{"int8"}; - static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"}; - - auto input_model_dtype = - _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); - auto output_model_dtype = - _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype); - - if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype)) - throw std::runtime_error("Unsupported input type. 
List of supported input types: " + - to_string(rq_supported_input_model_dtype)); - - if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype)) - throw std::runtime_error("Unsupported output type. List of supported output types: " + - to_string(rq_supported_output_model_dtype)); - - luci::RequantizePass requantizer(str_to_dtype(input_model_dtype), - str_to_dtype(output_model_dtype)); - requantizer.run(g); - } - - // Force to write quantparam to specified tensors - // NOTE Only per-tensor (not per-channel) qparam can be written - if (_options->query(Options::Algorithm::ForceQuantParam)) - { - ForceQuantParamPass::TensorVector tensors = - _options->params(Options::AlgorithmParameters::Quantize_tensor_names); - auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales); - auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points); - - // Cast scales/zero_points to proper types - ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales); - ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points); - - ForceQuantParamPass fq(tensors, scales, zero_points); - fq.run(g); - } - - logo::Phase phase; - - // Do Shape/Type inference - phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>()); - phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>()); - - ProgressReporter prog(g, logo::PhaseStrategy::Saturate); - logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g}; - phase_runner.attach(&prog); - phase_runner.run(phase); -} - void CircleOptimizer::sparsify(loco::Graph *g) const { if (_options->query(Options::Algorithm::SparsifyTensorPass)) diff --git a/compiler/luci/pass/src/CircleOptimizer.test.cpp b/compiler/luci/pass/src/CircleOptimizer.test.cpp index a1b5c7f80..041fc7d75 100644 --- a/compiler/luci/pass/src/CircleOptimizer.test.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.test.cpp @@ -71,171 +71,3 @@ 
TEST(CircleOptimizerTest, sparsify_simple) SUCCEED(); } - -TEST(CircleOptimizerTest, quantize_quantdequant_simple) -{ - loco::Graph g; - luci::CircleOptimizer o; - - auto options = o.options(); - - options->enable(Algorithms::QuantizeDequantizeWeights); - options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); - options->param(AlgorithmParameters::Quantize_granularity, "layer"); - - o.quantize(&g); - - SUCCEED(); -} - -TEST(CircleOptimizerTest, quantize_quantdequant_input_NEG) -{ - loco::Graph g; - luci::CircleOptimizer o; - - auto options = o.options(); - - options->enable(Algorithms::QuantizeDequantizeWeights); - options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid"); - options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); - options->param(AlgorithmParameters::Quantize_granularity, "layer"); - - EXPECT_THROW(o.quantize(&g), std::runtime_error); -} - -TEST(CircleOptimizerTest, quantize_quantdequant_output_NEG) -{ - loco::Graph g; - luci::CircleOptimizer o; - - auto options = o.options(); - - options->enable(Algorithms::QuantizeDequantizeWeights); - options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid"); - options->param(AlgorithmParameters::Quantize_granularity, "layer"); - - EXPECT_THROW(o.quantize(&g), std::runtime_error); -} - -TEST(CircleOptimizerTest, quantize_quantdequant_gran_NEG) -{ - loco::Graph g; - luci::CircleOptimizer o; - - auto options = o.options(); - - options->enable(Algorithms::QuantizeDequantizeWeights); - options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); - options->param(AlgorithmParameters::Quantize_granularity, "invalid"); - - EXPECT_THROW(o.quantize(&g), std::runtime_error); -} - -TEST(CircleOptimizerTest, 
quantize_minmax_simple) -{ - loco::Graph g; - luci::CircleOptimizer o; - - auto options = o.options(); - - options->enable(Algorithms::QuantizeWithMinMax); - options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); - options->param(AlgorithmParameters::Quantize_granularity, "layer"); - - o.quantize(&g); - - SUCCEED(); -} - -TEST(CircleOptimizerTest, quantize_minmax_input_NEG) -{ - loco::Graph g; - luci::CircleOptimizer o; - - auto options = o.options(); - - options->enable(Algorithms::QuantizeWithMinMax); - options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid"); - options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); - options->param(AlgorithmParameters::Quantize_granularity, "layer"); - - EXPECT_THROW(o.quantize(&g), std::runtime_error); -} - -TEST(CircleOptimizerTest, quantize_minmax_output_NEG) -{ - loco::Graph g; - luci::CircleOptimizer o; - - auto options = o.options(); - - options->enable(Algorithms::QuantizeWithMinMax); - options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid"); - options->param(AlgorithmParameters::Quantize_granularity, "layer"); - - EXPECT_THROW(o.quantize(&g), std::runtime_error); -} - -TEST(CircleOptimizerTest, quantize_minmax_gran_NEG) -{ - loco::Graph g; - luci::CircleOptimizer o; - - auto options = o.options(); - - options->enable(Algorithms::QuantizeWithMinMax); - options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); - options->param(AlgorithmParameters::Quantize_granularity, "invalid"); - - EXPECT_THROW(o.quantize(&g), std::runtime_error); -} - -TEST(CircleOptimizerTest, quantize_requant_simple) -{ - loco::Graph g; - luci::CircleOptimizer o; - - auto options = o.options(); - - 
options->enable(Algorithms::Requantize); - options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8"); - options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); - - o.quantize(&g); - - SUCCEED(); -} - -TEST(CircleOptimizerTest, quantize_requant_input_NEG) -{ - loco::Graph g; - luci::CircleOptimizer o; - - auto options = o.options(); - - options->enable(Algorithms::Requantize); - options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid"); - options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); - - EXPECT_THROW(o.quantize(&g), std::runtime_error); -} - -TEST(CircleOptimizerTest, quantize_requant_output_NEG) -{ - loco::Graph g; - luci::CircleOptimizer o; - - auto options = o.options(); - - options->enable(Algorithms::Requantize); - options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8"); - options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid"); - - EXPECT_THROW(o.quantize(&g), std::runtime_error); -} diff --git a/compiler/luci/pass/src/CircleQuantizer.cpp b/compiler/luci/pass/src/CircleQuantizer.cpp new file mode 100644 index 000000000..ce38a90b9 --- /dev/null +++ b/compiler/luci/pass/src/CircleQuantizer.cpp @@ -0,0 +1,458 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/CircleQuantizer.h" + +#include "luci/Pass/CopyQuantParamPass.h" +#include "luci/Pass/ForceQuantParamPass.h" +#include "luci/Pass/PropagateQParamForwardPass.h" +#include "luci/Pass/RequantizePass.h" +#include "luci/Pass/ConvertToFakeQuantizedModelPass.h" +#include "luci/Pass/FoldDequantizePass.h" +#include "luci/Pass/QuantizePreCheckerPass.h" +#include "luci/Pass/QuantizeWithMinMaxPass.h" +#include "luci/Pass/QuantizeDequantizeWeightsPass.h" + +#include "luci/Pass/CircleShapeInferencePass.h" +#include "luci/Pass/CircleTypeInferencePass.h" + +// logo passes +#include <logo/RemoveDeadNodeWithQueryPass.h> + +#include "ProgressReporter.h" +#include "helpers/Strings.h" + +#include "QuantizedModelVerifier.h" + +#include <luci/IR/CircleNode.h> +#include <logo/Phase.h> + +#include <memory> + +namespace +{ + +using namespace luci; +using LayerParam = luci::CircleQuantizer::Options::LayerParam; + +template <typename T> T lexical_cast(const std::string &str) +{ + std::istringstream ss; + ss.str(str); + T data; + ss >> data; + return data; +} + +template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv) +{ + std::vector<T> result; + std::transform(sv.begin(), sv.end(), std::back_inserter(result), + [](std::string str) -> T { return lexical_cast<T>(str); }); + return result; +} + +class QuantizeOptionsImpl final : public luci::CircleQuantizer::Options +{ +public: + void enable(Algorithm) final; + void param(AlgorithmParameters, const std::string &) final; + const std::string param(AlgorithmParameters) const final; + void params(AlgorithmParameters, std::vector<std::string> &) final; + std::vector<std::string> params(AlgorithmParameters) const final; + void layer_params(AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>> &) final; + std::vector<std::shared_ptr<LayerParam>> layer_params(AlgorithmParameters) const final; + bool query(Algorithm) final; + +private: + std::vector<Algorithm> _algorithms; + 
std::map<AlgorithmParameters, const std::string> _algorithm_params; + std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params; + std::map<AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>>> _layer_params; +}; + +void QuantizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); } + +void QuantizeOptionsImpl::param(AlgorithmParameters param, const std::string &str) +{ + _algorithm_params.insert(std::pair<AlgorithmParameters, const std::string>(param, str)); +} + +const std::string QuantizeOptionsImpl::param(AlgorithmParameters param) const +{ + auto param_str = _algorithm_params.find(param); + if (param_str != _algorithm_params.end()) + { + return param_str->second; + } + else + { + return std::string(); + } +} + +void QuantizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec) +{ + _multiple_params[param] = vec; +} + +std::vector<std::string> QuantizeOptionsImpl::params(AlgorithmParameters param) const +{ + auto param_vec = _multiple_params.find(param); + if (param_vec != _multiple_params.end()) + { + return param_vec->second; + } + else + { + return std::vector<std::string>(); + } +} + +void QuantizeOptionsImpl::layer_params(AlgorithmParameters param, + std::vector<std::shared_ptr<LayerParam>> &vec) +{ + _layer_params[param] = vec; +} + +std::vector<std::shared_ptr<LayerParam>> +QuantizeOptionsImpl::layer_params(AlgorithmParameters param) const +{ + auto param_vec = _layer_params.find(param); + if (param_vec != _layer_params.end()) + { + return param_vec->second; + } + else + { + return std::vector<std::shared_ptr<LayerParam>>(); + } +} + +bool QuantizeOptionsImpl::query(Algorithm algo) +{ + std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo); + if (it == _algorithms.end()) + return false; + + return true; +} + +} // namespace + +namespace luci +{ + +CircleQuantizer::Options *CircleQuantizer::options(void) +{ + if (_options == nullptr) + { + _options = 
std::make_unique<QuantizeOptionsImpl>(); + } + + return _options.get(); +} + +void CircleQuantizer::quantize(loco::Graph *g) const +{ + // Fake quantization of weights + if (_options->query(Options::Algorithm::QuantizeDequantizeWeights)) + { + static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"}; + static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"}; + static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"}; + + auto input_model_dtype = + _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); + auto output_model_dtype = + _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype); + auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity); + auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params); + + if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype)) + throw std::runtime_error("Unsupported input type. List of supported input type: " + + to_string(fakeq_supported_input_model_dtype)); + + if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype)) + throw std::runtime_error("Unsupported output type. List of supported output type: " + + to_string(fakeq_supported_output_model_dtype)); + + if (!in_array(to_lower_case(granularity), fakeq_supported_granularity)) + throw std::runtime_error("Unsupported granularity. 
List of supported granularity: " + + to_string(fakeq_supported_granularity)); + + if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise && + str_to_dtype(output_model_dtype) != loco::DataType::U8) + throw std::runtime_error("Layer-wise quantization only supports uint8 dtype."); + + // Check dtype/granularity of layer params + for (auto layer_param : layer_params) + { + auto name = layer_param->name; + if (!in_array(to_lower_case(layer_param->dtype), fakeq_supported_output_model_dtype)) + { + throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " + + to_string(fakeq_supported_output_model_dtype)); + } + if (!in_array(to_lower_case(layer_param->granularity), fakeq_supported_granularity)) + { + throw std::runtime_error( + "Unsupported granularity in " + name + + ". List of supported granularity: " + to_string(fakeq_supported_granularity)); + } + } + + // Clear existing quantparams before doing fake quantization + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + if (circle_node->quantparam() != nullptr) + circle_node->quantparam(nullptr); + } + + auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>(); + { + ctx->input_model_dtype = str_to_dtype(input_model_dtype); + ctx->output_model_dtype = str_to_dtype(output_model_dtype); + ctx->granularity = str_to_granularity(granularity); + + for (auto layer_param : layer_params) + { + LayerInfo info; + { + info.name = layer_param->name; + info.dtype = str_to_dtype(layer_param->dtype); + info.granularity = str_to_granularity(layer_param->granularity); + } + ctx->layers_info.emplace_back(info); + } + } + + luci::QuantizeDequantizeWeightsPass fake_quantizer(std::move(ctx)); + + fake_quantizer.run(g); + } + + // Actual quantization of weights, bias, and activation + if (_options->query(Options::Algorithm::QuantizeWithMinMax)) + { + static const std::vector<std::string> 
qwmm_supported_input_model_dtype{"float32"}; + static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"}; + static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"}; + static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16"}; + static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16"}; + + auto input_model_dtype = + _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); + auto output_model_dtype = + _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype); + auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity); + auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type); + if (input_type.empty()) + input_type = output_model_dtype; + auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type); + if (output_type.empty()) + output_type = output_model_dtype; + + bool TF_style_maxpool = + _options->param(Options::AlgorithmParameters::Quantize_TF_style_maxpool) == "True"; + + auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params); + + if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype)) + throw std::runtime_error("Unsupported input type. List of supported input types: " + + to_string(qwmm_supported_input_model_dtype)); + + if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype)) + throw std::runtime_error("Unsupported output type. List of supported output types: " + + to_string(qwmm_supported_output_model_dtype)); + + if (!in_array(to_lower_case(granularity), qwmm_supported_granularity)) + throw std::runtime_error("Unsupported granularity. List of supported granularity: " + + to_string(qwmm_supported_granularity)); + + if (!in_array(to_lower_case(input_type), qwmm_supported_input_type)) + throw std::runtime_error("Unsupported input type. 
List of supported input types: " + + to_string(qwmm_supported_input_type)); + + if (!in_array(to_lower_case(output_type), qwmm_supported_output_type)) + throw std::runtime_error("Unsupported output type. List of supported output types: " + + to_string(qwmm_supported_output_type)); + + if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise && + str_to_dtype(output_model_dtype) != loco::DataType::U8) + throw std::runtime_error("Layer-wise quantization only supports uint8 dtype."); + + // Check dtype/granularity of layer params + for (auto layer_param : layer_params) + { + auto name = layer_param->name; + if (!in_array(to_lower_case(layer_param->dtype), qwmm_supported_output_model_dtype)) + { + throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " + + to_string(qwmm_supported_output_model_dtype)); + } + if (!in_array(to_lower_case(layer_param->granularity), qwmm_supported_granularity)) + { + throw std::runtime_error( + "Unsupported granularity in " + name + + ". 
List of supported granularity: " + to_string(qwmm_supported_granularity)); + } + } + + // Input model checker for quantization + luci::QuantizePreCheckerPass input_model_checker{}; + input_model_checker.run(g); + + auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>(); + { + ctx->input_model_dtype = str_to_dtype(input_model_dtype); + ctx->output_model_dtype = str_to_dtype(output_model_dtype); + ctx->granularity = str_to_granularity(granularity); + ctx->input_type = str_to_dtype(input_type); + ctx->output_type = str_to_dtype(output_type); + ctx->TF_style_maxpool = TF_style_maxpool; + + for (auto layer_param : layer_params) + { + LayerInfo info; + { + info.name = layer_param->name; + info.dtype = str_to_dtype(layer_param->dtype); + info.granularity = str_to_granularity(layer_param->granularity); + } + ctx->layers_info.emplace_back(info); + } + } + + luci::QuantizeWithMinMaxPass quantizer(std::move(ctx)); + + quantizer.run(g); + + auto verify_ctx = std::make_unique<luci::QuantizedModelVerifier::Context>(); + { + verify_ctx->output_model_dtype = str_to_dtype(output_model_dtype); + verify_ctx->granularity = str_to_granularity(granularity); + verify_ctx->input_type = str_to_dtype(input_type); + verify_ctx->output_type = str_to_dtype(output_type); + verify_ctx->TF_style_maxpool = TF_style_maxpool; + + for (auto layer_param : layer_params) + { + LayerInfo info; + { + info.name = layer_param->name; + info.dtype = str_to_dtype(layer_param->dtype); + info.granularity = str_to_granularity(layer_param->granularity); + } + verify_ctx->layers_info.emplace_back(info); + } + } + + // Verify the type/granularity of the quantized model + luci::QuantizedModelVerifier verifier(std::move(verify_ctx)); + + verifier.verify(g); + } + + // Requantize + if (_options->query(Options::Algorithm::Requantize)) + { + static const std::vector<std::string> rq_supported_input_model_dtype{"int8"}; + static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"}; + + auto 
input_model_dtype = + _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); + auto output_model_dtype = + _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype); + + if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype)) + throw std::runtime_error("Unsupported input type. List of supported input types: " + + to_string(rq_supported_input_model_dtype)); + + if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype)) + throw std::runtime_error("Unsupported output type. List of supported output types: " + + to_string(rq_supported_output_model_dtype)); + + luci::RequantizePass requantizer(str_to_dtype(input_model_dtype), + str_to_dtype(output_model_dtype)); + requantizer.run(g); + } + + // Force to write quantparam to specified tensors + // NOTE Only per-tensor (not per-channel) qparam can be written + if (_options->query(Options::Algorithm::ForceQuantParam)) + { + ForceQuantParamPass::TensorVector tensors = + _options->params(Options::AlgorithmParameters::Quantize_tensor_names); + auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales); + auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points); + + // Cast scales/zero_points to proper types + ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales); + ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points); + + ForceQuantParamPass fq(tensors, scales, zero_points); + fq.run(g); + } + + // Copy quantparam of a tensor to another tensor + if (_options->query(Options::Algorithm::CopyQuantParam)) + { + CopyQuantParamPass::TensorVector src_tensors = + _options->params(Options::AlgorithmParameters::Quantize_src_tensor_names); + CopyQuantParamPass::TensorVector dst_tensors = + _options->params(Options::AlgorithmParameters::Quantize_dst_tensor_names); + + CopyQuantParamPass cq(src_tensors, dst_tensors); + cq.run(g); + } + + // Convert 
quantized model to fake-quantized model + if (_options->query(Options::Algorithm::ConvertToFakeQuantizedModel)) + { + luci::ConvertToFakeQuantizedModelPass fake_quantizer; + fake_quantizer.run(g); + + logo::Phase phase; + + // Default passes + phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>()); + phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>()); + phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>()); + + // Fold Dequantize Ops generated during fake quantization + phase.emplace_back(std::make_unique<luci::FoldDequantizePass>()); + + ProgressReporter prog(g, logo::PhaseStrategy::Restart); + logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g}; + phase_runner.attach(&prog); + phase_runner.run(phase); + } + + logo::Phase phase; + + // Do Shape/Type inference + phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>()); + phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>()); + + ProgressReporter prog(g, logo::PhaseStrategy::Saturate); + logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g}; + phase_runner.attach(&prog); + phase_runner.run(phase); +} + +} // namespace luci diff --git a/compiler/luci/pass/src/CircleQuantizer.test.cpp b/compiler/luci/pass/src/CircleQuantizer.test.cpp new file mode 100644 index 000000000..5766d5fe5 --- /dev/null +++ b/compiler/luci/pass/src/CircleQuantizer.test.cpp @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/CircleQuantizer.h" + +#include <gtest/gtest.h> + +using namespace luci; +using Algorithms = luci::CircleQuantizer::Options::Algorithm; +using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters; + +TEST(CircleQuantizerTest, quantize_quantdequant_simple) +{ + loco::Graph g; + luci::CircleQuantizer o; + + auto options = o.options(); + + options->enable(Algorithms::QuantizeDequantizeWeights); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_granularity, "layer"); + + o.quantize(&g); + + SUCCEED(); +} + +TEST(CircleQuantizerTest, quantize_quantdequant_input_NEG) +{ + loco::Graph g; + luci::CircleQuantizer o; + + auto options = o.options(); + + options->enable(Algorithms::QuantizeDequantizeWeights); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_granularity, "layer"); + + EXPECT_THROW(o.quantize(&g), std::runtime_error); +} + +TEST(CircleQuantizerTest, quantize_quantdequant_output_NEG) +{ + loco::Graph g; + luci::CircleQuantizer o; + + auto options = o.options(); + + options->enable(Algorithms::QuantizeDequantizeWeights); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid"); + options->param(AlgorithmParameters::Quantize_granularity, "layer"); + + EXPECT_THROW(o.quantize(&g), std::runtime_error); +} + +TEST(CircleQuantizerTest, quantize_quantdequant_gran_NEG) +{ + loco::Graph g; + luci::CircleQuantizer o; + + auto options = o.options(); + + options->enable(Algorithms::QuantizeDequantizeWeights); + 
options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_granularity, "invalid"); + + EXPECT_THROW(o.quantize(&g), std::runtime_error); +} + +TEST(CircleQuantizerTest, quantize_minmax_simple) +{ + loco::Graph g; + luci::CircleQuantizer o; + + auto options = o.options(); + + options->enable(Algorithms::QuantizeWithMinMax); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_granularity, "layer"); + + o.quantize(&g); + + SUCCEED(); +} + +TEST(CircleQuantizerTest, quantize_minmax_input_NEG) +{ + loco::Graph g; + luci::CircleQuantizer o; + + auto options = o.options(); + + options->enable(Algorithms::QuantizeWithMinMax); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_granularity, "layer"); + + EXPECT_THROW(o.quantize(&g), std::runtime_error); +} + +TEST(CircleQuantizerTest, quantize_minmax_output_NEG) +{ + loco::Graph g; + luci::CircleQuantizer o; + + auto options = o.options(); + + options->enable(Algorithms::QuantizeWithMinMax); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid"); + options->param(AlgorithmParameters::Quantize_granularity, "layer"); + + EXPECT_THROW(o.quantize(&g), std::runtime_error); +} + +TEST(CircleQuantizerTest, quantize_minmax_gran_NEG) +{ + loco::Graph g; + luci::CircleQuantizer o; + + auto options = o.options(); + + options->enable(Algorithms::QuantizeWithMinMax); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + 
options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_granularity, "invalid"); + + EXPECT_THROW(o.quantize(&g), std::runtime_error); +} + +TEST(CircleQuantizerTest, quantize_requant_simple) +{ + loco::Graph g; + luci::CircleQuantizer o; + + auto options = o.options(); + + options->enable(Algorithms::Requantize); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); + + o.quantize(&g); + + SUCCEED(); +} + +TEST(CircleQuantizerTest, quantize_requant_input_NEG) +{ + loco::Graph g; + luci::CircleQuantizer o; + + auto options = o.options(); + + options->enable(Algorithms::Requantize); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); + + EXPECT_THROW(o.quantize(&g), std::runtime_error); +} + +TEST(CircleQuantizerTest, quantize_requant_output_NEG) +{ + loco::Graph g; + luci::CircleQuantizer o; + + auto options = o.options(); + + options->enable(Algorithms::Requantize); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid"); + + EXPECT_THROW(o.quantize(&g), std::runtime_error); +} diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp index 270714049..ce4f54035 100644 --- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp +++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp @@ -228,6 +228,9 @@ bool check_4d_reshape(loco::Node *node, const std::vector<int32_t> indices) if (input->shape_status() != luci::ShapeStatus::VALID) return false; + if (input->rank() != 4) + return false; + if (reshape->shape_status() != luci::ShapeStatus::VALID) return false; @@ -804,6 +807,8 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool> return 
true; } + bool visit(luci::CircleElu *node) { return convert_unary_features<luci::CircleElu>(node); } + bool visit(luci::CircleLeakyRelu *node) { return convert_unary_features<luci::CircleLeakyRelu>(node); @@ -1240,6 +1245,7 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g) break; case luci::CircleOpcode::ADD: case luci::CircleOpcode::CONCATENATION: + case luci::CircleOpcode::ELU: case luci::CircleOpcode::LEAKY_RELU: case luci::CircleOpcode::LOGISTIC: case luci::CircleOpcode::MAXIMUM: diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp index c9412fbb1..dd81d1380 100644 --- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp +++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp @@ -264,6 +264,22 @@ public: luci::CircleConst *input2 = nullptr; }; +class EluGraph final : public SimpleGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + elu = g.nodes()->create<luci::CircleElu>(); + elu->features(input); + elu->name("elu"); + + return elu; + } + +public: + luci::CircleElu *elu = nullptr; +}; + class LeakyReluGraph final : public SimpleGraph { protected: @@ -941,6 +957,26 @@ TEST(ConvertNCHWToNHWC, Concatenation) EXPECT_EQ(3, g.concat->axis()); } +TEST(ConvertNCHWToNHWC, Elu) +{ + EluGraph g; + g.init(); + + run_phase(&g.g, true, true); + + check_pre_trans(g.elu->features()); + + auto elu_succs = loco::succs(g.elu); + EXPECT_EQ(1, elu_succs.size()); + check_post_trans(*elu_succs.begin()); + + // Check elu shape + EXPECT_EQ(1, g.elu->dim(0).value()); + EXPECT_EQ(4, g.elu->dim(1).value()); + EXPECT_EQ(4, g.elu->dim(2).value()); + EXPECT_EQ(16, g.elu->dim(3).value()); +} + TEST(ConvertNCHWToNHWC, LeakyRelu) { LeakyReluGraph g; diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp new file mode 100644 index 000000000..11970fff5 --- /dev/null +++ 
b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ConvertToFakeQuantizedModelPass.h" +#include "luci/Pass/QuantizationParameters.h" + +#include "QuantizationUtils.h" + +#include <luci/Profile/CircleNodeOrigin.h> +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> +#include <luci/Log.h> + +namespace +{ + +// Create Quantize Op whose dtype/shape/qparam are the same with node +luci::CircleQuantize *create_quantize(luci::CircleNode *node) +{ + auto quantize = node->graph()->nodes()->create<luci::CircleQuantize>(); + quantize->name(node->name() + "_Quantize"); + quantize->dtype(node->dtype()); + quantize->rank(node->rank()); + for (uint32_t i = 0; i < node->rank(); i++) + quantize->dim(i).set(node->dim(i).value()); + + quantize->shape_status(luci::ShapeStatus::VALID); + + copy_quantparam(node, quantize); + + luci::add_origin(quantize, luci::get_origin(node)); + + return quantize; +} + +// Create Dequantize Op whose shape is the same with node +luci::CircleDequantize *create_dequantize(luci::CircleNode *node) +{ + auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>(); + dequantize->name(node->name() + "_Dequantize"); + dequantize->dtype(loco::DataType::FLOAT32); + dequantize->rank(node->rank()); + for (uint32_t i = 0; i < node->rank(); i++) + 
dequantize->dim(i).set(node->dim(i).value()); + + dequantize->shape_status(luci::ShapeStatus::VALID); + + luci::add_origin(dequantize, luci::get_origin(node)); + + return dequantize; +} + +// Return true if node is quantized activation +// 1. dtype is u8 or s16 +// 2. node has qparam +bool is_quant_act(const luci::CircleNode *node) +{ + if (node->dtype() != loco::DataType::U8 and node->dtype() != loco::DataType::S16) + return false; + + if (not node->quantparam()) + return false; + + return true; +} + +// Return true if node is quantized const +// 1. dtype is not fp32 +// 2. node has qparam +// NOTE Quantized const can have the following types +// u8 (weights, activation), s16 (weights, activation), s32 (bias), s64 (bias) +bool is_quant_const(const luci::CircleConst *node) +{ + if (node->dtype() == loco::DataType::FLOAT32) + return false; + + if (not node->quantparam()) + return false; + + return true; +} + +// Insert dequantize Op after node +void insert_dequantize(loco::Node *lnode) +{ + auto node = loco::must_cast<luci::CircleNode *>(lnode); + auto dequant = create_dequantize(node); + loco::replace(node).with(dequant); + dequant->input(node); +} + +// Insert quantize Op after node and return the quantize Op +luci::CircleQuantize *insert_quantize(loco::Node *lnode) +{ + auto node = loco::must_cast<luci::CircleNode *>(lnode); + auto quant = create_quantize(node); + loco::replace(node).with(quant); + quant->input(node); + return quant; +} + +// Dequantize node +void dequantize(luci::CircleNode *node) +{ + node->dtype(loco::DataType::FLOAT32); + node->quantparam(nullptr); +} + +// Do fake quantization on quantized activation +// 1. Insert Quantize-Dequantize Ops +// 2. 
Update dtype/quantparam of node +void fq_activation(luci::CircleNode *node) +{ + if (not is_quant_act(node)) + return; + + auto quant = insert_quantize(node); + insert_dequantize(quant); + + dequantize(node); +} + +#define RETURN_UNLESS(COND) \ + if (not(COND)) \ + return; + +// Visitor to do fake quantization for each Op +// For non-const activation, insert Quantize-Dequantize after the ofm +// For quantized const, insert Dequantize after the const +struct FakeQuantize final : public luci::CircleNodeMutableVisitor<void> +{ + void visit(luci::CircleNode *node) + { + throw std::runtime_error("Unsupported op for fake quantization in " + node->name()); + } + + void visit(luci::CircleInput *node) + { + RETURN_UNLESS(is_quant_act(node)); + + auto quant = insert_quantize(node); + insert_dequantize(quant); + + dequantize(node); + + // Update graph input + const auto inputs = node->graph()->inputs(); + auto graph_input = inputs->at(node->index()); + graph_input->dtype(loco::DataType::FLOAT32); + } + + void visit(luci::CircleOutput *node) + { + RETURN_UNLESS(is_quant_act(node)); + + dequantize(node); + + // Update graph output + const auto outputs = node->graph()->outputs(); + auto graph_output = outputs->at(node->index()); + graph_output->dtype(loco::DataType::FLOAT32); + } + + // For quantized const, insert Dequantize Op + void visit(luci::CircleConst *node) + { + RETURN_UNLESS(is_quant_const(node)); + + insert_dequantize(node); + } + + // For non-const activation, insert Quantize-Dequantize Ops + // and dequantize the node + void visit(luci::CircleConv2D *node) { fq_activation(node); } + void visit(luci::CircleAdd *node) { fq_activation(node); } +}; + +#undef RETURN_UNLESS + +} // namespace + +namespace luci +{ + +bool ConvertToFakeQuantizedModelPass::run(loco::Graph *g) +{ + LOGGER(l); + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + INFO(l) << "ConvertToFakeQuantizedModelPass visit node: 
" << circle_node->name() << std::endl; + + FakeQuantize fq; + circle_node->accept(&fq); + } + + // One time run + return false; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp new file mode 100644 index 000000000..560d68a74 --- /dev/null +++ b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <logo/Phase.h> + +#include "luci/Pass/ConvertToFakeQuantizedModelPass.h" +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +// Check the below pattern +// Quantize (scale, zp) -> Dequantize (node) +void check_q_dq(loco::Node *node, float scale, int64_t zp) +{ + auto dequant = dynamic_cast<luci::CircleDequantize *>(node); + EXPECT_TRUE(dequant != nullptr); + auto quant = dynamic_cast<luci::CircleQuantize *>(dequant->input()); + EXPECT_TRUE(quant != nullptr); + auto qparam = quant->quantparam(); + EXPECT_EQ(scale, qparam->scale[0]); + EXPECT_EQ(zp, qparam->zerop[0]); +} + +// Check the below pattern +// Dequantize (node) +void check_dq(loco::Node *node) +{ + auto dequant = dynamic_cast<luci::CircleDequantize *>(node); + EXPECT_TRUE(dequant != nullptr); +} + +void set_qparam(luci::CircleNode *node, float scale, int64_t zp) +{ + auto qparam = std::make_unique<luci::CircleQuantParam>(); + { + qparam->scale.push_back(scale); + qparam->zerop.push_back(zp); + } + node->quantparam(std::move(qparam)); +} + +/** + * SimpleGraph for testing + * - Child class should implement insertGraphBody() + * + * Example (U8ConvGraph inherits SimpleGraph and create Conv2D Op) + * + * BEFORE + * - A model is quantized (ex: u8) + * + * [Input(u8)] [Filter(u8)] [Bias(s32)] + * \ | / + * \ | / + * \ | / + * [Conv2D(u8)] + * | + * [Output(u8)] + * + * AFTER + * - Ops are converted to fp32 + * - Quantize/Dequantize Ops are inserted properly + * - Q-DQ is inserted after non-const activation + * - DQ is inserted after const + * + * [Input(u8)] + * | + * [Quant(u8)] [Filter(u8)] [Bias(s32)] + * | | | + * [Dequant(fp32)] [Dequant(fp32)] [Dequant(fp32)] + * \ | / + * \ | / + * \ | / + * [Conv2D(fp32)] + * | + * [Quant(u8)] + * | + * [Dequant(fp32)] + * | + * [Output(fp32)] + */ +template <loco::DataType T> class SimpleGraph +{ +public: + void init() + { + input = g.nodes()->create<luci::CircleInput>(); + output = g.nodes()->create<luci::CircleOutput>(); 
+ input->name("input"); + output->name("output"); + + auto graph_input = g.inputs()->create(); + input->index(graph_input->index()); + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + graph_input->dtype(T); + input->dtype(T); + output->dtype(T); + graph_output->dtype(T); + + graph_input->shape({1, 4, 4, 4}); + input->shape({1, 4, 4, 4}); + output->shape({1, 4, 4, 4}); + graph_output->shape({1, 4, 4, 4}); + + set_qparam(input, 1.0, 0); + set_qparam(output, 1.0, 0); + + auto graph_body = insertGraphBody(input); + output->from(graph_body); + } + + virtual ~SimpleGraph() = default; + +protected: + virtual loco::Node *insertGraphBody(loco::Node *input) = 0; + +public: + loco::Graph g; + luci::CircleInput *input = nullptr; + luci::CircleOutput *output = nullptr; +}; + +class U8ConvGraph final : public SimpleGraph<loco::DataType::U8> +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + conv = g.nodes()->create<luci::CircleConv2D>(); + weights = g.nodes()->create<luci::CircleConst>(); + bias = g.nodes()->create<luci::CircleConst>(); + + conv->dtype(loco::DataType::U8); + weights->dtype(loco::DataType::U8); + bias->dtype(loco::DataType::S32); + + conv->shape({1, 4, 4, 4}); + weights->shape({4, 1, 1, 4}); + bias->shape({4}); + + weights->size<loco::DataType::U8>(16); + for (uint32_t i = 0; i < 16; i++) + weights->at<loco::DataType::U8>(i) = i; + + bias->size<loco::DataType::S32>(4); + for (uint32_t i = 0; i < 4; i++) + bias->at<loco::DataType::S32>(i) = i; + + set_qparam(conv, 2.0, 127); + set_qparam(weights, 2.0, 127); + set_qparam(bias, 2.0, 127); + + conv->input(input); + conv->filter(weights); + conv->bias(bias); + + conv->name("conv"); + weights->name("weights"); + bias->name("bias"); + + return conv; + } + +public: + luci::CircleConv2D *conv = nullptr; + luci::CircleConst *weights = nullptr; + luci::CircleConst *bias = nullptr; +}; + +class FP32ConvGraph final : public SimpleGraph<loco::DataType::FLOAT32> 
+{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + conv = g.nodes()->create<luci::CircleConv2D>(); + weights = g.nodes()->create<luci::CircleConst>(); + bias = g.nodes()->create<luci::CircleConst>(); + + conv->dtype(loco::DataType::FLOAT32); + weights->dtype(loco::DataType::FLOAT32); + bias->dtype(loco::DataType::FLOAT32); + + conv->shape({1, 4, 4, 4}); + weights->shape({4, 1, 1, 4}); + bias->shape({4}); + + weights->size<loco::DataType::FLOAT32>(16); + for (uint32_t i = 0; i < 16; i++) + weights->at<loco::DataType::FLOAT32>(i) = i; + + bias->size<loco::DataType::FLOAT32>(4); + for (uint32_t i = 0; i < 4; i++) + bias->at<loco::DataType::FLOAT32>(i) = i; + + conv->input(input); + conv->filter(weights); + conv->bias(bias); + + conv->name("conv"); + weights->name("weights"); + bias->name("bias"); + + return conv; + } + +public: + luci::CircleConv2D *conv = nullptr; + luci::CircleConst *weights = nullptr; + luci::CircleConst *bias = nullptr; +}; + +} // namespace + +TEST(ConvertToFakeQuantizedModelTest, U8Conv2D) +{ + U8ConvGraph g; + g.init(); + + luci::ConvertToFakeQuantizedModelPass fq; + fq.run(&g.g); + + // Check ifm + check_q_dq(g.conv->input(), 1.0, 0); + + // Check weights + check_dq(g.conv->filter()); + + // Check bias + check_dq(g.conv->bias()); + + // Check ofm + check_q_dq(g.output->from(), 2.0, 127); + + SUCCEED(); +} + +TEST(ConvertToFakeQuantizedModelTest, F32Conv2D_NEG) +{ + FP32ConvGraph g; + g.init(); + + luci::ConvertToFakeQuantizedModelPass fq; + fq.run(&g.g); + + uint32_t dequant_count = 0; + uint32_t quant_count = 0; + + for (auto node : loco::active_nodes(loco::output_nodes(&g.g))) + { + auto cnode = loco::must_cast<luci::CircleNode *>(node); + auto opcode = cnode->opcode(); + if (opcode == luci::CircleOpcode::DEQUANTIZE) + dequant_count++; + if (opcode == luci::CircleOpcode::QUANTIZE) + quant_count++; + } + + // Check no quant/dequant Op is inserted + EXPECT_EQ(0, quant_count); + EXPECT_EQ(0, dequant_count); +} diff 
--git a/compiler/luci/pass/src/CopyQuantParamPass.cpp b/compiler/luci/pass/src/CopyQuantParamPass.cpp new file mode 100644 index 000000000..9b1bb0ea9 --- /dev/null +++ b/compiler/luci/pass/src/CopyQuantParamPass.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/CopyQuantParamPass.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/Log.h> + +namespace luci +{ + +namespace +{ + +struct SrcDst +{ + CircleNode *src = nullptr; + CircleNode *dst = nullptr; +}; + +} // namespace + +bool CopyQuantParamPass::run(loco::Graph *g) +{ + LOGGER(l); + + INFO(l) << "CopyQuantParamPass Start" << std::endl; + + if (_src_tensors.size() != _dst_tensors.size()) + throw std::runtime_error("The numbers of Source/Destination tensors do not match."); + + // Return src/dst CircleNodes + auto get_src_dst = [&g](std::string src, std::string dst) { + SrcDst src_dst; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto const cnode = loco::must_cast<CircleNode *>(node); + auto const name = cnode->name(); + if (name == src) + src_dst.src = cnode; + + if (name == dst) + src_dst.dst = cnode; + } + return src_dst; + }; + + for (uint32_t i = 0; i < _src_tensors.size(); i++) + { + auto src = _src_tensors[i]; + auto dst = _dst_tensors[i]; + + auto nodes = get_src_dst(src, dst); + if (not nodes.src) + throw std::runtime_error("The tensor named " + src + " 
does not exist."); + + if (not nodes.dst) + throw std::runtime_error("The tensor named " + dst + " does not exist."); + + copy_quantparam(nodes.src, nodes.dst); + + INFO(l) << "Quantparam of " << src << " is copied to " << dst << std::endl; + } + + INFO(l) << "CopyQuantParamPass End" << std::endl; + + return false; // one time run +} + +} // namespace luci diff --git a/compiler/luci/pass/src/FoldGatherPass.cpp b/compiler/luci/pass/src/FoldGatherPass.cpp new file mode 100644 index 000000000..f179d74bd --- /dev/null +++ b/compiler/luci/pass/src/FoldGatherPass.cpp @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/FoldGatherPass.h" +#include "CircleOptimizerUtils.h" + +#include <luci/IR/CircleNodes.h> + +namespace +{ + +/** + * Fold to const if + * + * 1. params: const and dtype = S32 or S64 + * 2. 
indices: const and dtype = S32 or S64 + * + * BEFORE + * + * [CircleConst] [CircleConst] + * | | + * +---------[Gather]---------+ + * + * AFTER + * + * [CircleConst] + * + **/ +template <loco::DataType InputT, loco::DataType IndexT> +bool fold_gather(luci::CircleGather *gather_node) +{ + const auto params = loco::must_cast<luci::CircleConst *>(gather_node->params()); + const auto indices = loco::must_cast<luci::CircleConst *>(gather_node->indices()); + + const auto rank = params->rank(); + auto axis = gather_node->axis(); + if (axis < 0) + { + axis += static_cast<int32_t>(rank); + } + + if (axis < 0 or axis >= static_cast<int32_t>(rank)) + throw std::runtime_error("Unsupported axis value"); + + const auto name = gather_node->name(); + assert(name.length() > 0); + + auto constant = gather_node->graph()->nodes()->create<luci::CircleConst>(); + constant->dtype(InputT); + constant->name(name + "_folded"); + + constant->rank(rank + indices->rank() - 1); + + assert(constant->rank() > 0); + + std::vector<uint32_t> shape; + for (uint32_t i = 0; i < rank; ++i) + { + if (i != static_cast<uint32_t>(axis)) + { + const auto dim = params->dim(i).value(); + shape.push_back(dim); + } + else + { + for (uint32_t j = 0; j < indices->rank(); ++j) + { + const auto dim = indices->dim(j).value(); + shape.push_back(dim); + } + } + } + + uint32_t size = 1; + for (uint32_t i = 0; i < shape.size(); ++i) + { + constant->dim(i).set(shape.at(i)); + size *= shape.at(i); + } + + constant->size<InputT>(size); + + uint32_t outer_size = 1; + for (uint32_t i = 0; i < static_cast<uint32_t>(axis); ++i) + { + outer_size *= params->dim(i).value(); + } + + uint32_t inner_size = 1; + for (uint32_t i = axis + 1; i < rank; ++i) + { + inner_size *= params->dim(i).value(); + } + + uint32_t coord_size = 1; + for (uint32_t i = 0; i < indices->rank(); ++i) + { + coord_size *= indices->dim(i).value(); + } + + const auto axis_size = params->dim(axis).value(); + + for (uint32_t outer = 0; outer < outer_size; 
++outer) + { + for (uint32_t i = 0; i < coord_size; ++i) + { + constant->at<InputT>((outer * coord_size + i) * inner_size) = + params->at<InputT>((outer * axis_size + indices->at<IndexT>(i)) * inner_size); + } + } + loco::replace(gather_node).with(constant); + + return true; +} + +bool fold_gather(luci::CircleGather *gather_node) +{ + const auto params = dynamic_cast<luci::CircleConst *>(gather_node->params()); + if (not params) + return false; + + const auto indices = dynamic_cast<luci::CircleConst *>(gather_node->indices()); + if (not indices) + return false; + + // TODO: support more types + if (params->dtype() != loco::DataType::S32 and params->dtype() != loco::DataType::S64) + return false; + + if (indices->dtype() != loco::DataType::S32 and indices->dtype() != loco::DataType::S64) + throw std::runtime_error("Unsupported type"); + + if (params->dtype() == loco::DataType::S64) + { + if (indices->dtype() == loco::DataType::S64) + return fold_gather<loco::DataType::S64, loco::DataType::S64>(gather_node); + else + return fold_gather<loco::DataType::S64, loco::DataType::S32>(gather_node); + } + else + { + if (indices->dtype() == loco::DataType::S64) + return fold_gather<loco::DataType::S32, loco::DataType::S64>(gather_node); + else + return fold_gather<loco::DataType::S32, loco::DataType::S32>(gather_node); + } +} + +} // namespace + +namespace luci +{ + +/** + * Constant Folding for Gather Op + **/ +bool FoldGatherPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + if (auto gather_node = dynamic_cast<luci::CircleGather *>(node)) + { + if (fold_gather(gather_node)) + changed = true; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/FoldGatherPass.test.cpp b/compiler/luci/pass/src/FoldGatherPass.test.cpp new file mode 100644 index 000000000..b02c034a5 --- /dev/null +++ b/compiler/luci/pass/src/FoldGatherPass.test.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 
2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/FoldGatherPass.h" +#include "PassTestGraphs.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +/** + * + * Graph that has a Gather S64 Op with const inputs + * + * BEFORE + * params: [Const] (shape: [3], values: [1, 2, 3]) + * indices: [Const] (shape: [1], values: [1]) + * + * [params] [indices] + * | | + * ---[Gather]--- + * + * AFTER + * [Const] (shape: [1], values: [2]) + * + */ +class S64FoldGatherSimpleTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test +{ +public: + S64FoldGatherSimpleTest() : luci::ConstantFoldingAddTestGraph({1}, loco::DataType::S64) {} + + virtual void SetUp() { init(); } + + loco::Node *createFoldedPattern() override + { + _gather = _g.nodes()->create<luci::CircleGather>(); + _params = _g.nodes()->create<luci::CircleConst>(); + _indices = _g.nodes()->create<luci::CircleConst>(); + + _gather->dtype(loco::DataType::S64); + _params->dtype(loco::DataType::S64); + _indices->dtype(loco::DataType::S64); + + _params->shape({3}); + _indices->shape({1}); + + _params->size<loco::DataType::S64>(3); + _params->at<loco::DataType::S64>(0) = 1; + _params->at<loco::DataType::S64>(1) = 2; + _params->at<loco::DataType::S64>(2) = 3; + + _indices->size<loco::DataType::S64>(1); + _indices->at<loco::DataType::S64>(0) = 1; + + _gather->params(_params); + 
_gather->indices(_indices); + + _gather->name("gather"); + _params->name("params"); + _indices->name("indices"); + + return _gather; + } + +protected: + luci::CircleGather *_gather = nullptr; + luci::CircleConst *_params = nullptr; + luci::CircleConst *_indices = nullptr; +}; + +/** + * + * Graph that has a Gather S32 Op with axis = 1 and with const inputs + * + * BEFORE + * params: [Const] (shape: [2, 3], values: [0, 1, 2, 3, 4, 5]) + * indices: [Const] (shape: [2], values: [2, 1]) + * + * [params] [indices] + * | | + * ---[Gather]--- + * + * AFTER + * [Const] (shape: [2, 2], values: [2, 1, 5, 4]) + * + */ + +class S32FoldGatherTwoDimsTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test +{ +public: + S32FoldGatherTwoDimsTest() : luci::ConstantFoldingAddTestGraph({4, 2}, loco::DataType::S32) {} + + virtual void SetUp() { init(); } + + loco::Node *createFoldedPattern() override + { + _gather = _g.nodes()->create<luci::CircleGather>(); + _params = _g.nodes()->create<luci::CircleConst>(); + _indices = _g.nodes()->create<luci::CircleConst>(); + + _gather->dtype(loco::DataType::S32); + _params->dtype(loco::DataType::S32); + _indices->dtype(loco::DataType::S32); + + _params->shape({2, 3}); + _indices->shape({2}); + + _params->size<loco::DataType::S32>(6); + _params->at<loco::DataType::S32>(0) = 0; + _params->at<loco::DataType::S32>(1) = 1; + _params->at<loco::DataType::S32>(2) = 2; + _params->at<loco::DataType::S32>(3) = 3; + _params->at<loco::DataType::S32>(4) = 4; + _params->at<loco::DataType::S32>(5) = 5; + + _indices->size<loco::DataType::S32>(2); + _indices->at<loco::DataType::S32>(0) = 2; + _indices->at<loco::DataType::S32>(1) = 1; + + _gather->params(_params); + _gather->indices(_indices); + + _gather->axis(1); + + _gather->name("gather"); + _params->name("params"); + _indices->name("indices"); + + return _gather; + } + +protected: + luci::CircleGather *_gather = nullptr; + luci::CircleConst *_params = nullptr; + luci::CircleConst *_indices = 
nullptr; +}; + +} // namespace + +TEST(FoldGatherTest, name) +{ + luci::FoldGatherPass pass; + auto const name = pass.name(); + ASSERT_NE(nullptr, name); +} + +TEST_F(S64FoldGatherSimpleTest, fold_gather_simple) +{ + luci::FoldGatherPass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Check type, shape, values of folded const + EXPECT_EQ(loco::DataType::S64, folded_const->dtype()); + EXPECT_EQ(1, folded_const->rank()); + EXPECT_EQ(1, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(0)); +} + +TEST_F(S32FoldGatherTwoDimsTest, fold_gather_with_two_dim) +{ + luci::FoldGatherPass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Check type, shape, values of folded const + EXPECT_EQ(loco::DataType::S32, folded_const->dtype()); + EXPECT_EQ(2, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + + EXPECT_EQ(2, folded_const->at<loco::DataType::S32>(0)); + EXPECT_EQ(1, folded_const->at<loco::DataType::S32>(1)); + EXPECT_EQ(5, folded_const->at<loco::DataType::S32>(2)); + EXPECT_EQ(4, folded_const->at<loco::DataType::S32>(3)); +} + +TEST_F(S64FoldGatherSimpleTest, illegal_input_NEG) +{ + _indices->dtype(loco::DataType::FLOAT32); + + luci::FoldGatherPass pass; + EXPECT_ANY_THROW(pass.run(graph())); +} + +TEST_F(S64FoldGatherSimpleTest, illegal_axis_NEG) +{ + _gather->axis(1); + + luci::FoldGatherPass pass; + EXPECT_ANY_THROW(pass.run(graph())); +} diff --git a/compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp b/compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp index de973a431..68136b244 100644 --- a/compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp +++ b/compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp @@ -186,12 +186,12 @@ TEST(PropagateConcatenationQparam, 
propagate_concat_quantparam_u8) // (1) normal case: qparam is propagated to input_1 and input_2 // (2) input used by other Op: input_1 is an input of input_2. qparam is propagated only to // input_2 - // (3) subsequent concat: input_1 is concat. qparam is propagated only to input_2 + // (3) subsequent concat: input_1 is concat. qparam is propagated to subsequent concat // (4) const input: input_1 is const. constant values are quantized // normal case: qparam of concat_node is propagated to input_1 and input_2 SimpleConcatGraph g(loco::DataType::U8); - luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::U8); + luci::propagate_concat_quantparam(&g.concat_node); EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]); EXPECT_EQ(77, g.concat_node.quantparam()->zerop[0]); EXPECT_FLOAT_EQ(3.14, g.input_1.quantparam()->scale[0]); @@ -202,7 +202,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8) // input_1 is an input of input_2. qparam is propagated only to input_2 SimpleConcatGraph g2(loco::DataType::U8); g2.input_2.input(&g2.input_1); - luci::propagate_concat_quantparam(&g2.concat_node, loco::DataType::U8); + luci::propagate_concat_quantparam(&g2.concat_node); EXPECT_FLOAT_EQ(3.14, g2.concat_node.quantparam()->scale[0]); EXPECT_EQ(77, g2.concat_node.quantparam()->zerop[0]); EXPECT_FLOAT_EQ(1.0, g2.input_1.quantparam()->scale[0]); @@ -210,19 +210,19 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8) EXPECT_FLOAT_EQ(3.14, g2.input_2.quantparam()->scale[0]); EXPECT_EQ(77, g2.input_2.quantparam()->zerop[0]); - // input_1 is concat. qparam is propagated only to input_2 + // input_1 is concat. 
qparam is propagated to subsequent concat SubsequentConcatGraph sg(loco::DataType::U8); - luci::propagate_concat_quantparam(&sg.concat_node, loco::DataType::U8); + luci::propagate_concat_quantparam(&sg.concat_node); EXPECT_FLOAT_EQ(3.14, sg.concat_node.quantparam()->scale[0]); EXPECT_EQ(77, sg.concat_node.quantparam()->zerop[0]); - EXPECT_FLOAT_EQ(1.0, sg.input_1.quantparam()->scale[0]); - EXPECT_EQ(1, sg.input_1.quantparam()->zerop[0]); + EXPECT_FLOAT_EQ(3.14, sg.input_1.quantparam()->scale[0]); + EXPECT_EQ(77, sg.input_1.quantparam()->zerop[0]); EXPECT_FLOAT_EQ(3.14, sg.input_2.quantparam()->scale[0]); EXPECT_EQ(77, sg.input_2.quantparam()->zerop[0]); // input_1 is const. const values are quantized with the qparam of concat ConstInputConcatGraph cg(loco::DataType::U8); - luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::U8); + luci::propagate_concat_quantparam(cg.concat_node); EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]); EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]); const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0)); @@ -248,7 +248,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8_NEG) // concat has fused activation function g.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU); - luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::U8); + luci::propagate_concat_quantparam(&g.concat_node); EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]); EXPECT_EQ(77, g.concat_node.quantparam()->zerop[0]); EXPECT_FLOAT_EQ(1.0, g.input_1.quantparam()->scale[0]); @@ -261,7 +261,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8_NEG) // const values are quantized using its min/max ConstInputConcatGraph cg(loco::DataType::U8); cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU); - luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::U8); + luci::propagate_concat_quantparam(cg.concat_node); 
EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]); EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]); const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0)); @@ -283,12 +283,12 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16) // (1) normal case: qparam is propagated to input_1 and input_2 // (2) input used by other Op: input_1 is an input of input_2. qparam is propagated only to // input_2 - // (3) subsequent concat: input_1 is concat. qparam is propagated only to input_2 + // (3) subsequent concat: input_1 is concat. qparam is propagated to subsequent concat // (4) const input: input_1 is const. constant values are quantized // normal case: qparam of concat_node is propagated to input_1 and input_2 SimpleConcatGraph g(loco::DataType::S16); - luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::S16); + luci::propagate_concat_quantparam(&g.concat_node); EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]); EXPECT_EQ(0, g.concat_node.quantparam()->zerop[0]); EXPECT_FLOAT_EQ(3.14, g.input_1.quantparam()->scale[0]); @@ -299,7 +299,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16) // input_1 is an input of input_2. qparam is propagated only to input_2 SimpleConcatGraph g2(loco::DataType::S16); g2.input_2.input(&g2.input_1); - luci::propagate_concat_quantparam(&g2.concat_node, loco::DataType::S16); + luci::propagate_concat_quantparam(&g2.concat_node); EXPECT_FLOAT_EQ(3.14, g2.concat_node.quantparam()->scale[0]); EXPECT_EQ(0, g2.concat_node.quantparam()->zerop[0]); EXPECT_FLOAT_EQ(1.0, g2.input_1.quantparam()->scale[0]); @@ -309,17 +309,17 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16) // input_1 is concat. 
qparam is propagated only to input_2 SubsequentConcatGraph sg(loco::DataType::S16); - luci::propagate_concat_quantparam(&sg.concat_node, loco::DataType::S16); + luci::propagate_concat_quantparam(&sg.concat_node); EXPECT_FLOAT_EQ(3.14, sg.concat_node.quantparam()->scale[0]); EXPECT_EQ(0, sg.concat_node.quantparam()->zerop[0]); - EXPECT_FLOAT_EQ(1.0, sg.input_1.quantparam()->scale[0]); + EXPECT_FLOAT_EQ(3.14, sg.input_1.quantparam()->scale[0]); EXPECT_EQ(0, sg.input_1.quantparam()->zerop[0]); EXPECT_FLOAT_EQ(3.14, sg.input_2.quantparam()->scale[0]); EXPECT_EQ(0, sg.input_2.quantparam()->zerop[0]); // input_1 is const. const values are quantized with the qparam of concat ConstInputConcatGraph cg(loco::DataType::S16); - luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::S16); + luci::propagate_concat_quantparam(cg.concat_node); EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]); EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]); const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0)); @@ -345,7 +345,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16_NEG) // concat has fused activation function g.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU); - luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::S16); + luci::propagate_concat_quantparam(&g.concat_node); EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]); EXPECT_EQ(0, g.concat_node.quantparam()->zerop[0]); EXPECT_FLOAT_EQ(1.0, g.input_1.quantparam()->scale[0]); @@ -358,7 +358,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16_NEG) // const values are quantized using its min/max ConstInputConcatGraph cg(loco::DataType::S16); cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU); - luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::S16); + luci::propagate_concat_quantparam(cg.concat_node); EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]); 
EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]); const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0)); diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp new file mode 100644 index 000000000..b4975486d --- /dev/null +++ b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp @@ -0,0 +1,482 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/PropagateQParamBackwardPass.h" +#include "QuantizationUtils.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> +#include <luci/Service/Nodes/CircleConst.h> +#include <luci/Log.h> + +#include <cmath> + +namespace +{ + +void quant_const_values(luci::CircleConst *const_node, float scaling_factor, float zerop, + loco::DataType quant_type) +{ + uint32_t size = const_node->size<loco::DataType::FLOAT32>(); + + const float scaling_factor_inv = 1.0 / scaling_factor; + std::vector<int32_t> quantized_values(size); + for (uint32_t i = 0; i < size; ++i) + { + auto data = static_cast<double>(const_node->at<loco::DataType::FLOAT32>(i)); + double quantized_data = std::round(data * scaling_factor_inv) + zerop; + constexpr double int_max = static_cast<double>(std::numeric_limits<int32_t>::max()); + constexpr double int_min = static_cast<double>(std::numeric_limits<int32_t>::min()); + quantized_data = std::min(int_max, std::max(int_min, quantized_data)); + + quantized_values[i] = static_cast<int32_t>(quantized_data); + } + + switch (quant_type) + { + case loco::DataType::U8: + const_node->dtype(loco::DataType::U8); // change the type of tensor + const_node->size<loco::DataType::U8>(size); // resize tensor + for (uint32_t i = 0; i < size; ++i) + const_node->at<loco::DataType::U8>(i) = std::min(255, std::max(0, quantized_values[i])); + break; + case loco::DataType::S16: + assert(zerop == 0); + const_node->dtype(loco::DataType::S16); // change the type of tensor + const_node->size<loco::DataType::S16>(size); // resize tensor + for (uint32_t i = 0; i < size; ++i) + const_node->at<loco::DataType::S16>(i) = + std::min(32767, std::max(-32767, quantized_values[i])); + break; + default: + throw std::runtime_error("Unsupported data type"); + } +} + +void overwrite_quantparam(const luci::CircleNode *source, luci::CircleNode *target) +{ + auto source_qparam = source->quantparam(); + if (source_qparam == nullptr) + throw 
std::runtime_error("source quantparam is not found during overwrite"); + + auto target_qparam = target->quantparam(); + if (target_qparam == nullptr) + { + auto quantparam = std::make_unique<luci::CircleQuantParam>(); + target->quantparam(std::move(quantparam)); + target_qparam = target->quantparam(); + + if (target_qparam == nullptr) + throw std::runtime_error("Creating new quant param failed"); + } + target_qparam->min = source_qparam->min; + target_qparam->max = source_qparam->max; + target_qparam->scale = source_qparam->scale; + target_qparam->zerop = source_qparam->zerop; + target_qparam->quantized_dimension = source_qparam->quantized_dimension; +} + +/** + * Tells if pad_v2 quantization should ignore padding value + * In that case padding const will be quantized with input parameters, and probably clipped + */ +bool ignore_pad_v2_const_quantization(const luci::CirclePadV2 *pad) +{ + // This is a workaround to quantize pad generated from MaxPoolWithArgmax operation properly + // TODO use metadata hints to detect this case + auto const_value_node = dynamic_cast<const luci::CircleConst *>(pad->arg(2)); + if (!const_value_node) + return false; + if (const_value_node->dtype() == loco::DataType::FLOAT32) + { + float const_value = const_value_node->at<loco::DataType::FLOAT32>(0); + if (const_value == std::numeric_limits<float>::lowest()) + return true; + } + return false; +} + +/** EXAMPLE + * + * BEFORE + * + * [CircleNode] [CircleConst] + * (qparam1) (FP32) + * \ / + * \ / + * [CirclePack] + * (qparam2) + * + * AFTER + * + * [CircleNode] [CircleConst] [CircleConst] <- Dead node + * (qparam2) (qparam2) (FP32) + * \ / + * \ / + * [CirclePack] + * (qparam2) + * + * NOTE Quantization parameter of CirclePack (qparam2) is propagated to the inputs. 
+ */ +void propagate_pack_quantparam(luci::CirclePack *pack) +{ + assert(pack->quantparam() != nullptr); + + const auto num_inputs = pack->values_count(); + + for (uint32_t i = 0; i < num_inputs; i++) + { + auto node = loco::must_cast<luci::CircleNode *>(pack->arg(i)); + + // Quantize constant values + if (node->opcode() == luci::CircleOpcode::CIRCLECONST) + { + luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node); + if (const_node->dtype() != loco::DataType::FLOAT32) + throw std::runtime_error("Unsupported data type for constant input of pack Op"); + + const auto pack_qparam = pack->quantparam(); + if (pack_qparam == nullptr) + throw std::runtime_error("quantparam of pack is not found during propagation"); + + assert(pack_qparam->scale.size() == 1); + assert(pack_qparam->zerop.size() == 1); + const auto scaling_factor = pack_qparam->scale[0]; + const auto zerop = pack_qparam->zerop[0]; + + auto new_const = luci::clone(const_node); + quant_const_values(new_const, scaling_factor, zerop, pack->dtype()); + pack->values(i, new_const); + overwrite_quantparam(pack, new_const); + } + else + { + const auto succs = loco::succs(node); + if (succs.size() > 1) + continue; + + // Non-const input must have been quantized + assert(node->quantparam() != nullptr); + overwrite_quantparam(pack, node); + } + } +} + +/** EXAMPLE + * + * + * + * BEFORE + * + * [CircleNode] [CircleConst] [CircleConst] [CircleNode] + * (S32) (S32) (FP32) (U8 qparam1) + * \ \ / / + * \ \ / / + * \ \ / / + * -------[CircleOneHot]------- + * (U8 qparam2) + * + * AFTER + * + * [CircleNode] [CircleConst] [CircleConst] [CircleNode] [CircleConst] <- Dead node + * (S32) (S32) (U8 qparam2) (U8 qparam2) (FP32) + * \ \ / / + * \ \ / / + * \ \ / / + * -------[CircleOneHot]------- + * (U8 qparam2) + * + * NOTE Quantization parameter of CircleOneHot (qparam2) is propagated to on_value/off_value. 
+ */ +void propagate_one_hot_quantparam(luci::CircleOneHot *one_hot) +{ + assert(one_hot->quantparam() != nullptr); + + // Propagate quantization parameters from output to inputs, + // to fit both input and counstant_value in one quant range. + auto quant_input = [one_hot](void (luci::CircleOneHot::*arg_setter)(loco::Node *), + loco::Node *(luci::CircleOneHot::*arg_getter)() const) { + auto node = loco::must_cast<luci::CircleNode *>((one_hot->*arg_getter)()); + + // Quantize constant values + if (node->opcode() == luci::CircleOpcode::CIRCLECONST) + { + luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node); + if (is_quantized(const_node)) + return; + + if (const_node->dtype() != loco::DataType::FLOAT32) + throw std::runtime_error("Unsupported data type for constant input of OneHot Op"); + + const auto qparam = one_hot->quantparam(); + if (qparam == nullptr) + throw std::runtime_error("quantparam of OneHot is not found during propagation"); + + assert(qparam->scale.size() == 1); + const auto scaling_factor = qparam->scale.at(0); + const auto zerop = qparam->zerop.at(0); + + auto new_const = luci::clone(const_node); + quant_const_values(new_const, scaling_factor, zerop, one_hot->dtype()); + overwrite_quantparam(one_hot, new_const); + (one_hot->*arg_setter)(new_const); + } + else + { + const auto succs = loco::succs(node); + if (succs.size() > 1) + return; + + // Non-const input must have been quantized + assert(node->quantparam() != nullptr); + overwrite_quantparam(one_hot, node); + } + }; + + quant_input(&luci::CircleOneHot::on_value, &luci::CircleOneHot::on_value); + quant_input(&luci::CircleOneHot::off_value, &luci::CircleOneHot::off_value); +} + +} // namespace + +namespace luci +{ + +/** BEFORE + * + * [CircleNode] [CircleConst] + * (U8 qparam1) (FP32) + * \ / + * \ / + * [CircleConcatenation] + * (U8 qparam2) + * + * AFTER + * [CircleNode] [CircleConst] [CircleConst] <- Dead node + * (U8 qparam2) (U8 qparam2) (FP32) + * \ / + * \ / + * 
[CircleConcatenation] + * (U8 qparam2) + */ +void propagate_concat_quantparam(luci::CircleConcatenation *concat) +{ + assert(concat->quantparam() != nullptr); + + const auto num_inputs = concat->numValues(); + + // Quantize const inputs using their values if concat has fused act function + if (concat->fusedActivationFunction() != luci::FusedActFunc::NONE) + { + for (uint32_t i = 0; i < num_inputs; i++) + { + auto node = concat->arg(i); + auto const_node = dynamic_cast<luci::CircleConst *>(node); + if (const_node != nullptr) + { + auto new_const = luci::clone(const_node); + quant_const(new_const, concat->dtype()); + concat->values(i, new_const); + } + } + return; + } + + for (uint32_t i = 0; i < num_inputs; i++) + { + auto node = loco::must_cast<luci::CircleNode *>(concat->arg(i)); + + // Quantize constant values + if (node->opcode() == luci::CircleOpcode::CIRCLECONST) + { + luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node); + + const auto concat_qparam = concat->quantparam(); + assert(concat_qparam->scale.size() == 1); + const auto scaling_factor = concat_qparam->scale[0]; + const auto zerop = concat_qparam->zerop[0]; + + auto new_const = luci::clone(const_node); + quant_const_values(new_const, scaling_factor, zerop, concat->dtype()); + concat->values(i, new_const); + overwrite_quantparam(concat, new_const); + } + else + { + const auto succs = loco::succs(node); + if (succs.size() > 1) + continue; + + // Non-const input must have been quantized + assert(node->quantparam() != nullptr); + overwrite_quantparam(concat, node); + } + } +} + +/** BEFORE + * + * [CircleNode] [CircleConst] [CircleConst] + * (U8 qparam1) (S32) (FP32) + * \ | / + * \ | / + * [CirclePadV2] + * (U8 qparam2) + * + * AFTER (case 1) + * + * By default qparam is propagated from output to inputs to meet backend requirements. 
+ * + * [CircleNode] [CircleConst] [CircleConst] [CircleConst] <- Dead node + * (U8 qparam2) (S32) (U8 qparam2) (FP32) + * \ | / + * \ | / + * [CirclePadV2] + * (U8 qparam2) + * + * AFTER (case 2) + * + * In case padded value is the lowest float value + * Qparam is propagated from input to output and constant. + * + * This is a special case for optimization constructed pad, needed to guarantee that + * extremely large negative constant do not stretch output quantization range. + * + * [CircleNode] [CircleConst] [CircleConst] [CircleConst] <- Dead node + * (U8 qparam1) (S32) (U8 qparam1) (FP32) + * \ | / + * \ | / + * [CirclePadV2] + * (U8 qparam1) + */ +void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2) +{ + if (ignore_pad_v2_const_quantization(pad_v2)) + { + // propagate input quantization paramters from input to output and padding const value + auto pad_v2_input = loco::must_cast<luci::CircleNode *>(pad_v2->arg(0)); + overwrite_quantparam(pad_v2_input, pad_v2); + + auto const_value_node = loco::must_cast<luci::CircleConst *>( + pad_v2->arg(2)); // FIX ignore_pad_v2_const_quantization UNLESS + auto new_const = luci::clone(const_value_node); + + const auto pad_v2_input_qparam = pad_v2_input->quantparam(); + assert(pad_v2_input_qparam != nullptr); + assert(pad_v2_input_qparam->scale.size() == 1); + const auto scaling_factor = pad_v2_input_qparam->scale.at(0); + const auto zerop = pad_v2_input_qparam->zerop.at(0); + + quant_const_values(new_const, scaling_factor, zerop, pad_v2->dtype()); + overwrite_quantparam(pad_v2_input, new_const); + pad_v2->constant_values(new_const); + return; + } + + // Propagate quantization paramters from output to inputs, + // to fit both input and counstant_value in one quant range. 
+ auto quant_input = [pad_v2](void (CirclePadV2::*arg_setter)(loco::Node *), uint32_t arg) { + auto node = loco::must_cast<luci::CircleNode *>(pad_v2->arg(arg)); + + // Quantize constant values + if (node->opcode() == luci::CircleOpcode::CIRCLECONST) + { + luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node); + if (is_quantized(const_node)) + return; + + if (const_node->dtype() != loco::DataType::FLOAT32) + throw std::runtime_error("Unsupported data type for constant input of PadV2 Op"); + + const auto pad_v2_qparam = pad_v2->quantparam(); + if (pad_v2_qparam == nullptr) + throw std::runtime_error("quantparam of PadV2 is not found during propagation"); + + assert(pad_v2_qparam->scale.size() == 1); + const auto scaling_factor = pad_v2_qparam->scale.at(0); + const auto zerop = pad_v2_qparam->zerop.at(0); + + auto new_const = luci::clone(const_node); + quant_const_values(new_const, scaling_factor, zerop, pad_v2->dtype()); + overwrite_quantparam(pad_v2, new_const); + (pad_v2->*arg_setter)(new_const); + } + else + { + const auto succs = loco::succs(node); + if (succs.size() > 1) + return; + + // Non-const input must have been quantized + assert(node->quantparam() != nullptr); + overwrite_quantparam(pad_v2, node); + } + }; + + quant_input(&CirclePadV2::input, 0); + quant_input(&CirclePadV2::constant_values, 2); +} + +} // namespace luci + +namespace +{ + +// Visitor to propagate quantization parameters backwards +struct PropagateQParamBackward final : public luci::CircleNodeMutableVisitor<void> +{ + void visit(luci::CircleNode *) {} + + void visit(luci::CircleConcatenation *node) { propagate_concat_quantparam(node); } + + void visit(luci::CircleOneHot *node) { propagate_one_hot_quantparam(node); } + + void visit(luci::CirclePack *node) { propagate_pack_quantparam(node); } + + void visit(luci::CirclePadV2 *node) { propagate_pad_v2_quantparam(node); } +}; + +} // namespace + +namespace luci +{ + +bool PropagateQParamBackwardPass::run(loco::Graph *g) 
+{ + LOGGER(l); + + // We use reverse post-order traversal as qparam is propagated backward + auto nodes = loco::postorder_traversal(loco::output_nodes(g)); + std::reverse(nodes.begin(), nodes.end()); + for (auto node : nodes) + { + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + INFO(l) << "PropagateQParamBackwardPass visit node: " << circle_node->name() << std::endl; + + // We can't propagate non-existent qparam + if (circle_node->quantparam() == nullptr) + continue; + + PropagateQParamBackward pqb; + circle_node->accept(&pqb); + } + + // This pass is only run once, so return false + // TODO Refactoring not to return meaningless value + return false; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp new file mode 100644 index 000000000..33af70449 --- /dev/null +++ b/compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/PropagateQParamBackwardPass.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +using namespace luci; + +namespace +{ + +void set_qparam(luci::CircleNode *node, float scale, int64_t zp) +{ + auto qparam = std::make_unique<luci::CircleQuantParam>(); + qparam->scale.emplace_back(scale); + qparam->zerop.emplace_back(zp); + + node->quantparam(std::move(qparam)); +} + +/** + * @brief Base Test Graph + */ +struct TestGraph +{ +public: + virtual void init(void) = 0; +}; + +/** + * Graph with two concats + * + * [CircleInput] [CircleConst] + * \ / + * [CircleConcatenation] [CircleConst] + * | | + * [CircleConcatenation] + * | + * [CircleOutput] + * + * BEFORE + * - Concat1 and Concat 2 have different qparams + * + * AFTER + * - All Ops have the same qparam + */ +struct SubsequentConcatGraph : public TestGraph +{ +public: + void init(void) final + { + // graph input and output + auto graph_input = g.inputs()->create(); + auto graph_output = g.outputs()->create(); + + // input + input = g.nodes()->create<luci::CircleInput>(); + input->index(graph_input->index()); + input->shape({1, 4, 4, 3}); + input->dtype(loco::DataType::U8); + set_qparam(input, 1.0, 1); + + // const1 + const1 = g.nodes()->create<luci::CircleConst>(); + const1->shape({1, 4, 4, 3}); + const1->dtype(loco::DataType::FLOAT32); + const1->size<loco::DataType::FLOAT32>(48); + for (uint32_t i = 0; i < 48; i++) + const1->at<loco::DataType::FLOAT32>(i) = i; + + // concat1 + concat1 = g.nodes()->create<luci::CircleConcatenation>(2); + concat1->shape({1, 4, 4, 6}); + concat1->dtype(loco::DataType::U8); + set_qparam(concat1, 2.0, 2); + concat1->values(0, input); + concat1->values(1, const1); + concat1->fusedActivationFunction(luci::FusedActFunc::NONE); + + // const2 + const2 = g.nodes()->create<luci::CircleConst>(); + const2->shape({1, 4, 4, 3}); + const2->dtype(loco::DataType::FLOAT32); + const2->size<loco::DataType::FLOAT32>(48); + for (uint32_t i = 0; i < 48; i++) + 
const2->at<loco::DataType::FLOAT32>(i) = i; + + // concat2 + concat2 = g.nodes()->create<luci::CircleConcatenation>(2); + concat2->shape({1, 4, 4, 9}); + concat2->dtype(loco::DataType::U8); + set_qparam(concat2, 3.0, 3); + concat2->values(0, concat1); + concat2->values(1, const2); + concat2->fusedActivationFunction(luci::FusedActFunc::NONE); + + // output + output = g.nodes()->create<luci::CircleOutput>(); + output->index(graph_output->index()); + output->from(concat2); + output->shape({1, 4, 4, 9}); + output->dtype(loco::DataType::U8); + set_qparam(output, 3.0, 3); + } + +public: + loco::Graph g; + CircleInput *input = nullptr; + CircleConcatenation *concat1 = nullptr; + CircleConcatenation *concat2 = nullptr; + CircleConst *const1 = nullptr; + CircleConst *const2 = nullptr; + CircleOutput *output = nullptr; +}; + +} // namespace + +TEST(PropagateQParamBackwardPassTest, name) +{ + luci::PropagateQParamBackwardPass pass(loco::DataType::U8); + auto const name = pass.name(); + ASSERT_NE(nullptr, name); +} + +TEST(PropagateQParamBackwardPassTest, subsequent_propagation) +{ + SubsequentConcatGraph graph; + + graph.init(); + + luci::PropagateQParamBackwardPass pass(loco::DataType::U8); + + pass.run(&graph.g); + + EXPECT_EQ(3.0, graph.concat2->quantparam()->scale[0]); + EXPECT_EQ(3, graph.concat2->quantparam()->zerop[0]); + + auto const2 = loco::must_cast<CircleNode *>(graph.concat2->values(1)); + EXPECT_EQ(3.0, const2->quantparam()->scale[0]); + EXPECT_EQ(3, const2->quantparam()->zerop[0]); + + EXPECT_EQ(3.0, graph.concat1->quantparam()->scale[0]); + EXPECT_EQ(3, graph.concat1->quantparam()->zerop[0]); + + auto const1 = loco::must_cast<CircleNode *>(graph.concat1->values(1)); + EXPECT_EQ(3.0, const1->quantparam()->scale[0]); + EXPECT_EQ(3, const1->quantparam()->zerop[0]); + + EXPECT_EQ(3.0, graph.input->quantparam()->scale[0]); + EXPECT_EQ(3, graph.input->quantparam()->zerop[0]); +} diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp 
b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp new file mode 100644 index 000000000..003e4c293 --- /dev/null +++ b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/PropagateQParamForwardPass.h" + +#include "QuantizationUtils.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> +#include <luci/Log.h> + +#include <iostream> + +namespace +{ + +bool copy_qparam(luci::CircleQuantParam *src, luci::CircleQuantParam *dst) +{ + assert(src->scale.size() == dst->scale.size()); + assert(src->zerop.size() == dst->zerop.size()); + + // src and dst have the same qparam + if (std::equal(src->scale.begin(), src->scale.end(), dst->scale.begin()) && + std::equal(src->zerop.begin(), src->zerop.end(), dst->zerop.begin()) && + src->quantized_dimension == dst->quantized_dimension) + return false; + + dst->scale.assign(src->scale.begin(), src->scale.end()); + dst->zerop.assign(src->zerop.begin(), src->zerop.end()); + dst->quantized_dimension = src->quantized_dimension; + return true; +} + +bool copy_qparam(luci::CircleNode *src, luci::CircleNode *dst) +{ + // Skip nodes that do not have quantparams + auto src_qparam = src->quantparam(); + if (not src_qparam) + return false; + + auto dst_qparam = dst->quantparam(); + if (not dst_qparam) + return false; + + return 
copy_qparam(src_qparam, dst_qparam); +} + +// Visitor to propagate quantization parameters +struct PropagateQParamForward final : public luci::CircleNodeMutableVisitor<bool> +{ + PropagateQParamForward() = default; + + bool visit(luci::CircleNode *) { return false; } + + bool visit(luci::CircleGather *node) + { + auto input_node = loco::must_cast<luci::CircleNode *>(node->params()); + return copy_qparam(input_node, node); + } + + bool visit(luci::CircleReshape *node) + { + auto input_node = loco::must_cast<luci::CircleNode *>(node->tensor()); + return copy_qparam(input_node, node); + } + + bool visit(luci::CircleTranspose *node) + { + auto input_node = loco::must_cast<luci::CircleNode *>(node->a()); + return copy_qparam(input_node, node); + } + + bool visit(luci::CircleStridedSlice *node) + { + auto input_node = loco::must_cast<luci::CircleNode *>(node->input()); + return copy_qparam(input_node, node); + } + + bool visit(luci::CircleSplitOut *node) + { + auto split = loco::must_cast<luci::CircleSplit *>(node->input()); + auto input_node = loco::must_cast<luci::CircleNode *>(split->input()); + return copy_qparam(input_node, node); + } + + bool visit(luci::CircleSplitVOut *node) + { + auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input()); + auto input_node = loco::must_cast<luci::CircleNode *>(splitv->input()); + return copy_qparam(input_node, node); + } + + bool visit(luci::CircleUnpackOut *node) + { + auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input()); + auto input_node = loco::must_cast<luci::CircleNode *>(unpack->value()); + return copy_qparam(input_node, node); + } + + // Propagate qparam across Quantize op to ensure + // special qparams (pre-defined values, integer scale) + bool visit(luci::CircleQuantize *node) + { + auto input_node = loco::must_cast<luci::CircleNode *>(node->input()); + + // Skip if input_node is not quantized activation + if (input_node->dtype() != loco::DataType::U8 and input_node->dtype() != 
loco::DataType::S16) + return false; + + // If input_node and node have the same dtype, Quantize op + // will do rescale, not requantize for mixed-precision + if (input_node->dtype() == node->dtype()) + return false; + + assert(node->dtype() == loco::DataType::U8 or node->dtype() == loco::DataType::S16); + + auto prev_qparam = node->quantparam(); + assert(prev_qparam); + assert(prev_qparam->scale.size() == 1); + assert(prev_qparam->zerop.size() == 1); + + const auto prev_scale = prev_qparam->scale[0]; + const auto prev_zerop = prev_qparam->zerop[0]; + + auto qtype = luci::activation_qtype(input_node); + switch (qtype) + { + case luci::ActivationQType::PreDefinedValue: + node->quantparam(luci::make_predefined_qparam(input_node->opcode(), node->dtype())); + break; + case luci::ActivationQType::IntScale: + luci::set_int_scale(node); + break; + default: + break; + } + + assert(node->quantparam()); + assert(node->quantparam()->scale.size() == 1); + assert(node->quantparam()->zerop.size() == 1); + + const auto scale = node->quantparam()->scale[0]; + const auto zerop = node->quantparam()->zerop[0]; + + // Compare qparam with saved values to detect update + return scale != prev_scale or zerop != prev_zerop; + } +}; + +} // namespace + +namespace luci +{ + +bool PropagateQParamForwardPass::run(loco::Graph *g) +{ + bool changed = false; + LOGGER(l); + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + INFO(l) << "PropagateQParamForwardPass visit node: " << circle_node->name() << std::endl; + + PropagateQParamForward pqp; + if (circle_node->accept(&pqp)) + changed = true; + + if (_TF_style_maxpool) + { + if (auto maxpool = dynamic_cast<luci::CircleMaxPool2D *>(node)) + { + auto input = loco::must_cast<luci::CircleNode *>(maxpool->value()); + copy_qparam(input, maxpool); + } + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp 
b/compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp new file mode 100644 index 000000000..a734c0873 --- /dev/null +++ b/compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/PropagateQParamForwardPass.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +void addQuantParam(luci::CircleNode *node, const std::vector<float> &scale, + const std::vector<int64_t> &zp) +{ + assert(node->quantparam() == nullptr); + + auto quantparam = std::make_unique<luci::CircleQuantParam>(); + quantparam->scale = scale; + quantparam->zerop = zp; + node->quantparam(std::move(quantparam)); +} + +/** + * Simple graph for test + * + * BEFORE + * + * [Conv] (qparam 1) + * | + * [Reshape] (qparam 2) + * + * AFTER + * + * [Conv] (qparam 2) + * | + * [Reshape] (qparam 2) + * + */ +class SimpleGraph +{ +public: + SimpleGraph() + { + input = g.nodes()->create<luci::CircleInput>(); + conv = g.nodes()->create<luci::CircleConv2D>(); + reshape = g.nodes()->create<luci::CircleReshape>(); + output = g.nodes()->create<luci::CircleOutput>(); + + auto graph_input = g.inputs()->create(); + input->index(graph_input->index()); + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + addQuantParam(conv, {0.1, 0.2, 0.3}, {0, 10, 20}); + 
addQuantParam(reshape, {0.2, 0.4, 0.6}, {-10, 0, 10}); + + conv->input(input); + reshape->tensor(conv); + output->from(reshape); + } + +public: + loco::Graph g; + luci::CircleInput *input = nullptr; + luci::CircleConv2D *conv = nullptr; + luci::CircleReshape *reshape = nullptr; + luci::CircleOutput *output = nullptr; +}; + +/** + * Test graph for forward propagation in Quantize Op + * + * BEFORE + * + * [Tanh U8] (qparam 1 - pre-defined for U8) + * | + * [Quantize S16] (qparam 2 - not pre-defined value) + * + * AFTER + * + * [Tanh U8] (qparam 1 - pre-defined for U8) + * | + * [Quantize S16] (qparam 3 - pre-defined for S16) + * + */ +class TanhQuantizeGraph +{ +public: + TanhQuantizeGraph() + { + input = g.nodes()->create<luci::CircleInput>(); + tanh = g.nodes()->create<luci::CircleTanh>(); + quantize = g.nodes()->create<luci::CircleQuantize>(); + output = g.nodes()->create<luci::CircleOutput>(); + + auto graph_input = g.inputs()->create(); + input->index(graph_input->index()); + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + tanh->dtype(loco::DataType::U8); + quantize->dtype(loco::DataType::S16); + + addQuantParam(tanh, {2.0f / 256.0f}, {128}); // pre-defined qparam for U8 + addQuantParam(quantize, {1.0}, {0}); // not pre-defined values + + tanh->x(input); + quantize->input(tanh); + output->from(quantize); + } + +public: + loco::Graph g; + luci::CircleInput *input = nullptr; + luci::CircleTanh *tanh = nullptr; + luci::CircleQuantize *quantize = nullptr; + luci::CircleOutput *output = nullptr; +}; + +/** + * Test graph for forward propagation in Quantize Op + * + * BEFORE + * + * [Floor U8] (qparam 1 - int scale) + * | + * [Quantize S16] (qparam 2 - not int scale) + * + * AFTER + * + * [Floor U8] (qparam 1 - int scale) + * | + * [Quantize S16] (qparam 3 - int scale) + * + */ +class FloorQuantizeGraph +{ +public: + FloorQuantizeGraph() + { + input = g.nodes()->create<luci::CircleInput>(); + floor = 
g.nodes()->create<luci::CircleFloor>(); + quantize = g.nodes()->create<luci::CircleQuantize>(); + output = g.nodes()->create<luci::CircleOutput>(); + + auto graph_input = g.inputs()->create(); + input->index(graph_input->index()); + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + floor->dtype(loco::DataType::U8); + quantize->dtype(loco::DataType::S16); + + addQuantParam(floor, {4.0f}, {128}); // int scale + addQuantParam(quantize, {0.3}, {0}); // not int scale + + floor->x(input); + quantize->input(floor); + output->from(quantize); + } + +public: + loco::Graph g; + luci::CircleInput *input = nullptr; + luci::CircleFloor *floor = nullptr; + luci::CircleQuantize *quantize = nullptr; + luci::CircleOutput *output = nullptr; +}; + +} // namespace + +TEST(PropagateQParamForwardPassTest, name) +{ + luci::PropagateQParamForwardPass pass; + auto const name = pass.name(); + ASSERT_NE(nullptr, name); +} + +TEST(PropagateQParamForward, simple) +{ + SimpleGraph g; + + luci::PropagateQParamForwardPass pass; + while (pass.run(&g.g)) + ; + + EXPECT_FLOAT_EQ(0.1, g.reshape->quantparam()->scale[0]); + EXPECT_FLOAT_EQ(0.2, g.reshape->quantparam()->scale[1]); + EXPECT_FLOAT_EQ(0.3, g.reshape->quantparam()->scale[2]); + EXPECT_EQ(0, g.reshape->quantparam()->zerop[0]); + EXPECT_EQ(10, g.reshape->quantparam()->zerop[1]); + EXPECT_EQ(20, g.reshape->quantparam()->zerop[2]); +} + +TEST(PropagateQParamForward, wrong_op_NEG) +{ + SimpleGraph g; + g.output->from(g.conv); + g.reshape->drop(); + + luci::PropagateQParamForwardPass pass; + while (pass.run(&g.g)) + ; + + EXPECT_FLOAT_EQ(0.1, g.conv->quantparam()->scale[0]); + EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[1]); + EXPECT_FLOAT_EQ(0.3, g.conv->quantparam()->scale[2]); + EXPECT_EQ(0, g.conv->quantparam()->zerop[0]); + EXPECT_EQ(10, g.conv->quantparam()->zerop[1]); + EXPECT_EQ(20, g.conv->quantparam()->zerop[2]); +} + +TEST(PropagateQParamForward, tanh_predefined_value) +{ + TanhQuantizeGraph g; 
+ + luci::PropagateQParamForwardPass pass; + while (pass.run(&g.g)) + ; + + EXPECT_FLOAT_EQ(1.0f / 32768.0f, g.quantize->quantparam()->scale[0]); +} + +TEST(PropagateQParamForward, floor_int_scale) +{ + FloorQuantizeGraph g; + + luci::PropagateQParamForwardPass pass; + while (pass.run(&g.g)) + ; + + EXPECT_FLOAT_EQ(1.0f, g.quantize->quantparam()->scale[0]); +} + +TEST(PropagateQParamForward, same_dtype_NEG) +{ + FloorQuantizeGraph g; + g.quantize->dtype(loco::DataType::U8); + + luci::PropagateQParamForwardPass pass; + while (pass.run(&g.g)) + ; + + // Qparam is not propagated as ifm/ofm of Quantize Op have the same dtype + EXPECT_FLOAT_EQ(0.3f, g.quantize->quantparam()->scale[0]); +} diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.cpp deleted file mode 100644 index b1cb7a418..000000000 --- a/compiler/luci/pass/src/PropagateQuantParamPass.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "luci/Pass/PropagateQuantParamPass.h" - -#include <luci/IR/CircleNodes.h> -#include <luci/IR/CircleNodeVisitor.h> -#include <luci/Log.h> - -#include <iostream> - -namespace -{ - -bool copy_qparam(luci::CircleQuantParam *src, luci::CircleQuantParam *dst) -{ - assert(src->scale.size() == dst->scale.size()); - assert(src->zerop.size() == dst->zerop.size()); - - // src and dst have the same qparam - if (std::equal(src->scale.begin(), src->scale.end(), dst->scale.begin()) && - std::equal(src->zerop.begin(), src->zerop.end(), dst->zerop.begin()) && - src->quantized_dimension == dst->quantized_dimension) - return false; - - dst->scale.assign(src->scale.begin(), src->scale.end()); - dst->zerop.assign(src->zerop.begin(), src->zerop.end()); - dst->quantized_dimension = src->quantized_dimension; - return true; -} - -bool copy_qparam(luci::CircleNode *src, luci::CircleNode *dst) -{ - // Skip nodes that do not have quantparams - auto src_qparam = src->quantparam(); - if (not src_qparam) - return false; - - auto dst_qparam = dst->quantparam(); - if (not dst_qparam) - return false; - - return copy_qparam(src_qparam, dst_qparam); -} - -// Visitor to propagate quantization parameters -struct PropagateQuantParam final : public luci::CircleNodeMutableVisitor<bool> -{ - PropagateQuantParam() = default; - - bool visit(luci::CircleNode *) { return false; } - - bool visit(luci::CircleReshape *node) - { - auto input = node->tensor(); - if (loco::succs(input).size() != 1) - return false; - - auto input_node = loco::must_cast<luci::CircleNode *>(input); - return copy_qparam(input_node, node); - } - - bool visit(luci::CircleTranspose *node) - { - auto input_node = loco::must_cast<luci::CircleNode *>(node->a()); - return copy_qparam(input_node, node); - } - - // TODO : Add more Ops (e.g., layout-changing Ops) -}; - -} // namespace - -namespace luci -{ - -bool PropagateQuantParamPass::run(loco::Graph *g) -{ - bool changed = false; - LOGGER(l); - for (auto node : 
loco::active_nodes(loco::output_nodes(g))) - { - auto circle_node = loco::must_cast<luci::CircleNode *>(node); - INFO(l) << "PropagateQuantParamPass visit node: " << circle_node->name() << std::endl; - - PropagateQuantParam pqp; - if (circle_node->accept(&pqp)) - changed = true; - } - - return changed; -} - -} // namespace luci diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp deleted file mode 100644 index 0f1564223..000000000 --- a/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "luci/Pass/PropagateQuantParamPass.h" - -#include <luci/IR/CircleNodes.h> - -#include <gtest/gtest.h> - -namespace -{ - -void addQuantParam(luci::CircleNode *node, const std::vector<float> &scale, - const std::vector<int64_t> &zp) -{ - assert(node->quantparam() == nullptr); - - auto quantparam = std::make_unique<luci::CircleQuantParam>(); - quantparam->scale = scale; - quantparam->zerop = zp; - node->quantparam(std::move(quantparam)); -} - -/** - * Simple graph for test - * - * BEFORE - * - * [Conv] (qparam 1) - * | - * [Reshape] (qparam 2) - * - * AFTER - * - * [Conv] (qparam 2) - * | - * [Reshape] (qparam 2) - * - */ -class SimpleGraph -{ -public: - SimpleGraph() - { - input = g.nodes()->create<luci::CircleInput>(); - conv = g.nodes()->create<luci::CircleConv2D>(); - reshape = g.nodes()->create<luci::CircleReshape>(); - output = g.nodes()->create<luci::CircleOutput>(); - - auto graph_input = g.inputs()->create(); - input->index(graph_input->index()); - auto graph_output = g.outputs()->create(); - output->index(graph_output->index()); - - addQuantParam(conv, {0.1, 0.2, 0.3}, {0, 10, 20}); - addQuantParam(reshape, {0.2, 0.4, 0.6}, {-10, 0, 10}); - - conv->input(input); - reshape->tensor(conv); - output->from(reshape); - } - -public: - loco::Graph g; - luci::CircleInput *input; - luci::CircleConv2D *conv; - luci::CircleReshape *reshape; - luci::CircleOutput *output; -}; - -} // namespace - -TEST(PropagateQuantParamPassTest, name) -{ - luci::PropagateQuantParamPass pass; - auto const name = pass.name(); - ASSERT_NE(nullptr, name); -} - -TEST(PropagateQuantParam, simple) -{ - SimpleGraph g; - - luci::PropagateQuantParamPass pass; - while (pass.run(&g.g)) - ; - - EXPECT_FLOAT_EQ(0.1, g.reshape->quantparam()->scale[0]); - EXPECT_FLOAT_EQ(0.2, g.reshape->quantparam()->scale[1]); - EXPECT_FLOAT_EQ(0.3, g.reshape->quantparam()->scale[2]); - EXPECT_EQ(0, g.reshape->quantparam()->zerop[0]); - EXPECT_EQ(10, g.reshape->quantparam()->zerop[1]); - EXPECT_EQ(20, 
g.reshape->quantparam()->zerop[2]); -} - -TEST(PropagateQuantParam, wrong_op_NEG) -{ - SimpleGraph g; - g.output->from(g.conv); - g.reshape->drop(); - - luci::PropagateQuantParamPass pass; - while (pass.run(&g.g)) - ; - - EXPECT_FLOAT_EQ(0.1, g.conv->quantparam()->scale[0]); - EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[1]); - EXPECT_FLOAT_EQ(0.3, g.conv->quantparam()->scale[2]); - EXPECT_EQ(0, g.conv->quantparam()->zerop[0]); - EXPECT_EQ(10, g.conv->quantparam()->zerop[1]); - EXPECT_EQ(20, g.conv->quantparam()->zerop[2]); -} diff --git a/compiler/luci/pass/src/QuantizationUtils.cpp b/compiler/luci/pass/src/QuantizationUtils.cpp index 2f6fed46e..ad86cedf4 100644 --- a/compiler/luci/pass/src/QuantizationUtils.cpp +++ b/compiler/luci/pass/src/QuantizationUtils.cpp @@ -33,43 +33,6 @@ bool is_quantized(const CircleNode *node) node->dtype() == loco::DataType::S64); // bias (int16 quant) } -// Check if node is weights of conv2d, depthwise_conv2d, or fully_connected layer -bool is_weights(CircleNode *node) -{ - auto circle_const = dynamic_cast<CircleConst *>(node); - if (circle_const == nullptr) - return false; - - auto succs = loco::succs(node); - - // Node is weights if it is the weights of all of its successors - for (auto out : succs) - { - bool is_weights = false; - - auto conv = dynamic_cast<CircleConv2D *>(out); - if (conv != nullptr && conv->filter() == circle_const) - is_weights = true; - - auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out); - if (dw_conv != nullptr && dw_conv->filter() == circle_const) - is_weights = true; - - auto t_conv = dynamic_cast<CircleTransposeConv *>(out); - if (t_conv != nullptr && t_conv->filter() == circle_const && circle_const->rank() == 4) - is_weights = true; - - auto fc = dynamic_cast<CircleFullyConnected *>(out); - if (fc != nullptr && fc->weights() == circle_const) - is_weights = true; - - if (!is_weights) - return false; - } - - return true; -} - uint8_t fp32_to_uint8_cast(float f) { 
assert(std::numeric_limits<uint8_t>::min() <= f); @@ -77,7 +40,6 @@ uint8_t fp32_to_uint8_cast(float f) return static_cast<uint8_t>(f); } -// Per-layer quantization of weights (const tensor) using given min/max values void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max, float &scaling_factor, int64_t &zp, float &nudged_min, float &nudged_max) @@ -107,7 +69,6 @@ void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float } } -// Per-layer quantization of weights (const tensor) using given min/max values void symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max, float &scaling_factor, int64_t &zp, float &nudged_min, float &nudged_max) @@ -315,4 +276,123 @@ uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices) indices[2] * dimension.dim(3).value() + indices[3]; } +ActivationQType activation_qtype(const CircleNode *node) +{ + auto fused_act_node = dynamic_cast<const CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node); + if (fused_act_node && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH) + return ActivationQType::PreDefinedValue; + + switch (node->opcode()) + { + case CircleOpcode::LOGISTIC: + case CircleOpcode::TANH: + case CircleOpcode::SOFTMAX: + return ActivationQType::PreDefinedValue; + case CircleOpcode::FLOOR: + case CircleOpcode::FLOOR_DIV: + case CircleOpcode::FLOOR_MOD: + case CircleOpcode::CEIL: + return ActivationQType::IntScale; + default: + break; + } + + return ActivationQType::MinMax; +} + +std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype) +{ + auto qparam = std::make_unique<CircleQuantParam>(); + + auto set_qparam = [&qparam](float scale, int64_t zp) { + qparam->scale.emplace_back(scale); + qparam->zerop.emplace_back(zp); + }; + + switch (opcode) + { + case CircleOpcode::LOGISTIC: + if (dtype == loco::DataType::U8) + set_qparam(1.0f / 256.0f, 0); + else + { + assert(dtype == 
loco::DataType::S16); + set_qparam(1.0f / 32768.0f, 0); + } + break; + case CircleOpcode::TANH: + if (dtype == loco::DataType::U8) + set_qparam(2.0f / 256.0f, 128); + else + { + assert(dtype == loco::DataType::S16); + set_qparam(1.0f / 32768.0f, 0); + } + break; + case CircleOpcode::SOFTMAX: + if (dtype == loco::DataType::U8) + set_qparam(1.0f / 255.0f, 0); + else + { + assert(dtype == loco::DataType::S16); + set_qparam(1.0f / 32767.0f, 0); + } + break; + default: + throw std::runtime_error("Unsupported opcode with pre-defined qparam"); + } + return std::move(qparam); +} + +// For nodes with integer output, we use integer scale +void set_int_scale(luci::CircleNode *node) +{ + assert(node); // FIX_CALLER_UNLESS + + auto qparam = node->quantparam(); + assert(qparam); // FIX_CALLER_UNLESS + assert(qparam->scale.size() == 1); // FIX_CALLER_UNLESS + + auto fp_scale = qparam->scale[0]; + qparam->scale[0] = fp_scale < 1 ? 1.0f : std::round(fp_scale); +} + +void quant_const(luci::CircleConst *node, loco::DataType quant_type) +{ + assert(node->dtype() == loco::DataType::FLOAT32); + + float min = std::numeric_limits<float>::max(); + float max = std::numeric_limits<float>::lowest(); + for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++) + { + auto data = node->at<loco::DataType::FLOAT32>(i); + min = data < min ? data : min; + max = data > max ? 
data : max; + } + + float scaling_factor{0.0}; + int64_t zp{0}; + float nudged_min{0.0}; + float nudged_max{0.0}; + + switch (quant_type) + { + case loco::DataType::U8: + asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min, + nudged_max); + break; + case loco::DataType::S16: + symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min, + nudged_max); + break; + default: + throw std::runtime_error("Unsupported data type"); + } + + auto quantparam = std::make_unique<luci::CircleQuantParam>(); + quantparam->scale.push_back(scaling_factor); + quantparam->zerop.push_back(zp); + node->quantparam(std::move(quantparam)); +} + } // namespace luci diff --git a/compiler/luci/pass/src/QuantizationUtils.h b/compiler/luci/pass/src/QuantizationUtils.h index 605f6a77e..cd8cec95a 100644 --- a/compiler/luci/pass/src/QuantizationUtils.h +++ b/compiler/luci/pass/src/QuantizationUtils.h @@ -23,33 +23,61 @@ namespace luci { +// Compute scale/zp using given min/max for symmetric quantization (int16) void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp, float &nudged_min, float &nudged_max); +// Compute scale/zp using given min/max for asymmetric quantization (uint8) void compute_asym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp, float &nudged_min, float &nudged_max); +// Asymmetric per-layer quantization of weights (const tensor) using given min/max values +// NOTE: in-place update of node data void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max, float &scaling_factor, int64_t &zp, float &nudged_min, float &nudged_max); +// Symmetric per-layer quantization of weights (const tensor) using given min/max values +// NOTE: in-place update of node data void symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max, float &scaling_factor, int64_t &zp, float &nudged_min, float &nudged_max); +// Helper function to get channel 
dimension +// TODO Embed this function into iterate_per_channel bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension, int32_t &channel_dim_index); +// Calculate offset of the given indices in dimension uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices); -void propagate_concat_quantparam(luci::CircleConcatenation *concat, loco::DataType quant_type); +// Backward propagation of concatenation qparam +void propagate_concat_quantparam(luci::CircleConcatenation *concat); -void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant_type); - -bool is_weights(CircleNode *node); +// Backward propagation of pad_v2 qparam +void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2); +// Return true if the node is quantized bool is_quantized(const CircleNode *node); +enum ActivationQType +{ + MinMax, // Quantize using recorded min/max + PreDefinedValue, // Quantize using pre-defined values + IntScale, // Round scale to a positive integer +}; + +ActivationQType activation_qtype(const CircleNode *node); + +// Create qparam with pre-defined values for speical operators +std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype); + +// Update node's scale to a positive integer (for special Ops e.g., Floor, Ceil) +void set_int_scale(luci::CircleNode *node); + +// Quantize const tensor using its min/max values +void quant_const(luci::CircleConst *node, loco::DataType quant_type); + } // namespace luci #endif // __LUCI_QUANTIZATION_UTILS_H__ diff --git a/compiler/luci/pass/src/QuantizeActivation.cpp b/compiler/luci/pass/src/QuantizeActivation.cpp new file mode 100644 index 000000000..149331824 --- /dev/null +++ b/compiler/luci/pass/src/QuantizeActivation.cpp @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "QuantizeActivation.h" +#include "QuantizationUtils.h" + +#include <luci/Service/Nodes/CircleConst.h> +#include <luci/Log.h> + +#include <algorithm> +#include <cmath> + +using namespace luci; + +namespace +{ + +bool has_min_max(const CircleNode *node) +{ + return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty(); +} + +} // namespace + +// QuantizeActivation +namespace luci +{ + +void QuantizeActivation::visit(luci::CircleNode *node) +{ + LOGGER(l); + INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl; + + // Check if this is already quantized + if (is_quantized(node)) + return; + + // Check if this is bool type (bool type is not quantized) + if (node->dtype() == loco::DataType::BOOL) + return; + + // Check if this is const (const activation is handled by QuantizeConstInputActivation) + // NOTE QuantizePreChecker guarantees weights/bias are const. + // Update this code when we accept non-const weights/bias. 
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST) + return; + + // Check if this is activation + // We assume min/max are recorded only for activations + if (has_min_max(node)) + { + // Quantize using recorded min/max + auto quantparam = node->quantparam(); + assert(quantparam); + assert(quantparam->min.size() == 1); // only support layer-wise quant + assert(quantparam->max.size() == 1); // only support layer-wise quant + auto min = quantparam->min[0]; + auto max = quantparam->max[0]; + + float scaling_factor{0}; + int64_t zp{0}; + float nudged_min{0}; + float nudged_max{0}; + + if (output_type == loco::DataType::U8) + { + compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max); + node->dtype(loco::DataType::U8); + } + else + { + compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max); + node->dtype(loco::DataType::S16); + } + + node->quantparam()->scale.push_back(scaling_factor); + node->quantparam()->zerop.push_back(zp); + } + // Fix special attributes + if (node->opcode() == luci::CircleOpcode::CAST) + { + auto *cast = loco::must_cast<luci::CircleCast *>(node); + auto *cast_input = loco::must_cast<luci::CircleNode *>(cast->x()); + + // make sure that cast_input is already quantized + assert(cast_input->dtype() != loco::DataType::FLOAT32); + cast->in_data_type(cast_input->dtype()); + cast->out_data_type(cast->dtype()); + } +} + +} // namespace luci + +// QuantizeSpecialActivation +namespace luci +{ + +void QuantizeSpecialActivation::visit(luci::CircleNode *node) +{ + // Nodes fused with activation functions which need special quantization + auto fused_act_node = dynamic_cast<CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node); + if (fused_act_node != nullptr && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH) + { + auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type); + node->quantparam(std::move(qparam)); + } +} + +void QuantizeSpecialActivation::visit(luci::CircleLogistic 
*node) +{ + assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue); + auto qparam = make_predefined_qparam(luci::CircleOpcode::LOGISTIC, output_type); + node->quantparam(std::move(qparam)); +} + +void QuantizeSpecialActivation::visit(luci::CircleTanh *node) +{ + assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue); + auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type); + node->quantparam(std::move(qparam)); +} + +void QuantizeSpecialActivation::visit(luci::CircleSoftmax *node) +{ + assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue); + auto qparam = make_predefined_qparam(luci::CircleOpcode::SOFTMAX, output_type); + node->quantparam(std::move(qparam)); +} + +void QuantizeSpecialActivation::visit(luci::CircleFloor *node) +{ + assert(activation_qtype(node) == luci::ActivationQType::IntScale); + set_int_scale(node); +} + +void QuantizeSpecialActivation::visit(luci::CircleFloorDiv *node) +{ + assert(activation_qtype(node) == luci::ActivationQType::IntScale); + set_int_scale(node); +} + +void QuantizeSpecialActivation::visit(luci::CircleFloorMod *node) +{ + assert(activation_qtype(node) == luci::ActivationQType::IntScale); + set_int_scale(node); +} + +void QuantizeSpecialActivation::visit(luci::CircleCeil *node) +{ + assert(activation_qtype(node) == luci::ActivationQType::IntScale); + set_int_scale(node); +} + +} // namespace luci + +// QuantizeConstInputActivation +namespace luci +{ + +// Default behavior (NYI) +void QuantizeConstInputActivation::visit(luci::CircleNode *node) +{ + for (uint32_t i = 0; i < node->arity(); i++) + { + auto input_node = node->arg(i); + auto const_node = dynamic_cast<luci::CircleConst *>(input_node); + if (const_node != nullptr) + throw std::runtime_error("Unsupported Op for const inputs"); + } +} + +// INPUT_NAME is the only activation of NODE +#define QUANTIZE_SINGLE_CONST_INPUT(NODE, INPUT_NAME) \ + void QuantizeConstInputActivation::visit(NODE *node) \ + { 
\ + auto input = node->INPUT_NAME(); \ + auto const_node = dynamic_cast<luci::CircleConst *>(input); \ + if (const_node && !is_quantized(const_node)) \ + { \ + auto new_const = luci::clone(const_node); \ + quant_const(new_const, _output_type); \ + node->INPUT_NAME(new_const); \ + } \ + } + +// INPUT_NAME1 and INPUT_NAME2 are the only activations of NODE +#define QUANTIZE_TWO_CONST_INPUTS(NODE, INPUT_NAME1, INPUT_NAME2) \ + void QuantizeConstInputActivation::visit(NODE *node) \ + { \ + auto input1 = node->INPUT_NAME1(); \ + auto const_node1 = dynamic_cast<luci::CircleConst *>(input1); \ + if (const_node1 && !is_quantized(const_node1)) \ + { \ + auto new_const1 = luci::clone(const_node1); \ + quant_const(new_const1, _output_type); \ + node->INPUT_NAME1(new_const1); \ + } \ + auto input2 = node->INPUT_NAME2(); \ + auto const_node2 = dynamic_cast<luci::CircleConst *>(input2); \ + if (const_node2 && !is_quantized(const_node2)) \ + { \ + auto new_const2 = luci::clone(const_node2); \ + quant_const(new_const2, _output_type); \ + node->INPUT_NAME2(new_const2); \ + } \ + } + +// Ops that receive a single activation as an input +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMax, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMin, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleBatchToSpaceND, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleDepthToSpace, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleElu, features) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleExp, x) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleFloor, x) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleGather, params) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleLocalResponseNormalization, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleLogistic, x) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleMean, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleMirrorPad, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CirclePad, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceAny, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceProd, 
input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceMax, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceMin, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReshape, tensor) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleResizeBilinear, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleResizeNearestNeighbor, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReverseSequence, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleRsqrt, x) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSlice, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSoftmax, logits) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSpaceToBatchND, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSpaceToDepth, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSplit, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSplitV, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSqrt, x) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleStridedSlice, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSum, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTanh, x) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTile, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTopKV2, input) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTranspose, a) +QUANTIZE_SINGLE_CONST_INPUT(luci::CircleUnpack, value) + +// Ops that receive two activations as inputs +QUANTIZE_TWO_CONST_INPUTS(luci::CircleAdd, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleBatchMatMul, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleDiv, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleEqual, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleFloorDiv, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleGreater, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleGreaterEqual, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleLess, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleLessEqual, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleMaximum, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleMinimum, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleMul, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleNotEqual, x, y) 
+QUANTIZE_TWO_CONST_INPUTS(luci::CirclePow, x, y) +QUANTIZE_TWO_CONST_INPUTS(luci::CircleSub, x, y) + +// AddN has arbitrary number of inputs +void QuantizeConstInputActivation::visit(luci::CircleAddN *node) +{ + auto arity = node->arity(); + for (uint32_t i = 0; i < arity; i++) + { + auto input_node = node->inputs(i); + auto const_node = dynamic_cast<luci::CircleConst *>(input_node); + if (const_node && !is_quantized(const_node)) + { + auto new_const = luci::clone(const_node); + quant_const(new_const, _output_type); + node->inputs(i, new_const); + } + } +} + +#undef QUANTIZE_SINGLE_CONST_INPUT +#undef QUANTIZE_TWO_CONST_INPUTS + +} // namespace luci diff --git a/compiler/luci/pass/src/QuantizeActivation.h b/compiler/luci/pass/src/QuantizeActivation.h new file mode 100644 index 000000000..fc32d1cde --- /dev/null +++ b/compiler/luci/pass/src/QuantizeActivation.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_QUANTIZATION_ACTIVATION_H__ +#define __LUCI_QUANTIZATION_ACTIVATION_H__ + +#include <luci/IR/CircleNodeVisitor.h> + +namespace luci +{ + +/** + * @brief Quantize non-const activation using recorded min/max values + */ +struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<void> +{ + QuantizeActivation(loco::DataType input, loco::DataType output) + : input_type(input), output_type(output) + { + } + + loco::DataType input_type; + loco::DataType output_type; + + // Quantize each node using recorded min/max + void visit(luci::CircleNode *node); +}; + +/** + * @brief Quantize non-const activation using pre-defined scale/zp for special Ops + */ +struct QuantizeSpecialActivation final : public luci::CircleNodeMutableVisitor<void> +{ + QuantizeSpecialActivation(loco::DataType input, loco::DataType output) + : input_type(input), output_type(output) + { + } + + loco::DataType input_type; + loco::DataType output_type; + + void visit(luci::CircleNode *node); + void visit(luci::CircleLogistic *node); + void visit(luci::CircleTanh *node); + void visit(luci::CircleSoftmax *node); + void visit(luci::CircleFloor *node); + void visit(luci::CircleFloorDiv *node); + void visit(luci::CircleFloorMod *node); + void visit(luci::CircleCeil *node); +}; + +// Quantize constant input activation of a node +// The input of a node is quantized if it is +// 1. Constant (instance of CircleConst*) +// 2. 
Activation (other inputs e.g., weights, bias, axis, etc should not be quantized here) +struct QuantizeConstInputActivation final : public luci::CircleNodeMutableVisitor<void> +{ + QuantizeConstInputActivation(loco::DataType output_type) : _output_type(output_type) {} + +private: + loco::DataType _output_type; + +// Skip NODE +#define SKIP(NODE) \ + void visit(NODE *) {} + + // Handled in QuantizeWeights and QuantizeBias + SKIP(luci::CircleConv2D) + SKIP(luci::CircleDepthwiseConv2D) + SKIP(luci::CircleFullyConnected) + SKIP(luci::CircleInstanceNorm) + SKIP(luci::CirclePRelu) + SKIP(luci::CircleTransposeConv) + + // Handled in PropagateQParamBackwardPass + SKIP(luci::CircleConcatenation) + SKIP(luci::CirclePadV2) + SKIP(luci::CirclePack) + SKIP(luci::CircleOneHot) + + // Inputs of logical Ops are bool, thus not quantized + SKIP(luci::CircleLogicalOr) + SKIP(luci::CircleLogicalAnd) + SKIP(luci::CircleLogicalNot) + +#undef SKIP + + // Default behavior (NYI) + void visit(luci::CircleNode *node); + + // Ops that receive a single activation as an input + void visit(luci::CircleArgMax *node); + void visit(luci::CircleArgMin *node); + void visit(luci::CircleBatchToSpaceND *node); + void visit(luci::CircleDepthToSpace *node); + void visit(luci::CircleElu *node); + void visit(luci::CircleExp *node); + void visit(luci::CircleFloor *node); + void visit(luci::CircleGather *node); + void visit(luci::CircleLocalResponseNormalization *node); + void visit(luci::CircleLogistic *node); + void visit(luci::CircleMean *node); + void visit(luci::CircleMirrorPad *node); + void visit(luci::CirclePad *node); + void visit(luci::CircleReduceAny *node); + void visit(luci::CircleReduceProd *node); + void visit(luci::CircleReduceMax *node); + void visit(luci::CircleReduceMin *node); + void visit(luci::CircleReshape *node); + void visit(luci::CircleResizeBilinear *node); + void visit(luci::CircleResizeNearestNeighbor *node); + void visit(luci::CircleReverseSequence *node); + void 
visit(luci::CircleRsqrt *node); + void visit(luci::CircleSlice *node); + void visit(luci::CircleSoftmax *node); + void visit(luci::CircleSpaceToBatchND *node); + void visit(luci::CircleSpaceToDepth *node); + void visit(luci::CircleSplit *node); + void visit(luci::CircleSplitV *node); + void visit(luci::CircleSqrt *node); + void visit(luci::CircleStridedSlice *node); + void visit(luci::CircleSum *node); + void visit(luci::CircleTanh *node); + void visit(luci::CircleTile *node); + void visit(luci::CircleTopKV2 *node); + void visit(luci::CircleTranspose *node); + void visit(luci::CircleUnpack *node); + + // Ops that receive two activations as inputs + void visit(luci::CircleAdd *node); + void visit(luci::CircleBatchMatMul *node); + void visit(luci::CircleDiv *node); + void visit(luci::CircleEqual *node); + void visit(luci::CircleFloorDiv *node); + void visit(luci::CircleGreater *node); + void visit(luci::CircleGreaterEqual *node); + void visit(luci::CircleLess *node); + void visit(luci::CircleLessEqual *node); + void visit(luci::CircleMaximum *node); + void visit(luci::CircleMinimum *node); + void visit(luci::CircleMul *node); + void visit(luci::CircleNotEqual *node); + void visit(luci::CirclePow *node); + void visit(luci::CircleSub *node); + + // AddN has arbitrary number of inputs + void visit(luci::CircleAddN *node); +}; + +} // namespace luci + +#endif // __LUCI_QUANTIZATION_ACTIVATION_H__ diff --git a/compiler/luci/pass/src/QuantizeBias.cpp b/compiler/luci/pass/src/QuantizeBias.cpp new file mode 100644 index 000000000..aa496232a --- /dev/null +++ b/compiler/luci/pass/src/QuantizeBias.cpp @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "QuantizeBias.h" +#include "QuantizationUtils.h" + +#include <luci/Service/Nodes/CircleConst.h> +#include <luci/Log.h> + +#include <algorithm> +#include <cmath> + +using namespace luci; + +namespace +{ + +// struct to carry Input/Weights/Bias +struct IWB +{ + CircleNode *input = nullptr; + CircleNode *weights = nullptr; + CircleConst *bias = nullptr; + + IWB(loco::Node *i, loco::Node *w, loco::Node *b) + { + input = dynamic_cast<luci::CircleNode *>(i); + weights = dynamic_cast<luci::CircleNode *>(w); + bias = dynamic_cast<luci::CircleConst *>(b); + } + + // Return true if bias can be quantized with valid input and weights + operator bool() + { + if (bias == nullptr || is_quantized(bias)) + return false; + if (input == nullptr || weights == nullptr) + return false; + return true; + } +}; + +// Create a new const node from an existing node. +// The new node has the following characteristics +// type: T +// shape: same with 'node' (given as an argument) +// buffer size: 'size' (given as an argument) +// Note that contents are not filled in this function. +template <loco::DataType T> +luci::CircleConst *create_empty_const_from(luci::CircleConst *node, uint32_t size) +{ + auto new_node = node->graph()->nodes()->create<CircleConst>(); + // TODO: We don't have any naming convention for quantized nodes yet. + // Fix this when we have one. 
+ new_node->name(node->name()); + new_node->dtype(T); + new_node->rank(node->rank()); + for (uint32_t i = 0; i < node->rank(); i++) + new_node->dim(i).set(node->dim(i).value()); + + new_node->size<T>(size); + new_node->shape_status(luci::ShapeStatus::VALID); + + return new_node; +} + +CircleConst *asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale, + float *scaling_factor, int64_t *zp) +{ + float scale = input_scale * weight_scale; + const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale; + + uint32_t size = node->size<loco::DataType::FLOAT32>(); + std::vector<int32_t> quantized_values(size); + for (uint32_t i = 0; i < size; ++i) + { + quantized_values[i] = + static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv)); + } + + auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size); + + const int32_t kMinScale = std::numeric_limits<int32_t>::lowest(); + const int32_t kMaxScale = std::numeric_limits<int32_t>::max(); + for (uint32_t i = 0; i < size; ++i) + { + new_bias->at<loco::DataType::S32>(i) = + std::min(kMaxScale, std::max(kMinScale, quantized_values[i])); + } + *scaling_factor = scale; + *zp = 0; + + return new_bias; +} + +CircleConst *quant_bias_per_channel(CircleConst *node, float input_scale, + std::vector<float> &weight_scale, + std::vector<float> &scaling_factor, std::vector<int64_t> &zp) +{ + float scaling_factor_inv{0}; + + uint32_t size = node->size<loco::DataType::FLOAT32>(); + std::vector<int32_t> quantized_values(size); + + for (uint32_t i = 0; i < size; ++i) + { + scaling_factor[i] = input_scale * weight_scale[i]; + scaling_factor_inv = (scaling_factor[i] == 0) ? 
0 : 1.0 / scaling_factor[i]; + quantized_values[i] = + static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv)); + zp[i] = 0; + } + + auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size); + + const int32_t kMinScale = std::numeric_limits<int32_t>::lowest(); + const int32_t kMaxScale = std::numeric_limits<int32_t>::max(); + for (uint32_t i = 0; i < size; ++i) + { + new_bias->at<loco::DataType::S32>(i) = + std::min(kMaxScale, std::max(kMinScale, quantized_values[i])); + } + + return new_bias; +} + +CircleConst *int16_quant_bias_per_channel(CircleConst *node, float input_scale, + std::vector<float> &weight_scale, + std::vector<float> &scaling_factor, + std::vector<int64_t> &zp) +{ + float scaling_factor_inv{0}; + + uint32_t size = node->size<loco::DataType::FLOAT32>(); + std::vector<int64_t> quantized_values(size); + + for (uint32_t i = 0; i < size; ++i) + { + scaling_factor[i] = input_scale * weight_scale[i]; + scaling_factor_inv = (scaling_factor[i] == 0) ? 
0 : 1.0 / scaling_factor[i]; + quantized_values[i] = + static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv)); + zp[i] = 0; + } + + auto new_bias = create_empty_const_from<loco::DataType::S64>(node, size); + + for (uint32_t i = 0; i < size; ++i) + { + new_bias->at<loco::DataType::S64>(i) = quantized_values[i]; + } + + return new_bias; +} + +} // namespace + +namespace luci +{ + +// Return a quantized bias node +CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *weight, + CircleNode *bias) +{ + auto const_bias = loco::must_cast<luci::CircleConst *>(bias); + assert(const_bias->dtype() == loco::DataType::FLOAT32); + + // If input is const, it is quantized here, not in QuantizeActivation + if (auto const_input = dynamic_cast<luci::CircleConst *>(input)) + { + quant_const(const_input, output_type); + } + + CircleConst *new_bias = nullptr; + + if (granularity == QuantizationGranularity::ChannelWise) + { + auto input_q = input->quantparam(); + assert(input_q); + assert(input_q->scale.size() == 1); // input scale's layer-wise + auto input_scale = input_q->scale[0]; + + assert(weight->quantparam() != nullptr); // weight scale's channel-wise + auto weight_scale = weight->quantparam()->scale; + + uint32_t size = const_bias->size<loco::DataType::FLOAT32>(); + assert(size == weight_scale.size()); + std::vector<float> scaling_factor(size); + std::vector<int64_t> zp(size); + + if (output_type == loco::DataType::U8) + { + new_bias = quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp); + } + else if (output_type == loco::DataType::S16) + { + new_bias = + int16_quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp); + } + else + { + throw std::runtime_error("Unsupported quantization type."); + } + + auto quantparam = std::make_unique<CircleQuantParam>(); + quantparam->scale = scaling_factor; + quantparam->zerop = zp; + assert(new_bias->quantparam() == nullptr); 
// bias should not be quantized before + new_bias->quantparam(std::move(quantparam)); + + return new_bias; + } + else + { + auto input_q = input->quantparam(); + assert(input_q); + assert(input_q->scale.size() == 1); // Only support per-layer quant + auto input_scale = input_q->scale[0]; + + auto weight_q = weight->quantparam(); + assert(weight_q); + assert(weight_q->scale.size() == 1); // Only support per-layer quant + auto weight_scale = weight_q->scale[0]; + + float scaling_factor{0}; + int64_t zp{0}; + new_bias = + asym_quant_bias_per_layer(const_bias, input_scale, weight_scale, &scaling_factor, &zp); + auto quantparam = std::make_unique<CircleQuantParam>(); + quantparam->scale.push_back(scaling_factor); + quantparam->zerop.push_back(zp); + assert(new_bias->quantparam() == nullptr); // bias should not be quantized before + new_bias->quantparam(std::move(quantparam)); + + return new_bias; + } +} + +void QuantizeBias::visit(luci::CircleConv2D *node) +{ + LOGGER(l); + INFO(l) << "QuantizeBias QuantizeBias::visit node: " << node->name() << std::endl; + + if (auto iwb = IWB(node->input(), node->filter(), node->bias())) + { + auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias); + node->bias(new_bias); + } +} + +void QuantizeBias::visit(luci::CircleDepthwiseConv2D *node) +{ + LOGGER(l); + INFO(l) << "QuantizeBias QuantizeBias::visit node: " << node->name() << std::endl; + + if (auto iwb = IWB(node->input(), node->filter(), node->bias())) + { + auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias); + node->bias(new_bias); + } +} + +void QuantizeBias::visit(luci::CircleTransposeConv *node) +{ + LOGGER(l); + INFO(l) << "QuantizeBias QuantizeBias::visit node: " << node->name() << std::endl; + + if (auto iwb = IWB(node->outBackprop(), node->filter(), node->bias())) + { + auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias); + node->bias(new_bias); + } +} + +void QuantizeBias::visit(luci::CircleFullyConnected *node) +{ + LOGGER(l); + 
INFO(l) << "QuantizeBias visit node: " << node->name() << std::endl; + + if (auto iwb = IWB(node->input(), node->weights(), node->bias())) + { + auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias); + node->bias(new_bias); + } +} + +} // namespace luci diff --git a/compiler/luci/pass/src/QuantizeBias.h b/compiler/luci/pass/src/QuantizeBias.h new file mode 100644 index 000000000..8de09df72 --- /dev/null +++ b/compiler/luci/pass/src/QuantizeBias.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_QUANTIZE_BIAS_H__ +#define __LUCI_QUANTIZE_BIAS_H__ + +#include <luci/Pass/QuantizationParameters.h> +#include <luci/IR/CircleNodeVisitor.h> + +namespace luci +{ + +/** + * @brief QuantizeBias quantizes tensors for bias + * @details Use input/weights scale to quantize values + */ +struct QuantizeBias final : public luci::CircleNodeMutableVisitor<void> +{ + QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr) + : input_type(input), output_type(output), granularity(gr) + { + } + + loco::DataType input_type; + loco::DataType output_type; + QuantizationGranularity granularity; + +private: + // Return a quantized bias node + CircleConst *quantized_bias(CircleNode *input, const CircleNode *weight, CircleNode *bias); + + void visit(luci::CircleConv2D *node); + void visit(luci::CircleDepthwiseConv2D *node); + void visit(luci::CircleTransposeConv *node); + void visit(luci::CircleFullyConnected *node); + + // Default behavior + void visit(luci::CircleNode *) {} +}; + +} // namespace luci + +#endif // __LUCI_QUANTIZE_BIAS_H__ diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp index c8ad87e3d..c9b35e0be 100644 --- a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp +++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp @@ -16,9 +16,11 @@ #include "luci/Pass/QuantizeDequantizeWeightsPass.h" #include "QuantizationUtils.h" +#include "helpers/LayerInfoMap.h" #include <luci/IR/CircleNodes.h> #include <luci/IR/CircleNodeVisitor.h> +#include <luci/Service/Nodes/CircleConst.h> #include <luci/Log.h> #include <loco/IR/TensorShape.h> @@ -251,7 +253,7 @@ void asymmetric_wdequant_with_minmax_per_layer(CircleConst *node, float scaling_ * @brief QuantizeDequantizeWeights quantizes and dequantizes tensors for weights * @details Find min/max values on the fly, quantize the model, and dequantize the model */ -struct QuantizeDequantizeWeights final 
: public luci::CircleNodeMutableVisitor<bool> +struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<void> { QuantizeDequantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity granularity) @@ -263,88 +265,164 @@ struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<b loco::DataType output_type; QuantizationGranularity granularity; - // Quantize and dequantize input tensors of each node - bool visit(luci::CircleNode *node) +private: + // Fake quantize weights (Only u8 quantization is supported for LWQ) + void fake_quantize_lwq(luci::CircleConst *weights) const { - assert(output_type == loco::DataType::U8 || output_type == loco::DataType::S16); - LOGGER(l); - INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl; - auto arity = node->arity(); - for (uint32_t i = 0; i < arity; i++) + assert(output_type == loco::DataType::U8); // FIX_CALLER_UNLESS + + // Find min/max per layer + float min = std::numeric_limits<float>::max(); + float max = std::numeric_limits<float>::lowest(); + for (uint32_t i = 0; i < weights->size<loco::DataType::FLOAT32>(); i++) { - auto input_node = node->arg(i); - auto circle_node = loco::must_cast<luci::CircleNode *>(input_node); + auto data = weights->at<loco::DataType::FLOAT32>(i); + min = data < min ? data : min; + max = data > max ? 
data : max; + } + float scaling_factor{0}; + int64_t zp{0}; + float nudged_min{0}; + float nudged_max{0}; + + asymmetric_wquant_with_minmax_per_layer(weights, min, max, scaling_factor, zp, nudged_min, + nudged_max); + asymmetric_wdequant_with_minmax_per_layer(weights, scaling_factor, nudged_min); + auto quantparam = std::make_unique<CircleQuantParam>(); + quantparam->min.push_back(nudged_min); + quantparam->max.push_back(nudged_max); + quantparam->scale.push_back(scaling_factor); + quantparam->zerop.push_back(zp); + weights->quantparam(std::move(quantparam)); + } - // Check if this is already quantized - if (is_quantized(circle_node)) - continue; +private: + // Fake quantize weights (u8/s16 quantization are supported for CWQ) + void fake_quantize_cwq(luci::CircleConst *weights) const + { + assert(output_type == loco::DataType::U8 || + output_type == loco::DataType::S16); // FIX_CALLER_UNLESS - if (is_weights(circle_node)) - { - auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node); + // Find min/max per channel + std::vector<float> min; + std::vector<float> max; - // Find min/max per channel-wise - if (granularity == QuantizationGranularity::ChannelWise) - { - std::vector<float> min; - std::vector<float> max; - - cal_minmax_per_channel(circle_const, min, max); - - std::vector<float> nudged_min(min.size()); - std::vector<float> nudged_max(min.size()); - std::vector<float> scaling_factor(min.size()); - std::vector<int64_t> zp(min.size()); - - if (output_type == loco::DataType::U8) - { - asymmetric_wquant_per_channel(circle_const, min, max, scaling_factor, zp, nudged_min, - nudged_max); - asymmetric_wdequant_per_channel(circle_const, scaling_factor, nudged_min); - } - else - { - sym_wquant_per_channel(circle_const, min, max, scaling_factor, zp, nudged_min, - nudged_max); - sym_wdequant_per_channel(circle_const, scaling_factor); - } - - auto quantparam = std::make_unique<CircleQuantParam>(); - quantparam->min = nudged_min; - quantparam->max = 
nudged_max; - quantparam->scale = scaling_factor; - quantparam->zerop = zp; - circle_node->quantparam(std::move(quantparam)); - } - // Find min/max per layer-wise - else - { - float min = std::numeric_limits<float>::max(); - float max = std::numeric_limits<float>::lowest(); - for (uint32_t i = 0; i < circle_const->size<loco::DataType::FLOAT32>(); i++) - { - auto data = circle_const->at<loco::DataType::FLOAT32>(i); - min = data < min ? data : min; - max = data > max ? data : max; - } - float scaling_factor{0}; - int64_t zp{0}; - float nudged_min{0}; - float nudged_max{0}; - - asymmetric_wquant_with_minmax_per_layer(circle_const, min, max, scaling_factor, zp, - nudged_min, nudged_max); - asymmetric_wdequant_with_minmax_per_layer(circle_const, scaling_factor, nudged_min); - auto quantparam = std::make_unique<CircleQuantParam>(); - quantparam->min.push_back(nudged_min); - quantparam->max.push_back(nudged_max); - quantparam->scale.push_back(scaling_factor); - quantparam->zerop.push_back(zp); - circle_node->quantparam(std::move(quantparam)); - } - } + cal_minmax_per_channel(weights, min, max); + + std::vector<float> nudged_min(min.size()); + std::vector<float> nudged_max(min.size()); + std::vector<float> scaling_factor(min.size()); + std::vector<int64_t> zp(min.size()); + + if (output_type == loco::DataType::U8) + { + asymmetric_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max); + asymmetric_wdequant_per_channel(weights, scaling_factor, nudged_min); + } + else + { + sym_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max); + sym_wdequant_per_channel(weights, scaling_factor); } - return false; + + auto quantparam = std::make_unique<CircleQuantParam>(); + quantparam->min = nudged_min; + quantparam->max = nudged_max; + quantparam->scale = scaling_factor; + quantparam->zerop = zp; + weights->quantparam(std::move(quantparam)); + } + +private: + void fake_quantize(luci::CircleConst *weights) const + { + switch 
(granularity) + { + case luci::QuantizationGranularity::ChannelWise: + fake_quantize_cwq(weights); + break; + case luci::QuantizationGranularity::LayerWise: + fake_quantize_lwq(weights); + break; + default: + throw std::invalid_argument("Unsupported granularity"); + } + } + +private: + // Check if + // 1. node is const + // 2. node was not quantized + bool is_quantizable(loco::Node *node) + { + auto const_node = dynamic_cast<luci::CircleConst *>(node); + if (not const_node) + return false; + + // Skip if this is already quantized + if (is_quantized(const_node)) + return false; + + return true; + } + + // Default behavior (Do nothing) + void visit(luci::CircleNode *) {} + + void visit(luci::CircleConv2D *node) + { + LOGGER(l); + INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl; + + if (not is_quantizable(node->filter())) + return; + + auto weights = loco::must_cast<luci::CircleConst *>(node->filter()); + auto new_weights = luci::clone(weights); + node->filter(new_weights); + fake_quantize(new_weights); + } + + void visit(luci::CircleDepthwiseConv2D *node) + { + LOGGER(l); + INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl; + + if (not is_quantizable(node->filter())) + return; + + auto weights = loco::must_cast<luci::CircleConst *>(node->filter()); + auto new_weights = luci::clone(weights); + node->filter(new_weights); + fake_quantize(new_weights); + } + + void visit(luci::CircleTransposeConv *node) + { + LOGGER(l); + INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl; + + if (not is_quantizable(node->filter())) + return; + + auto weights = loco::must_cast<luci::CircleConst *>(node->filter()); + auto new_weights = luci::clone(weights); + node->filter(new_weights); + fake_quantize(new_weights); + } + + void visit(luci::CircleFullyConnected *node) + { + LOGGER(l); + INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl; + + if (not 
is_quantizable(node->weights())) + return; + + auto weights = loco::must_cast<luci::CircleConst *>(node->weights()); + auto new_weights = luci::clone(weights); + node->weights(new_weights); + fake_quantize(new_weights); } }; @@ -355,11 +433,36 @@ bool QuantizeDequantizeWeightsPass::run(loco::Graph *g) LOGGER(l); INFO(l) << "QuantizeDequantizeWeightsPass Start" << std::endl; + auto info_by_name = layer_info_map(g, _ctx->layers_info); + + auto quantize_dtype = [&](const luci::CircleNode *node) { + auto iter = info_by_name.find(node->name()); + + // Return designated quantization dtype + if (iter != info_by_name.end()) + return iter->second.dtype; + + // Return default quantization dtype + return _ctx->output_model_dtype; + }; + + auto quantize_granularity = [&](const luci::CircleNode *node) { + auto iter = info_by_name.find(node->name()); + + // Return designated quantization granularity + if (iter != info_by_name.end()) + return iter->second.granularity; + + // Return default quantization granularity + return _ctx->granularity; + }; + // Quantize weights for (auto node : loco::active_nodes(loco::output_nodes(g))) { - QuantizeDequantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity); auto circle_node = loco::must_cast<luci::CircleNode *>(node); + QuantizeDequantizeWeights qw(_ctx->input_model_dtype, quantize_dtype(circle_node), + quantize_granularity(circle_node)); circle_node->accept(&qw); } diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp index f226253c2..15f5ca7ac 100644 --- a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp +++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp @@ -25,3 +25,17 @@ TEST(QuantizeDequantizeWeightsPassTest, name) auto const name = pass.name(); ASSERT_NE(nullptr, name); } + +TEST(QuantizeDequantizeWeightsPassTest, name_ctx) +{ + auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>(); 
+ { + ctx->input_model_dtype = loco::DataType::FLOAT32; + ctx->output_model_dtype = loco::DataType::U8; + ctx->granularity = luci::QuantizationGranularity::LayerWise; + } + + luci::QuantizeDequantizeWeightsPass pass(std::move(ctx)); + auto const name = pass.name(); + ASSERT_NE(nullptr, name); +} diff --git a/compiler/luci/pass/src/QuantizePreCheckerPass.cpp b/compiler/luci/pass/src/QuantizePreCheckerPass.cpp new file mode 100644 index 000000000..4b3b7e330 --- /dev/null +++ b/compiler/luci/pass/src/QuantizePreCheckerPass.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/QuantizePreCheckerPass.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> + +#include <luci/Log.h> + +namespace luci +{ + +namespace +{ + +void check_const_opcode(luci::CircleNode *node) +{ + if (node == nullptr) + return; + + if (node->opcode() != luci::CircleOpcode::CIRCLECONST and + node->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE) + { + throw std::runtime_error("Unsupported non const input " + node->name()); + } +} + +struct ConstInputChecker final : public luci::CircleNodeMutableVisitor<void> +{ +// INPUT_NAME is name for input const for current NODE +#define CHECK_NODE_WITH_ONE_INPUT_CONST(NODE, INPUT_NAME) \ + void visit(NODE *node) \ + { \ + const auto input = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME()); \ + check_const_opcode(input); \ + } + +// INPUT_NAME_1 and INPUT_NAME_2 are names for input const for current NODE +#define CHECK_NODE_WITH_TWO_INPUT_CONST(NODE, INPUT_NAME_1, INPUT_NAME_2) \ + void visit(NODE *node) \ + { \ + const auto input_1 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_1()); \ + const auto input_2 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_2()); \ + \ + check_const_opcode(input_1); \ + check_const_opcode(input_2); \ + } + +// INPUT_NAME_1, INPUT_NAME_2 and INPUT_NAME_3 are names for input const for current NODE +#define CHECK_NODE_WITH_THREE_INPUT_CONST(NODE, INPUT_NAME_1, INPUT_NAME_2, INPUT_NAME_3) \ + void visit(NODE *node) \ + { \ + const auto input_1 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_1()); \ + const auto input_2 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_2()); \ + const auto input_3 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_3()); \ + \ + check_const_opcode(input_1); \ + check_const_opcode(input_2); \ + check_const_opcode(input_3); \ + } + + // Skip other circle node + void visit(luci::CircleNode *) {} + + // Ops that receive one const nodes as inputs + CHECK_NODE_WITH_ONE_INPUT_CONST(luci::CirclePRelu, 
alpha) + + // Ops that receive two const node as an inputs + CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleConv2D, filter, bias) + CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleDepthwiseConv2D, filter, bias) + CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleFullyConnected, weights, bias) + CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleInstanceNorm, gamma, beta) + + // Ops that receive three const nodes as an inputs + CHECK_NODE_WITH_THREE_INPUT_CONST(luci::CircleTransposeConv, inputSizes, filter, bias) + +#undef CHECK_NODE_WITH_ONE_INPUT_CONST +#undef CHECK_NODE_WITH_TWO_INPUT_CONST +#undef CHECK_NODE_WITH_THREE_INPUT_CONST +}; + +} // namespace + +/** + * Verify the input model has the form acceptable by quantizer + */ +bool QuantizePreCheckerPass::run(loco::Graph *g) +{ + LOGGER(l); + INFO(l) << "QuantizePreCheckerPass Start" << std::endl; + + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + // Check const inputs + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + ConstInputChecker checker{}; + circle_node->accept(&checker); + } + + INFO(l) << "QuantizePreCheckerPass End" << std::endl; + + return false; // one time run +} + +} // namespace luci diff --git a/compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp b/compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp new file mode 100644 index 000000000..788353cd8 --- /dev/null +++ b/compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/QuantizePreCheckerPass.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +class SimpleConv2DGraph +{ +public: + SimpleConv2DGraph(bool make_valid) + { + conv2d_node = g.nodes()->create<luci::CircleConv2D>(); + input_1 = g.nodes()->create<luci::CircleInput>(); + filter = g.nodes()->create<luci::CircleConst>(); + + conv2d_node->input(input_1); + conv2d_node->filter(filter); + + if (make_valid) + { + bias = g.nodes()->create<luci::CircleConst>(); + conv2d_node->bias(bias); + } + else + { + input_2 = g.nodes()->create<luci::CircleInput>(); + conv2d_node->bias(input_2); + } + + output = g.nodes()->create<luci::CircleOutput>(); + + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + output->from(conv2d_node); + } + +public: + loco::Graph g; + +private: + luci::CircleConv2D *conv2d_node = nullptr; + luci::CircleInput *input_1 = nullptr; + luci::CircleInput *input_2 = nullptr; + luci::CircleConst *filter = nullptr; + luci::CircleConst *bias = nullptr; + luci::CircleOutput *output = nullptr; +}; + +class SimpleDepthConv2DGraph +{ +public: + SimpleDepthConv2DGraph(bool make_valid) + { + depth_conv2d_node = g.nodes()->create<luci::CircleDepthwiseConv2D>(); + input_1 = g.nodes()->create<luci::CircleInput>(); + filter = g.nodes()->create<luci::CircleConst>(); + + depth_conv2d_node->input(input_1); + depth_conv2d_node->filter(filter); + + if (make_valid) + { + bias = g.nodes()->create<luci::CircleConst>(); + depth_conv2d_node->bias(bias); + } + else + { + input_2 = g.nodes()->create<luci::CircleInput>(); + depth_conv2d_node->bias(input_2); + } + + output = g.nodes()->create<luci::CircleOutput>(); + + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + output->from(depth_conv2d_node); + } + +public: + loco::Graph g; + +private: + luci::CircleDepthwiseConv2D 
*depth_conv2d_node = nullptr; + luci::CircleInput *input_1 = nullptr; + luci::CircleInput *input_2 = nullptr; + luci::CircleConst *filter = nullptr; + luci::CircleConst *bias = nullptr; + luci::CircleOutput *output = nullptr; +}; + +class SimpleFCGraph +{ +public: + SimpleFCGraph(bool make_valid) + { + fc_node = g.nodes()->create<luci::CircleFullyConnected>(); + input_1 = g.nodes()->create<luci::CircleInput>(); + weights = g.nodes()->create<luci::CircleConst>(); + + fc_node->input(input_1); + fc_node->weights(weights); + + if (make_valid) + { + bias = g.nodes()->create<luci::CircleConst>(); + fc_node->bias(bias); + } + else + { + input_2 = g.nodes()->create<luci::CircleInput>(); + fc_node->bias(input_2); + } + + output = g.nodes()->create<luci::CircleOutput>(); + + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + output->from(fc_node); + } + +public: + loco::Graph g; + +private: + luci::CircleFullyConnected *fc_node = nullptr; + luci::CircleInput *input_1 = nullptr; + luci::CircleInput *input_2 = nullptr; + luci::CircleConst *weights = nullptr; + luci::CircleConst *bias = nullptr; + luci::CircleOutput *output = nullptr; +}; + +class SimpleInstanceNormGraph +{ +public: + SimpleInstanceNormGraph(bool make_valid) + { + instance_norm_node = g.nodes()->create<luci::CircleInstanceNorm>(); + input_1 = g.nodes()->create<luci::CircleInput>(); + gamma = g.nodes()->create<luci::CircleConst>(); + + instance_norm_node->input(input_1); + instance_norm_node->gamma(gamma); + + if (make_valid) + { + beta = g.nodes()->create<luci::CircleConst>(); + instance_norm_node->beta(beta); + } + else + { + input_2 = g.nodes()->create<luci::CircleInput>(); + instance_norm_node->beta(input_2); + } + + output = g.nodes()->create<luci::CircleOutput>(); + + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + output->from(instance_norm_node); + } + +public: + loco::Graph g; + +private: + luci::CircleInstanceNorm 
*instance_norm_node = nullptr; + luci::CircleInput *input_1 = nullptr; + luci::CircleInput *input_2 = nullptr; + luci::CircleConst *gamma = nullptr; + luci::CircleConst *beta = nullptr; + luci::CircleOutput *output = nullptr; +}; + +class SimpleTransposeConvGraph +{ +public: + SimpleTransposeConvGraph(bool make_valid) + { + transpose_conv = g.nodes()->create<luci::CircleTransposeConv>(); + input_1 = g.nodes()->create<luci::CircleInput>(); + + input_sizes = g.nodes()->create<luci::CircleConst>(); + filter = g.nodes()->create<luci::CircleConst>(); + + transpose_conv->outBackprop(input_1); + transpose_conv->filter(filter); + transpose_conv->inputSizes(input_sizes); + + if (make_valid) + { + bias = g.nodes()->create<luci::CircleConst>(); + transpose_conv->bias(bias); + } + else + { + input_2 = g.nodes()->create<luci::CircleInput>(); + transpose_conv->bias(input_2); + } + + output = g.nodes()->create<luci::CircleOutput>(); + + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + output->from(transpose_conv); + } + +public: + loco::Graph g; + +private: + luci::CircleTransposeConv *transpose_conv = nullptr; + luci::CircleInput *input_1 = nullptr; + luci::CircleInput *input_2 = nullptr; + luci::CircleConst *input_sizes = nullptr; + luci::CircleConst *filter = nullptr; + luci::CircleConst *bias = nullptr; + luci::CircleOutput *output = nullptr; +}; + +class SimplePReluGraph +{ +public: + SimplePReluGraph(bool make_valid) + { + prelu = g.nodes()->create<luci::CirclePRelu>(); + input_1 = g.nodes()->create<luci::CircleInput>(); + + prelu->input(input_1); + + if (make_valid) + { + alpha = g.nodes()->create<luci::CircleConst>(); + prelu->alpha(alpha); + } + else + { + input_2 = g.nodes()->create<luci::CircleInput>(); + prelu->alpha(input_2); + } + + output = g.nodes()->create<luci::CircleOutput>(); + + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + output->from(prelu); + } + +public: + loco::Graph g; + 
+private: + luci::CirclePRelu *prelu = nullptr; + luci::CircleInput *input_1 = nullptr; + luci::CircleInput *input_2 = nullptr; + luci::CircleConst *alpha = nullptr; + luci::CircleOutput *output = nullptr; +}; + +TEST(QuantizePreCheckerPassTest, name) +{ + luci::QuantizePreCheckerPass pass{}; + auto const name = pass.name(); + ASSERT_NE(nullptr, name); +} + +// Test Conv2d +TEST(QuantizePreCheckerPassTest, conv2d) +{ + SimpleConv2DGraph valid_graph(true); + + luci::QuantizePreCheckerPass checker{}; + + EXPECT_NO_THROW(checker.run(&valid_graph.g)); +} + +TEST(QuantizePreCheckerPassTest, conv2d_NEG) +{ + SimpleConv2DGraph invalid_graph(false); + + luci::QuantizePreCheckerPass checker{}; + + EXPECT_ANY_THROW(checker.run(&invalid_graph.g)); +} + +// Test DepthwiseConv2d +TEST(QuantizePreCheckerPassTest, depthwise_conv2d) +{ + SimpleDepthConv2DGraph valid_graph(true); + + luci::QuantizePreCheckerPass checker{}; + + EXPECT_NO_THROW(checker.run(&valid_graph.g)); +} + +TEST(QuantizePreCheckerPassTest, depthwise_conv2d_NEG) +{ + SimpleDepthConv2DGraph invalid_graph(false); + + luci::QuantizePreCheckerPass checker{}; + + EXPECT_ANY_THROW(checker.run(&invalid_graph.g)); +} + +// Test FullyConnected +TEST(QuantizePreCheckerPassTest, fully_connected) +{ + SimpleFCGraph valid_graph(true); + + luci::QuantizePreCheckerPass checker{}; + + EXPECT_NO_THROW(checker.run(&valid_graph.g)); +} + +TEST(QuantizePreCheckerPassTest, fully_connected_NEG) +{ + SimpleFCGraph invalid_graph(false); + + luci::QuantizePreCheckerPass checker{}; + + EXPECT_ANY_THROW(checker.run(&invalid_graph.g)); +} + +// Test InstanceNorm +TEST(QuantizePreCheckerPassTest, instance_norm) +{ + SimpleInstanceNormGraph valid_graph(true); + + luci::QuantizePreCheckerPass checker{}; + + EXPECT_NO_THROW(checker.run(&valid_graph.g)); +} + +TEST(QuantizePreCheckerPassTest, instance_norm_NEG) +{ + SimpleInstanceNormGraph invalid_graph(false); + + luci::QuantizePreCheckerPass checker{}; + + 
EXPECT_ANY_THROW(checker.run(&invalid_graph.g)); +} + +// Test TransposeConv +TEST(QuantizePreCheckerPassTest, transpose_conv) +{ + SimpleTransposeConvGraph valid_graph(true); + + luci::QuantizePreCheckerPass checker{}; + + EXPECT_NO_THROW(checker.run(&valid_graph.g)); +} + +TEST(QuantizePreCheckerPassTest, transpose_conv_NEG) +{ + SimpleTransposeConvGraph invalid_graph(false); + + luci::QuantizePreCheckerPass checker{}; + + EXPECT_ANY_THROW(checker.run(&invalid_graph.g)); +} + +// Test PRelu +TEST(QuantizePreCheckerPassTest, prelu) +{ + SimplePReluGraph valid_graph(true); + + luci::QuantizePreCheckerPass checker{}; + + EXPECT_NO_THROW(checker.run(&valid_graph.g)); +} + +TEST(QuantizePreCheckerPassTest, prelu_NEG) +{ + SimplePReluGraph invalid_graph(false); + + luci::QuantizePreCheckerPass checker{}; + + EXPECT_ANY_THROW(checker.run(&invalid_graph.g)); +} diff --git a/compiler/luci/pass/src/QuantizeWeights.cpp b/compiler/luci/pass/src/QuantizeWeights.cpp new file mode 100644 index 000000000..11322ab44 --- /dev/null +++ b/compiler/luci/pass/src/QuantizeWeights.cpp @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "QuantizeWeights.h" +#include "QuantizationUtils.h" + +#include <luci/Service/Nodes/CircleConst.h> +#include <luci/Log.h> + +#include <cmath> +#include <vector> +#include <functional> + +using namespace luci; + +namespace +{ + +using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>; + +void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func) +{ + loco::TensorShape dimension; + dimension.rank(4); + uint32_t indices[4] = { + 0, + }; + + if (!get_channel_dim_index(node, dimension, channel_dim_index)) + { + assert(false); + return; + } + + for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++) + { + for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++) + { + for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++) + { + for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++) + { + func(indices, dimension, channel_dim_index); + } + } + } + } +} + +void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min, + std::vector<float> &scaling_factor, int32_t &channel_dim_index) +{ + assert(node->dtype() == loco::DataType::FLOAT32); + + const int32_t kMinScale = 0; + const int32_t kMaxScale = 255; + + uint32_t size = node->size<loco::DataType::FLOAT32>(); + std::vector<int32_t> quantized_values(size); + + auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) { + int channel_idx = indices[channel_dim_index]; + const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx]; + auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices)); + quantized_values[cal_offset(dimension, indices)] = + static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv)); + }; + + iterate_per_channel(node, channel_dim_index, quantize); + + node->dtype(loco::DataType::U8); // change the type of tensor + node->size<loco::DataType::U8>(size); // resize tensor + for 
(uint32_t i = 0; i < size; ++i) + { + node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i])); + } +} + +void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor, + int32_t &channel_dim_index) +{ + assert(node->dtype() == loco::DataType::FLOAT32); + + const int32_t kMaxScale = std::numeric_limits<int16_t>::max(); + const int32_t kMinScale = -kMaxScale; + + uint32_t size = node->size<loco::DataType::FLOAT32>(); + std::vector<int32_t> quantized_values(size); + + auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) { + int channel_idx = indices[channel_dim_index]; + const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx]; + auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices)); + quantized_values[cal_offset(dimension, indices)] = + static_cast<int32_t>(std::round(data * scaling_factor_inv)); + }; + + iterate_per_channel(node, channel_dim_index, quantize); + + node->dtype(loco::DataType::S16); // change the type of tensor + node->size<loco::DataType::S16>(size); // resize tensor + for (uint32_t i = 0; i < size; ++i) + { + node->at<loco::DataType::S16>(i) = + std::min(kMaxScale, std::max(kMinScale, quantized_values[i])); + } +} + +void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor) +{ + const int32_t kMinScale = 0; + const int32_t kMaxScale = 255; + + uint32_t size = node->size<loco::DataType::FLOAT32>(); + + const float scaling_factor_inv = 1.0 / scaling_factor; + std::vector<int32_t> quantized_values(size); + for (uint32_t i = 0; i < size; ++i) + { + auto data = node->at<loco::DataType::FLOAT32>(i); + quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv)); + } + + node->dtype(loco::DataType::U8); // change the type of tensor + node->size<loco::DataType::U8>(size); // resize tensor + for (uint32_t i = 0; i < size; ++i) + { + node->at<loco::DataType::U8>(i) = 
std::min(kMaxScale, std::max(kMinScale, quantized_values[i])); + } +} + +// Quantize const per channel +// +// The last dimension of const is the same as the dimension of channel +// And the rest of the const dimensions should be 1 +// So, a 'single value' is quantized per channel +// +// Quantization spec (f: fp value, q: quantized value) +// +// uint8 +// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0] +// Negative f: f = (-f) * (q - 1) [q = 0, scale = -f, zp = 1] +// +// int16 +// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0] +// Negative f: f = (-f) * (q - 0) [q = -1, scale = -f, zp = 0] +void quant_const_per_channel(CircleConst *node, loco::DataType quant_type) +{ + assert(node->dtype() == loco::DataType::FLOAT32); + assert(node->rank() > 0); + + for (uint32_t i = 0; i < node->rank() - 1; i++) + { + // Caller should call this function when the below condition is satisfied + if (node->dim(i).value() != 1) + throw std::runtime_error("Non-channel dimension of const node must be 1"); + } + + uint32_t size = node->size<loco::DataType::FLOAT32>(); + assert(size == node->dim(node->rank() - 1).value()); + + auto quantparam = std::make_unique<CircleQuantParam>(); + quantparam->quantized_dimension = node->rank() - 1; + std::vector<int32_t> quantized_data(size); + + for (uint32_t i = 0; i < size; ++i) + { + auto data = node->at<loco::DataType::FLOAT32>(i); + if (quant_type == loco::DataType::U8) + { + if (data >= 0) + { + quantparam->scale.push_back(data); + quantparam->zerop.push_back(0); + quantized_data[i] = 1; + } + else + { + quantparam->scale.push_back(-data); + quantparam->zerop.push_back(1); + quantized_data[i] = 0; + } + } + else if (quant_type == loco::DataType::S16) + { + if (data >= 0) + { + quantparam->scale.push_back(data); + quantized_data[i] = 1; + } + else + { + quantparam->scale.push_back(-data); + quantized_data[i] = -1; + } + quantparam->zerop.push_back(0); + } + } + node->quantparam(std::move(quantparam)); + + switch (quant_type) + { 
+ case loco::DataType::U8: + node->dtype(loco::DataType::U8); + node->size<loco::DataType::U8>(size); + for (uint32_t i = 0; i < size; ++i) + { + assert(quantized_data[i] == 0 || quantized_data[i] == 1); + node->at<loco::DataType::U8>(i) = quantized_data[i]; + } + break; + case loco::DataType::S16: + node->dtype(loco::DataType::S16); + node->size<loco::DataType::S16>(size); + for (uint32_t i = 0; i < size; ++i) + { + assert(quantized_data[i] == -1 || quantized_data[i] == 1); + node->at<loco::DataType::S16>(i) = quantized_data[i]; + } + break; + default: + throw std::runtime_error("Unsupported data type"); + } +} + +} // namespace + +namespace luci +{ + +void QuantizeWeights::quantize_weights(luci::CircleConst *weights) +{ + // Find min/max per channel-wise + if (granularity == QuantizationGranularity::ChannelWise) + { + auto quantparam = weights->quantparam(); + if (quantparam == nullptr) + { + assert(false && "quantparam is nullptr"); + return; + } + + auto min = quantparam->min; + auto scaling_factor = quantparam->scale; + int32_t channel_dim_index = 0; + + if (output_type == loco::DataType::U8) + { + asym_wquant_per_channel(weights, min, scaling_factor, channel_dim_index); + } + else + { + sym_wquant_per_channel(weights, scaling_factor, channel_dim_index); + } + quantparam->min.clear(); + quantparam->max.clear(); + quantparam->quantized_dimension = channel_dim_index; + } + // Find min/max per layer-wise + else + { + // Quantize using recorded quantparam + auto quantparam = weights->quantparam(); + assert(quantparam != nullptr); + assert(quantparam->min.size() == 1); // only support layer-wise quant + assert(quantparam->scale.size() == 1); // only support layer-wise quant + auto min = quantparam->min[0]; + auto scaling_factor = quantparam->scale[0]; + asym_wquant_per_layer(weights, min, scaling_factor); + quantparam->min.clear(); + quantparam->max.clear(); + } +} +void QuantizeWeights::visit(luci::CircleConv2D *node) +{ + LOGGER(l); + INFO(l) << "QuantizeWeights 
QuantizeWeights::visit node: " << node->name() << std::endl; + + auto weights = loco::must_cast<luci::CircleConst *>(node->filter()); + if (!is_quantized(weights)) + { + auto new_weights = luci::clone(weights); + node->filter(new_weights); + quantize_weights(new_weights); + } +} + +void QuantizeWeights::visit(luci::CircleDepthwiseConv2D *node) +{ + LOGGER(l); + INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl; + + auto weights = loco::must_cast<luci::CircleConst *>(node->filter()); + if (!is_quantized(weights)) + { + auto new_weights = luci::clone(weights); + node->filter(new_weights); + quantize_weights(new_weights); + } +} + +void QuantizeWeights::visit(luci::CircleInstanceNorm *node) +{ + LOGGER(l); + INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl; + + auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma()); + auto beta = loco::must_cast<luci::CircleConst *>(node->beta()); + + if (!is_quantized(gamma)) + { + assert(gamma->dtype() == loco::DataType::FLOAT32); + auto new_gamma = luci::clone(gamma); + if (granularity == QuantizationGranularity::LayerWise) + quant_const(new_gamma, output_type); + else if (granularity == QuantizationGranularity::ChannelWise) + quant_const_per_channel(new_gamma, output_type); + node->gamma(new_gamma); + } + if (!is_quantized(beta)) + { + assert(beta->dtype() == loco::DataType::FLOAT32); + auto new_beta = luci::clone(beta); + if (granularity == QuantizationGranularity::LayerWise) + quant_const(new_beta, output_type); + else if (granularity == QuantizationGranularity::ChannelWise) + quant_const_per_channel(new_beta, output_type); + node->beta(new_beta); + } +} + +void QuantizeWeights::visit(luci::CirclePRelu *node) +{ + LOGGER(l); + INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl; + + auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha()); + + if (!is_quantized(alpha)) + { + assert(alpha->dtype() == 
loco::DataType::FLOAT32); + auto new_alpha = luci::clone(alpha); + if (granularity == QuantizationGranularity::LayerWise) + quant_const(new_alpha, output_type); + else if (granularity == QuantizationGranularity::ChannelWise) + quant_const_per_channel(new_alpha, output_type); + node->alpha(new_alpha); + } +} + +void QuantizeWeights::visit(luci::CircleTransposeConv *node) +{ + LOGGER(l); + INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl; + + auto weights = loco::must_cast<luci::CircleConst *>(node->filter()); + if (!is_quantized(weights)) + { + auto new_weights = luci::clone(weights); + node->filter(new_weights); + quantize_weights(new_weights); + } +} + +void QuantizeWeights::visit(luci::CircleFullyConnected *node) +{ + LOGGER(l); + INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl; + + auto weights = loco::must_cast<luci::CircleConst *>(node->weights()); + if (!is_quantized(weights)) + { + auto new_weights = luci::clone(weights); + node->weights(new_weights); + quantize_weights(new_weights); + } +} + +void QuantizeWeights::visit(luci::CircleNode *) {} + +} // namespace luci diff --git a/compiler/luci/pass/src/QuantizeWeights.h b/compiler/luci/pass/src/QuantizeWeights.h new file mode 100644 index 000000000..f62cd40f3 --- /dev/null +++ b/compiler/luci/pass/src/QuantizeWeights.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_QUANTIZE_WEIGHTS_H__ +#define __LUCI_QUANTIZE_WEIGHTS_H__ + +#include <luci/Pass/QuantizationParameters.h> +#include <luci/IR/CircleNodeVisitor.h> + +namespace luci +{ + +/** + * @brief QuantizeWeights quantizes tensors for weights + * @details Find min/max values on the fly and then quantize + */ +struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<void> +{ + QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr) + : input_type(input), output_type(output), granularity(gr) + { + } + + loco::DataType input_type; + loco::DataType output_type; + QuantizationGranularity granularity; + +private: + void quantize_weights(luci::CircleConst *weights); + + void visit(luci::CircleConv2D *node); + void visit(luci::CircleDepthwiseConv2D *node); + void visit(luci::CircleInstanceNorm *node); + void visit(luci::CirclePRelu *node); + void visit(luci::CircleTransposeConv *node); + void visit(luci::CircleFullyConnected *node); + void visit(luci::CircleNode *); +}; + +} // namespace luci + +#endif // __LUCI_QUANTIZE_WEIGHTS_H__ diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp index c3552ec52..d9a9d4db7 100644 --- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp +++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp @@ -15,55 +15,32 @@ */ #include "luci/Pass/QuantizeWithMinMaxPass.h" +#include "luci/Pass/PropagateQParamForwardPass.h" +#include "luci/Pass/PropagateQParamBackwardPass.h" +#include "luci/Pass/RemoveRedundantQuantizePass.h" +#include "QuantizeActivation.h" +#include "QuantizeWeights.h" +#include "QuantizeBias.h" #include "QuantizationUtils.h" +#include "ProgressReporter.h" +#include "helpers/LayerInfoMap.h" #include <luci/IR/CircleNodes.h> #include <luci/IR/CircleNodeVisitor.h> #include <luci/Service/Nodes/CircleConst.h> 
#include <luci/Profile/CircleNodeOrigin.h> #include <luci/Log.h> +#include <logo/Phase.h> #include <oops/UserExn.h> #include <iostream> #include <cmath> -#include <functional> namespace { using namespace luci; -using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>; - -void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func) -{ - loco::TensorShape dimension; - dimension.rank(4); - uint32_t indices[4] = { - 0, - }; - - if (!get_channel_dim_index(node, dimension, channel_dim_index)) - { - assert(false); - return; - } - - for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++) - { - for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++) - { - for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++) - { - for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++) - { - func(indices, dimension, channel_dim_index); - } - } - } - } -} - // Create a Quantize Op whose // dtype is out_type // shape is the same with node @@ -80,7 +57,17 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType quantize->shape_status(luci::ShapeStatus::VALID); auto qparam = node->quantparam(); - assert(qparam); // FIX_CALLER_UNLESS + assert(qparam); // FIX_CALLER_UNLESS + + auto qtype = luci::activation_qtype(node); + if (qtype == ActivationQType::PreDefinedValue) + { + quantize->quantparam(luci::make_predefined_qparam(node->opcode(), out_type)); + return quantize; + } + + assert(qtype == ActivationQType::MinMax or qtype == ActivationQType::IntScale); + assert(qparam->min.size() == 1); // FIX_CALLER_UNLESS assert(qparam->max.size() == 1); // FIX_CALLER_UNLESS auto min = qparam->min[0]; @@ -104,9 +91,17 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType auto quantparam = std::make_unique<CircleQuantParam>(); quantparam->scale.push_back(scaling_factor); quantparam->zerop.push_back(zp); + // Save original min/max (not 
nudged_min/max). Nudged min/max + // is different from the real min/max values, causing wrong + // qparam when quantization dtype is changed. + quantparam->min.push_back(min); + quantparam->max.push_back(max); quantize->quantparam(std::move(quantparam)); + if (qtype == ActivationQType::IntScale) + set_int_scale(quantize); + return quantize; } @@ -118,1412 +113,232 @@ namespace luci namespace { -// Create a new const node from an existing node. -// The new node has the following characteristics -// type: T -// shape: same with 'node' (given as an argument) -// buffer size: 'size' (given as an argument) -// Note that contents are not filled in this function. -template <loco::DataType T> -luci::CircleConst *create_empty_const_from(luci::CircleConst *node, uint32_t size) -{ - auto new_node = node->graph()->nodes()->create<CircleConst>(); - // TODO: We don't have any naming convention for quantized nodes yet. - // Fix this when we have one. - new_node->name(node->name()); - new_node->dtype(T); - new_node->rank(node->rank()); - for (uint32_t i = 0; i < node->rank(); i++) - new_node->dim(i).set(node->dim(i).value()); - - new_node->size<T>(size); - new_node->shape_status(luci::ShapeStatus::VALID); - - return new_node; -} - -void overwrite_quantparam(luci::CircleNode *source, luci::CircleNode *target) -{ - auto source_qparam = source->quantparam(); - if (source_qparam == nullptr) - throw std::runtime_error("source quantparam is not found during overwrite"); - - auto target_qparam = target->quantparam(); - if (target_qparam == nullptr) - { - auto quantparam = std::make_unique<CircleQuantParam>(); - target->quantparam(std::move(quantparam)); - target_qparam = target->quantparam(); - - if (target_qparam == nullptr) - throw std::runtime_error("Creating new quant param failed"); - } - target_qparam->min = source_qparam->min; - target_qparam->max = source_qparam->max; - target_qparam->scale = source_qparam->scale; - target_qparam->zerop = source_qparam->zerop; - 
target_qparam->quantized_dimension = source_qparam->quantized_dimension; -} - -void quant_const_values(luci::CircleConst *const_node, float scaling_factor, float zerop, - loco::DataType quant_type) -{ - uint32_t size = const_node->size<loco::DataType::FLOAT32>(); - - const float scaling_factor_inv = 1.0 / scaling_factor; - std::vector<int32_t> quantized_values(size); - for (uint32_t i = 0; i < size; ++i) - { - auto data = static_cast<double>(const_node->at<loco::DataType::FLOAT32>(i)); - double quantized_float = std::round(data * scaling_factor_inv) + zerop; - constexpr auto int_max = static_cast<double>(std::numeric_limits<int32_t>::max()); - constexpr auto int_min = static_cast<double>(std::numeric_limits<int32_t>::min()); - quantized_float = std::min(int_max, std::max(int_min, quantized_float)); - - quantized_values[i] = static_cast<int32_t>(quantized_float); - } - - switch (quant_type) - { - case loco::DataType::U8: - const_node->dtype(loco::DataType::U8); // change the type of tensor - const_node->size<loco::DataType::U8>(size); // resize tensor - for (uint32_t i = 0; i < size; ++i) - const_node->at<loco::DataType::U8>(i) = std::min(255, std::max(0, quantized_values[i])); - break; - case loco::DataType::S16: - assert(zerop == 0); - const_node->dtype(loco::DataType::S16); // change the type of tensor - const_node->size<loco::DataType::S16>(size); // resize tensor - for (uint32_t i = 0; i < size; ++i) - const_node->at<loco::DataType::S16>(i) = - std::min(32767, std::max(-32767, quantized_values[i])); - break; - default: - throw std::runtime_error("Unsupported data type"); - } -} - -// Quantize const per channel -// -// The last dimension of const is the same as the dimension of channel -// And the rest of the const dimensions should be 1 -// So, a 'single value' is quantized per channel -// -// Quantization spec (f: fp value, q: quantized value) -// -// uint8 -// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0] -// Negative f: f = (-f) * (q - 1) [q = 0, 
scale = -f, zp = 1] -// -// int16 -// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0] -// Negative f: f = (-f) * (q - 0) [q = -1, scale = -f, zp = 0] -void quant_const_per_channel(CircleConst *node, loco::DataType quant_type) -{ - assert(node->dtype() == loco::DataType::FLOAT32); - assert(node->rank() > 0); - - for (uint32_t i = 0; i < node->rank() - 1; i++) - { - // Caller should call this function when the below condition is satisfied - if (node->dim(i).value() != 1) - throw std::runtime_error("Non-channel dimension of const node must be 1"); - } - - uint32_t size = node->size<loco::DataType::FLOAT32>(); - assert(size == node->dim(node->rank() - 1).value()); - - auto quantparam = std::make_unique<CircleQuantParam>(); - quantparam->quantized_dimension = node->rank() - 1; - std::vector<int32_t> quantized_data(size); - - for (uint32_t i = 0; i < size; ++i) - { - auto data = node->at<loco::DataType::FLOAT32>(i); - if (quant_type == loco::DataType::U8) - { - if (data >= 0) - { - quantparam->scale.push_back(data); - quantparam->zerop.push_back(0); - quantized_data[i] = 1; - } - else - { - quantparam->scale.push_back(-data); - quantparam->zerop.push_back(1); - quantized_data[i] = 0; - } - } - else if (quant_type == loco::DataType::S16) - { - if (data >= 0) - { - quantparam->scale.push_back(data); - quantized_data[i] = 1; - } - else - { - quantparam->scale.push_back(-data); - quantized_data[i] = -1; - } - quantparam->zerop.push_back(0); - } - } - node->quantparam(std::move(quantparam)); - - switch (quant_type) - { - case loco::DataType::U8: - node->dtype(loco::DataType::U8); - node->size<loco::DataType::U8>(size); - for (uint32_t i = 0; i < size; ++i) - { - assert(quantized_data[i] == 0 || quantized_data[i] == 1); - node->at<loco::DataType::U8>(i) = quantized_data[i]; - } - break; - case loco::DataType::S16: - node->dtype(loco::DataType::S16); - node->size<loco::DataType::S16>(size); - for (uint32_t i = 0; i < size; ++i) - { - assert(quantized_data[i] == -1 || 
quantized_data[i] == 1); - node->at<loco::DataType::S16>(i) = quantized_data[i]; - } - break; - default: - throw std::runtime_error("Unsupported data type"); - } -} - -void quant_const(CircleConst *node, loco::DataType quant_type) -{ - assert(node->dtype() == loco::DataType::FLOAT32); - - float min = std::numeric_limits<float>::max(); - float max = std::numeric_limits<float>::lowest(); - for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++) - { - auto data = node->at<loco::DataType::FLOAT32>(i); - min = data < min ? data : min; - max = data > max ? data : max; - } - - float scaling_factor{0.0}; - int64_t zp{0}; - float nudged_min{0.0}; - float nudged_max{0.0}; - - switch (quant_type) - { - case loco::DataType::U8: - asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min, - nudged_max); - break; - case loco::DataType::S16: - symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min, - nudged_max); - break; - default: - throw std::runtime_error("Unsupported data type"); - } - - auto quantparam = std::make_unique<CircleQuantParam>(); - quantparam->scale.push_back(scaling_factor); - quantparam->zerop.push_back(zp); - node->quantparam(std::move(quantparam)); -} - -// Check if the node is the bias of Conv2D, DepthwiseConv2D, FullyConnected, or TransposeConv layer -// Returns a list of <input, weights, output> vectors for the above operators. -// Note that it returns a 'list' because bias can be used by multiple operators. 
-std::vector<std::vector<loco::Node *>> get_input_weight_output_of_bias(CircleNode *node) -{ - std::vector<std::vector<loco::Node *>> result; - auto circle_const = dynamic_cast<CircleConst *>(node); - if (circle_const == nullptr) - return result; - - auto succs = loco::succs(node); - - for (auto out : succs) - { - auto conv = dynamic_cast<CircleConv2D *>(out); - if (conv != nullptr && conv->bias() == circle_const) - { - assert(conv->input() != nullptr); - assert(conv->filter() != nullptr); - result.push_back({conv->input(), conv->filter(), conv}); - continue; - } - auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out); - if (dw_conv != nullptr && dw_conv->bias() == circle_const) - { - assert(dw_conv->input() != nullptr); - assert(dw_conv->filter() != nullptr); - result.push_back({dw_conv->input(), dw_conv->filter(), dw_conv}); - continue; - } - auto fc = dynamic_cast<CircleFullyConnected *>(out); - if (fc != nullptr && fc->bias() == circle_const) - { - assert(fc->input() != nullptr); - assert(fc->weights() != nullptr); - result.push_back({fc->input(), fc->weights(), fc}); - continue; - } - auto tconv = dynamic_cast<CircleTransposeConv *>(out); - if (tconv != nullptr && tconv->bias() == circle_const) - { - assert(tconv->outBackprop() != nullptr); - assert(tconv->filter() != nullptr); - result.push_back({tconv->outBackprop(), tconv->filter(), tconv}); - continue; - } - } - return result; -} - -CircleConst *asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale, - float *scaling_factor, int64_t *zp) -{ - float scale = input_scale * weight_scale; - const float scaling_factor_inv = (scale == 0) ? 
0 : 1.0 / scale; - - uint32_t size = node->size<loco::DataType::FLOAT32>(); - std::vector<int32_t> quantized_values(size); - for (uint32_t i = 0; i < size; ++i) - { - quantized_values[i] = - static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv)); - } - - auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size); - - const int32_t kMinScale = std::numeric_limits<int32_t>::lowest(); - const int32_t kMaxScale = std::numeric_limits<int32_t>::max(); - for (uint32_t i = 0; i < size; ++i) - { - new_bias->at<loco::DataType::S32>(i) = - std::min(kMaxScale, std::max(kMinScale, quantized_values[i])); - } - *scaling_factor = scale; - *zp = 0; - - return new_bias; -} - -CircleConst *quant_bias_per_channel(CircleConst *node, float input_scale, - std::vector<float> &weight_scale, - std::vector<float> &scaling_factor, std::vector<int64_t> &zp) -{ - float scaling_factor_inv{0}; - - uint32_t size = node->size<loco::DataType::FLOAT32>(); - std::vector<int32_t> quantized_values(size); - - for (uint32_t i = 0; i < size; ++i) - { - scaling_factor[i] = input_scale * weight_scale[i]; - scaling_factor_inv = (scaling_factor[i] == 0) ? 
0 : 1.0 / scaling_factor[i]; - quantized_values[i] = - static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv)); - zp[i] = 0; - } - - auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size); - - const int32_t kMinScale = std::numeric_limits<int32_t>::lowest(); - const int32_t kMaxScale = std::numeric_limits<int32_t>::max(); - for (uint32_t i = 0; i < size; ++i) - { - new_bias->at<loco::DataType::S32>(i) = - std::min(kMaxScale, std::max(kMinScale, quantized_values[i])); - } - - return new_bias; -} - -CircleConst *int16_quant_bias_per_channel(CircleConst *node, float input_scale, - std::vector<float> &weight_scale, - std::vector<float> &scaling_factor, - std::vector<int64_t> &zp) -{ - float scaling_factor_inv{0}; - - uint32_t size = node->size<loco::DataType::FLOAT32>(); - std::vector<int64_t> quantized_values(size); - - for (uint32_t i = 0; i < size; ++i) - { - scaling_factor[i] = input_scale * weight_scale[i]; - scaling_factor_inv = (scaling_factor[i] == 0) ? 
0 : 1.0 / scaling_factor[i]; - quantized_values[i] = - static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv)); - zp[i] = 0; - } - - auto new_bias = create_empty_const_from<loco::DataType::S64>(node, size); - - for (uint32_t i = 0; i < size; ++i) - { - new_bias->at<loco::DataType::S64>(i) = quantized_values[i]; - } - - return new_bias; -} - -bool has_min_max(const CircleNode *node) -{ - return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty(); -} - -void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor, - int32_t &channel_dim_index) -{ - assert(node->dtype() == loco::DataType::FLOAT32); - - const int32_t kMaxScale = std::numeric_limits<int16_t>::max(); - const int32_t kMinScale = -kMaxScale; - - uint32_t size = node->size<loco::DataType::FLOAT32>(); - std::vector<int32_t> quantized_values(size); - - auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) { - int channel_idx = indices[channel_dim_index]; - const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx]; - auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices)); - quantized_values[cal_offset(dimension, indices)] = - static_cast<int32_t>(std::round(data * scaling_factor_inv)); - }; - - iterate_per_channel(node, channel_dim_index, quantize); - - node->dtype(loco::DataType::S16); // change the type of tensor - node->size<loco::DataType::S16>(size); // resize tensor - for (uint32_t i = 0; i < size; ++i) - { - node->at<loco::DataType::S16>(i) = - std::min(kMaxScale, std::max(kMinScale, quantized_values[i])); - } -} - -void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min, - std::vector<float> &scaling_factor, int32_t &channel_dim_index) -{ - assert(node->dtype() == loco::DataType::FLOAT32); - - const int32_t kMinScale = 0; - const int32_t kMaxScale = 255; - - uint32_t size = node->size<loco::DataType::FLOAT32>(); - 
std::vector<int32_t> quantized_values(size); - - auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) { - int channel_idx = indices[channel_dim_index]; - const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx]; - auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices)); - quantized_values[cal_offset(dimension, indices)] = - static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv)); - }; - - iterate_per_channel(node, channel_dim_index, quantize); - - node->dtype(loco::DataType::U8); // change the type of tensor - node->size<loco::DataType::U8>(size); // resize tensor - for (uint32_t i = 0; i < size; ++i) - { - node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i])); - } -} - -void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor) -{ - const int32_t kMinScale = 0; - const int32_t kMaxScale = 255; - - uint32_t size = node->size<loco::DataType::FLOAT32>(); - - const float scaling_factor_inv = 1.0 / scaling_factor; - std::vector<int32_t> quantized_values(size); - for (uint32_t i = 0; i < size; ++i) - { - auto data = node->at<loco::DataType::FLOAT32>(i); - quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv)); - } - - node->dtype(loco::DataType::U8); // change the type of tensor - node->size<loco::DataType::U8>(size); // resize tensor - for (uint32_t i = 0; i < size; ++i) - { - node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i])); - } -} - -void set_bias(luci::CircleNode *node, luci::CircleConst *bias) -{ - if (auto conv = dynamic_cast<CircleConv2D *>(node)) - conv->bias(bias); - else if (auto dconv = dynamic_cast<CircleDepthwiseConv2D *>(node)) - dconv->bias(bias); - else if (auto tconv = dynamic_cast<CircleTransposeConv *>(node)) - tconv->bias(bias); - else if (auto fc = dynamic_cast<CircleFullyConnected *>(node)) - fc->bias(bias); 
- else - throw std::runtime_error("Only convolution, depthwise convolution, transposed convolution, and " - "fully-connected layer have bias"); -} - -void set_act_qparam(luci::CircleNode *node, float scale, int64_t zp) -{ - assert(node); // FIX_CALLER_UNLESS - assert(node->quantparam()); // FIX_CALLER_UNLESS - - auto qparam = node->quantparam(); - assert(qparam->scale.size() == 1); // FIX_CALLER_UNLESS - assert(qparam->zerop.size() == 1); // FIX_CALLER_UNLESS - qparam->scale[0] = scale; - qparam->zerop[0] = zp; -} - -/** - * @brief Manually set scale/zp of output tensor of special Ops - */ -struct QuantizeSpecialActivation final : public luci::CircleNodeMutableVisitor<void> -{ - QuantizeSpecialActivation(loco::DataType input, loco::DataType output) - : input_type(input), output_type(output) - { - } - - loco::DataType input_type; - loco::DataType output_type; - - void visit(luci::CircleNode *) - { - // Do nothing by default - } - - void visit(luci::CircleLogistic *node) - { - if (output_type == loco::DataType::U8) - set_act_qparam(node, 1.0f / 256.0f, 0); - else - { - assert(output_type == loco::DataType::S16); - set_act_qparam(node, 1.0f / 32768.0f, 0); - } - } - - void visit(luci::CircleTanh *node) - { - if (output_type == loco::DataType::U8) - set_act_qparam(node, 2.0f / 256.0f, 128); - else - { - assert(output_type == loco::DataType::S16); - set_act_qparam(node, 1.0f / 32768.0f, 0); - } - } - - void visit(luci::CircleStridedSlice *node) - { - auto input = loco::must_cast<luci::CircleNode *>(node->input()); - auto i_qparam = input->quantparam(); - assert(i_qparam); - assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS - assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS - auto i_scale = i_qparam->scale[0]; - auto i_zp = i_qparam->zerop[0]; - - set_act_qparam(node, i_scale, i_zp); - } - - void visit(luci::CircleSplitOut *node) - { - auto split = loco::must_cast<luci::CircleSplit *>(node->input()); - auto input = loco::must_cast<luci::CircleNode 
*>(split->input()); - auto i_qparam = input->quantparam(); - assert(i_qparam); - assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS - assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS - auto i_scale = i_qparam->scale[0]; - auto i_zp = i_qparam->zerop[0]; - - set_act_qparam(node, i_scale, i_zp); - } - - void visit(luci::CircleSplitVOut *node) - { - auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input()); - auto input = loco::must_cast<luci::CircleNode *>(splitv->input()); - auto i_qparam = input->quantparam(); - assert(i_qparam); - assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS - assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS - auto i_scale = i_qparam->scale[0]; - auto i_zp = i_qparam->zerop[0]; - - set_act_qparam(node, i_scale, i_zp); - } - - void visit(luci::CircleUnpackOut *node) - { - auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input()); - auto input = loco::must_cast<luci::CircleNode *>(unpack->value()); - auto i_qparam = input->quantparam(); - assert(i_qparam); - assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS - assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS - auto i_scale = i_qparam->scale[0]; - auto i_zp = i_qparam->zerop[0]; - - set_act_qparam(node, i_scale, i_zp); - } - - // TODO Move Softmax, Floor, Ceil from QuantizeActivation to here -}; - /** - * @brief QuantizeActivation quantizes tensors for activations - * @details Quantize using recorded min/max values + * Insert Quantize operator for mixed-precision quantization + * 1. Before input feature map (only for non-const) + * 2. After output feature map + * + * For example, if default_dtype = U8 and op_dtype = S16, + * 1. Quantize Op for U8->S16 is inserted before ifm + * 2. Quantize Op for S16->U8 is inserted after ofm + * + * Why not insert Quantize Op for const ifm? + * We quantize const tensor at once to preserve precision. 
+ * For example, if default dtype = U8, op_dtype = S16, and op is CONV2D, + * We directly quantize weights to 16 bits, not 8->16 bits. */ -struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool> +struct InsertQuantizeOp final : public luci::CircleNodeMutableVisitor<void> { - QuantizeActivation(loco::DataType input, loco::DataType output) - : input_type(input), output_type(output) + InsertQuantizeOp(loco::DataType default_dtype, loco::DataType op_dtype) + : _default_dtype(default_dtype), _op_dtype(op_dtype) { + assert(default_dtype != op_dtype); // FIX_CALLER_UNLESS } - loco::DataType input_type; - loco::DataType output_type; +private: + loco::DataType _default_dtype; + loco::DataType _op_dtype; - // Quantize input tensors of each node - bool visit(luci::CircleNode *node) +private: + luci::CircleQuantize *create_in_quantize(loco::Node *in, loco::Node *origin) + { + auto input = loco::must_cast<luci::CircleNode *>(in); + if (input->opcode() == luci::CircleOpcode::CIRCLECONST) + return nullptr; + + auto input_quant = create_quantize_op(input, _op_dtype); + input_quant->input(input); + auto origin_node = loco::must_cast<luci::CircleNode *>(origin); + luci::add_origin(input_quant, luci::get_origin(origin_node)); + return input_quant; + } + + void insert_out_quantize(loco::Node *node) + { + auto output = loco::must_cast<luci::CircleNode *>(node); + assert(output->opcode() != luci::CircleOpcode::CIRCLECONST); // FIX_CALLER_UNLESS + auto output_quant = create_quantize_op(output, _default_dtype); + + luci::add_origin(output_quant, luci::get_origin(output)); + loco::replace(node).with(output_quant); + output_quant->input(node); + } + +// INPUT_NAME is the only activation of NODE +#define INSERT_QUANTIZE_TO_UNARY_OP(NODE, INPUT_NAME) \ + void visit(NODE *node) \ + { \ + if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node)) \ + node->INPUT_NAME(input_quant); \ + \ + insert_out_quantize(node); \ + } + +// INPUT_NAME is the only activation of 
NODE +#define INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(NODE, INPUT_NAME, OUT_NAME) \ + void visit(NODE *node) \ + { \ + if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node)) \ + node->INPUT_NAME(input_quant); \ + \ + auto out_nodes = loco::succs(node); \ + for (auto out_node : out_nodes) \ + { \ + auto out_circle = loco::must_cast<OUT_NAME *>(out_node); \ + insert_out_quantize(out_circle); \ + } \ + } + +// INPUT_NAME1 and INPUT_NAME2 are the only activations of NODE +#define INSERT_QUANTIZE_TO_BINARY_OP(NODE, INPUT_NAME1, INPUT_NAME2) \ + void visit(NODE *node) \ + { \ + if (auto input1_quant = create_in_quantize(node->INPUT_NAME1(), node)) \ + node->INPUT_NAME1(input1_quant); \ + \ + if (auto input2_quant = create_in_quantize(node->INPUT_NAME2(), node)) \ + node->INPUT_NAME2(input2_quant); \ + \ + insert_out_quantize(node); \ + } + + // Default behavior (NYI) + void visit(luci::CircleNode *node) + { + throw std::runtime_error("Unsupported Op for mixed-precision quantization. 
Layer name: " + + node->name()); + } + + // Skip output layer + void visit(luci::CircleOutput *) {} + void visit(luci::CircleSplitVOut *) {} + void visit(luci::CircleSplitOut *) {} + void visit(luci::CircleTopKV2Out *) {} + void visit(luci::CircleUniqueOut *) {} + void visit(luci::CircleUnpackOut *) {} + + // Ops that receive a single activation as an input + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAveragePool2D, value) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleBatchToSpaceND, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleConv2D, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthToSpace, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthwiseConv2D, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleElu, features) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleExp, x) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFloor, x) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFullyConnected, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGather, params) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleInstanceNorm, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLocalResponseNormalization, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLogistic, x) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMaxPool2D, value) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMean, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMirrorPad, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePad, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePadV2, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePRelu, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceProd, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMax, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMin, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu, features) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReshape, tensor) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeBilinear, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeNearestNeighbor, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReverseSequence, input) + 
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRsqrt, x) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSlice, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSoftmax, logits) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToBatchND, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToDepth, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqrt, x) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleStridedSlice, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSum, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTanh, x) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTile, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTranspose, a) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTransposeConv, outBackprop) + + // Ops that receive two activations as inputs + INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleAdd, x, y) + INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleBatchMatMul, x, y) + INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleDiv, x, y) + INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleFloorDiv, x, y) + INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMaximum, x, y) + INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMinimum, x, y) + INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMul, x, y) + INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleOneHot, on_value, off_value) + INSERT_QUANTIZE_TO_BINARY_OP(luci::CirclePow, x, y) + INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleSub, x, y) + + // Multiple-output ops that receive one activation as inputs + INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplit, input, luci::CircleSplitOut) + INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplitV, input, luci::CircleSplitVOut) + INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleTopKV2, input, luci::CircleTopKV2Out) + INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnique, input, luci::CircleUniqueOut) + INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnpack, value, luci::CircleUnpackOut) + + // AddN has arbitrary number of inputs + void visit(luci::CircleAddN *node) { - LOGGER(l); - INFO(l) << "QuantizeActivation visit node: " << 
node->name() << std::endl; auto arity = node->arity(); for (uint32_t i = 0; i < arity; i++) { - auto input_node = node->arg(i); - auto circle_node = loco::must_cast<luci::CircleNode *>(input_node); - - // Check if this is already quantized - if (is_quantized(circle_node)) - continue; - - // Check if this is bias (bias is quantized later) - auto iwo = get_input_weight_output_of_bias(circle_node); - if (iwo.size() > 0) - continue; - - // Check if this is bool type (bool type is not quantized) - if (circle_node->dtype() == loco::DataType::BOOL) - continue; - - // Check if this is activation - // We assume min/max are recorded only for activations - if (has_min_max(circle_node) && !is_weights(circle_node)) - { - // Quantize using recorded min/max - auto quantparam = circle_node->quantparam(); - assert(quantparam); - assert(quantparam->min.size() == 1); // only support layer-wise quant - assert(quantparam->max.size() == 1); // only support layer-wise quant - auto min = quantparam->min[0]; - auto max = quantparam->max[0]; - - // Special values - if (circle_node->opcode() == luci::CircleOpcode::SOFTMAX) - { - min = 0.0f; - max = 1.0f; - } - - float scaling_factor{0}; - int64_t zp{0}; - float nudged_min{0}; - float nudged_max{0}; - - if (output_type == loco::DataType::U8) - { - compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max); - circle_node->dtype(loco::DataType::U8); - } - else - { - compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max); - circle_node->dtype(loco::DataType::S16); - } - - // Nodes fused with activation functions which need special quantization - auto fused_act_node = - dynamic_cast<CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(circle_node); - if (fused_act_node != nullptr && - fused_act_node->fusedActivationFunction() == FusedActFunc::TANH) - { - if (output_type == loco::DataType::U8) - { - scaling_factor = 2.0f / 256.0f; - zp = 128; - } - else - { - assert(output_type == loco::DataType::S16); - 
scaling_factor = 1.0f / 32768.0f; - zp = 0; - } - } - - // The output of these Ops should be integer, so scale should be integer - // TODO Handle cases where the integer scale needs to be propagated - if (circle_node->opcode() == CircleOpcode::FLOOR || - circle_node->opcode() == CircleOpcode::FLOOR_DIV || - circle_node->opcode() == CircleOpcode::FLOOR_MOD || - circle_node->opcode() == CircleOpcode::CEIL) - { - assert(scaling_factor >= 0); // FIX_ME_UNLESS - scaling_factor = scaling_factor < 1 ? 1.0f : std::round(scaling_factor); - } - - circle_node->quantparam()->scale.push_back(scaling_factor); - circle_node->quantparam()->zerop.push_back(zp); - } - // Fix special attributes - if (circle_node->opcode() == luci::CircleOpcode::CAST) - { - auto *cast = loco::must_cast<luci::CircleCast *>(circle_node); - auto *cast_input = loco::must_cast<luci::CircleNode *>(cast->x()); - - // make sure that cast_input is already quantized - assert(cast_input->dtype() != loco::DataType::FLOAT32); - cast->in_data_type(cast_input->dtype()); - cast->out_data_type(cast->dtype()); - } - } - return false; - } -}; - -struct QuantizeBias final : public luci::CircleNodeMutableVisitor<bool> -{ - QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr) - : input_type(input), output_type(output), granularity(gr) - { - } - - loco::DataType input_type; - loco::DataType output_type; - QuantizationGranularity granularity; - - // Quantize bias node - bool visit(luci::CircleNode *node) - { - // Check if this is already quantized - if (is_quantized(node)) - return false; - - auto iwo_list = get_input_weight_output_of_bias(node); - - for (auto iwo : iwo_list) - { - assert(iwo.size() == 3); - - auto input = loco::must_cast<luci::CircleNode *>(iwo[0]); - auto weight = loco::must_cast<luci::CircleNode *>(iwo[1]); - auto output = loco::must_cast<luci::CircleNode *>(iwo[2]); - - auto const_bias = loco::must_cast<luci::CircleConst *>(node); - assert(const_bias->dtype() == 
loco::DataType::FLOAT32); - - // If input is const, it is quantized here, not in QuantizeActivation - if (auto const_input = dynamic_cast<luci::CircleConst *>(input)) - { - quant_const(const_input, output_type); - } - - CircleConst *new_bias = nullptr; - - if (granularity == QuantizationGranularity::ChannelWise) - { - auto input_q = input->quantparam(); - assert(input_q); - assert(input_q->scale.size() == 1); // input scale's layer-wise - auto input_scale = input_q->scale[0]; - - assert(weight->quantparam() != nullptr); // weight scale's channel-wise - auto weight_scale = weight->quantparam()->scale; - - uint32_t size = const_bias->size<loco::DataType::FLOAT32>(); - assert(size == weight_scale.size()); - std::vector<float> scaling_factor(size); - std::vector<int64_t> zp(size); - - if (output_type == loco::DataType::U8) - { - new_bias = - quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp); - } - else if (output_type == loco::DataType::S16) - { - new_bias = - int16_quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp); - } - else - { - throw std::runtime_error("Unsupported quantization type."); - } - - auto quantparam = std::make_unique<CircleQuantParam>(); - quantparam->scale = scaling_factor; - quantparam->zerop = zp; - assert(new_bias->quantparam() == nullptr); // bias should not be quantized before - new_bias->quantparam(std::move(quantparam)); - - set_bias(output, new_bias); - } - else - { - auto input_q = input->quantparam(); - assert(input_q); - assert(input_q->scale.size() == 1); // Only support per-layer quant - auto input_scale = input_q->scale[0]; - - auto weight_q = weight->quantparam(); - assert(weight_q); - assert(weight_q->scale.size() == 1); // Only support per-layer quant - auto weight_scale = weight_q->scale[0]; - - float scaling_factor{0}; - int64_t zp{0}; - new_bias = - asym_quant_bias_per_layer(const_bias, input_scale, weight_scale, &scaling_factor, &zp); - auto quantparam = 
std::make_unique<CircleQuantParam>(); - quantparam->scale.push_back(scaling_factor); - quantparam->zerop.push_back(zp); - assert(new_bias->quantparam() == nullptr); // bias should not be quantized before - new_bias->quantparam(std::move(quantparam)); - - set_bias(output, new_bias); - } - } - return false; - } -}; - -/** - * @brief QuantizeWeights quantizes tensors for weights - * @details Find min/max values on the fly and then quantize - */ -struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool> -{ - QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr) - : input_type(input), output_type(output), granularity(gr) - { - } - - loco::DataType input_type; - loco::DataType output_type; - QuantizationGranularity granularity; - -private: - void quantize_weights(luci::CircleConst *weights) - { - // Find min/max per channel-wise - if (granularity == QuantizationGranularity::ChannelWise) - { - auto quantparam = weights->quantparam(); - if (quantparam == nullptr) - { - assert(false && "quantparam is nullptr"); - return; - } - - auto min = quantparam->min; - auto scaling_factor = quantparam->scale; - int32_t channel_dim_index = 0; - - if (output_type == loco::DataType::U8) - { - asym_wquant_per_channel(weights, min, scaling_factor, channel_dim_index); - } - else - { - sym_wquant_per_channel(weights, scaling_factor, channel_dim_index); - } - quantparam->min.clear(); - quantparam->max.clear(); - quantparam->quantized_dimension = channel_dim_index; - } - // Find min/max per layer-wise - else - { - // Quantize using recorded quantparam - auto quantparam = weights->quantparam(); - assert(quantparam != nullptr); - assert(quantparam->min.size() == 1); // only support layer-wise quant - assert(quantparam->scale.size() == 1); // only support layer-wise quant - auto min = quantparam->min[0]; - auto scaling_factor = quantparam->scale[0]; - asym_wquant_per_layer(weights, min, scaling_factor); - quantparam->min.clear(); - 
quantparam->max.clear(); - } - } - - bool visit(luci::CircleConv2D *node) - { - LOGGER(l); - INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl; - - auto weights = loco::must_cast<luci::CircleConst *>(node->filter()); - if (!is_quantized(weights)) - { - auto new_weights = luci::clone(weights); - node->filter(new_weights); - quantize_weights(new_weights); - return true; + if (auto input_quant = create_in_quantize(node->inputs(i), node)) + node->inputs(i, input_quant); } - return false; - } - - bool visit(luci::CircleDepthwiseConv2D *node) - { - LOGGER(l); - INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl; - auto weights = loco::must_cast<luci::CircleConst *>(node->filter()); - if (!is_quantized(weights)) - { - auto new_weights = luci::clone(weights); - node->filter(new_weights); - quantize_weights(new_weights); - return true; - } - return false; + insert_out_quantize(node); } - bool visit(luci::CircleInstanceNorm *node) + // Concat has arbitrary number of inputs + void visit(luci::CircleConcatenation *node) { - LOGGER(l); - INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl; - - auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma()); - auto beta = loco::must_cast<luci::CircleConst *>(node->beta()); - - bool changed = false; - if (!is_quantized(gamma)) - { - assert(gamma->dtype() == loco::DataType::FLOAT32); - auto new_gamma = luci::clone(gamma); - if (granularity == QuantizationGranularity::LayerWise) - quant_const(new_gamma, output_type); - else if (granularity == QuantizationGranularity::ChannelWise) - quant_const_per_channel(new_gamma, output_type); - node->gamma(new_gamma); - changed = true; - } - if (!is_quantized(beta)) - { - assert(beta->dtype() == loco::DataType::FLOAT32); - auto new_beta = luci::clone(beta); - if (granularity == QuantizationGranularity::LayerWise) - quant_const(new_beta, output_type); - else if (granularity == QuantizationGranularity::ChannelWise) - 
quant_const_per_channel(new_beta, output_type); - node->beta(new_beta); - changed = true; - } - - return changed; - } - - bool visit(luci::CirclePRelu *node) - { - LOGGER(l); - INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl; - - auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha()); - - if (!is_quantized(alpha)) + auto arity = node->arity(); + for (uint32_t i = 0; i < arity; i++) { - assert(alpha->dtype() == loco::DataType::FLOAT32); - auto new_alpha = luci::clone(alpha); - if (granularity == QuantizationGranularity::LayerWise) - quant_const(new_alpha, output_type); - else if (granularity == QuantizationGranularity::ChannelWise) - quant_const_per_channel(new_alpha, output_type); - node->alpha(new_alpha); - return true; + if (auto input_quant = create_in_quantize(node->values(i), node)) + node->values(i, input_quant); } - return false; + insert_out_quantize(node); } - bool visit(luci::CircleTransposeConv *node) + // Pack has arbitrary number of inputs + void visit(luci::CirclePack *node) { - LOGGER(l); - INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl; - - auto weights = loco::must_cast<luci::CircleConst *>(node->filter()); - if (!is_quantized(weights)) + auto arity = node->arity(); + for (uint32_t i = 0; i < arity; i++) { - auto new_weights = luci::clone(weights); - node->filter(new_weights); - quantize_weights(new_weights); - return true; + if (auto input_quant = create_in_quantize(node->values(i), node)) + node->values(i, input_quant); } - return false; - } - - bool visit(luci::CircleFullyConnected *node) - { - LOGGER(l); - INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl; - auto weights = loco::must_cast<luci::CircleConst *>(node->weights()); - if (!is_quantized(weights)) - { - auto new_weights = luci::clone(weights); - node->weights(new_weights); - quantize_weights(new_weights); - return true; - } - return false; + insert_out_quantize(node); } - bool visit(luci::CircleNode *) { 
return false; } +#undef INSERT_QUANTIZE_TO_UNARY_OP +#undef INSERT_QUANTIZE_TO_BINARY_OP +#undef INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP }; -/** EXAMPLE - * - * BEFORE - * - * [CircleNode] [CircleConst] - * (qparam1) (FP32) - * \ / - * \ / - * [CirclePack] - * (qparam2) - * - * AFTER - * - * [CircleNode] [CircleConst] [CircleConst] <- Dead node - * (qparam2) (qparam2) (FP32) - * \ / - * \ / - * [CirclePack] - * (qparam2) - * - * NOTE Quantization parameter of CirclePack (qparam2) is propagated to the inputs. - */ -void propagate_pack_quantparam(luci::CirclePack *pack, loco::DataType quant_type) -{ - assert(pack->quantparam() != nullptr); - - const auto num_inputs = pack->values_count(); - - for (uint32_t i = 0; i < num_inputs; i++) - { - auto node = loco::must_cast<luci::CircleNode *>(pack->arg(i)); - - // Skip if this input is PACK Op - if (node->opcode() == luci::CircleOpcode::PACK) - continue; - - // Quantize constant values - if (node->opcode() == luci::CircleOpcode::CIRCLECONST) - { - luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node); - if (const_node->dtype() != loco::DataType::FLOAT32) - throw std::runtime_error("Unsupported data type for constant input of pack Op"); - - const auto pack_qparam = pack->quantparam(); - if (pack_qparam == nullptr) - throw std::runtime_error("quantparam of pack is not found during propagation"); - - assert(pack_qparam->scale.size() == 1); - assert(pack_qparam->zerop.size() == 1); - const auto scaling_factor = pack_qparam->scale[0]; - const auto zerop = pack_qparam->zerop[0]; - - auto new_const = luci::clone(const_node); - quant_const_values(new_const, scaling_factor, zerop, quant_type); - pack->values(i, new_const); - overwrite_quantparam(pack, new_const); - } - else - { - const auto succs = loco::succs(node); - if (succs.size() > 1) - continue; - - // Non-const input must have been quantized - assert(node->quantparam() != nullptr); - overwrite_quantparam(pack, node); - } - } -} - -/** - * @brief 
Quantize const input tensors using min/max of const values - */ -void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type) -{ - auto opcode = node->opcode(); - auto arity = node->arity(); - - loco::Node *input_node{nullptr}; - luci::CircleConst *const_node{nullptr}; - - switch (opcode) - { - case luci::CircleOpcode::CONV_2D: - case luci::CircleOpcode::DEPTHWISE_CONV_2D: - case luci::CircleOpcode::FULLY_CONNECTED: - case luci::CircleOpcode::INSTANCE_NORM: - case luci::CircleOpcode::PRELU: - case luci::CircleOpcode::TRANSPOSE_CONV: - // Handled in QuantizeWeights and QuantizeBias - break; - - case luci::CircleOpcode::CONCATENATION: - // Handled in propagate_concat_quantparam - break; - - case luci::CircleOpcode::LOGICAL_OR: - // Inputs of logical Ops are bool, thus not quantized - break; - - case luci::CircleOpcode::ARG_MAX: - case luci::CircleOpcode::ARG_MIN: - case luci::CircleOpcode::BATCH_TO_SPACE_ND: - case luci::CircleOpcode::LOCAL_RESPONSE_NORMALIZATION: - case luci::CircleOpcode::MEAN: - case luci::CircleOpcode::MIRROR_PAD: - case luci::CircleOpcode::PAD: - case luci::CircleOpcode::REDUCE_ANY: - case luci::CircleOpcode::REDUCE_PROD: - case luci::CircleOpcode::REDUCE_MAX: - case luci::CircleOpcode::REDUCE_MIN: - case luci::CircleOpcode::RESHAPE: - case luci::CircleOpcode::RESIZE_BILINEAR: - case luci::CircleOpcode::RESIZE_NEAREST_NEIGHBOR: - case luci::CircleOpcode::REVERSE_SEQUENCE: - case luci::CircleOpcode::SLICE: - case luci::CircleOpcode::SPACE_TO_BATCH_ND: - case luci::CircleOpcode::SPLIT_V: - case luci::CircleOpcode::STRIDED_SLICE: - case luci::CircleOpcode::SUM: - case luci::CircleOpcode::TILE: - case luci::CircleOpcode::TOPK_V2: - case luci::CircleOpcode::TRANSPOSE: - // The second input of these Ops should not be quantized - // Ex: axis, paddings - input_node = node->arg(0); - const_node = dynamic_cast<luci::CircleConst *>(input_node); - if (const_node != nullptr && !is_quantized(const_node)) - quant_const(const_node, 
output_type); - break; - - case luci::CircleOpcode::ADD: - case luci::CircleOpcode::ADD_N: - case luci::CircleOpcode::DEPTH_TO_SPACE: - case luci::CircleOpcode::DIV: - case luci::CircleOpcode::ELU: - case luci::CircleOpcode::EQUAL: - case luci::CircleOpcode::EXP: - case luci::CircleOpcode::FLOOR: - case luci::CircleOpcode::FLOOR_DIV: - case luci::CircleOpcode::GREATER: - case luci::CircleOpcode::GREATER_EQUAL: - case luci::CircleOpcode::LESS: - case luci::CircleOpcode::LESS_EQUAL: - case luci::CircleOpcode::LOGISTIC: - case luci::CircleOpcode::MAXIMUM: - case luci::CircleOpcode::MINIMUM: - case luci::CircleOpcode::MUL: - case luci::CircleOpcode::NOT_EQUAL: - case luci::CircleOpcode::POW: - case luci::CircleOpcode::RSQRT: - case luci::CircleOpcode::SOFTMAX: - case luci::CircleOpcode::SPACE_TO_DEPTH: - case luci::CircleOpcode::SQRT: - case luci::CircleOpcode::SUB: - case luci::CircleOpcode::TANH: - case luci::CircleOpcode::UNPACK: - // Quantize all const inputs using their values - for (uint32_t i = 0; i < arity; i++) - { - input_node = node->arg(i); - const_node = dynamic_cast<luci::CircleConst *>(input_node); - if (const_node != nullptr && !is_quantized(const_node)) - quant_const(const_node, output_type); - } - break; - - case luci::CircleOpcode::SPLIT: - // Only the second input is quantized - // First input should not be quantized (e.g., split_dim) - input_node = node->arg(1); - const_node = dynamic_cast<luci::CircleConst *>(input_node); - if (const_node != nullptr && !is_quantized(const_node)) - quant_const(const_node, output_type); - break; - - case luci::CircleOpcode::PADV2: - // First and third constant inputs are quantized - // Second input should not be quantized (e.g., paddings) - // Quant params are propagated either from output range to the non-constant input - // or from input to output and constant values - propagate_pad_v2_quantparam(loco::must_cast<CirclePadV2 *>(node), output_type); - break; - - case luci::CircleOpcode::PACK: - // Quant param is 
propagated from output to inputs - propagate_pack_quantparam(loco::must_cast<CirclePack *>(node), output_type); - break; - - default: - for (uint32_t i = 0; i < arity; i++) - { - input_node = node->arg(i); - const_node = dynamic_cast<luci::CircleConst *>(input_node); - if (const_node != nullptr) - throw std::runtime_error("Unsupported Op for const inputs"); - } - break; - } -} - } // namespace -/** BEFORE - * - * [CircleNode] [CircleConst] - * (U8 qparam1) (FP32) - * \ / - * \ / - * [CircleConcatenation] - * (U8 qparam2) - * - * AFTER - * [CircleNode] [CircleConst] [CircleConst] <- Dead node - * (U8 qparam2) (U8 qparam2) (FP32) - * \ / - * \ / - * [CircleConcatenation] - * (U8 qparam2) - */ -void propagate_concat_quantparam(luci::CircleConcatenation *concat, loco::DataType quant_type) -{ - assert(concat->quantparam() != nullptr); - - const auto num_inputs = concat->numValues(); - - // Quantize const inputs using their values if concat has fused act function - if (concat->fusedActivationFunction() != luci::FusedActFunc::NONE) - { - for (uint32_t i = 0; i < num_inputs; i++) - { - auto node = concat->arg(i); - auto const_node = dynamic_cast<luci::CircleConst *>(node); - if (const_node != nullptr) - { - auto new_const = luci::clone(const_node); - quant_const(new_const, quant_type); - concat->values(i, new_const); - } - } - return; - } - - for (uint32_t i = 0; i < num_inputs; i++) - { - auto node = loco::must_cast<luci::CircleNode *>(concat->arg(i)); - - // Skip if this input is CONCAT Op - if (node->opcode() == luci::CircleOpcode::CONCATENATION) - continue; - - // Quantize constant values - if (node->opcode() == luci::CircleOpcode::CIRCLECONST) - { - luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node); - if (const_node->dtype() != loco::DataType::FLOAT32) - throw std::runtime_error("Unsupported data type for constant input of concatenation Op"); - - const auto concat_qparam = concat->quantparam(); - if (concat_qparam == nullptr) - throw 
std::runtime_error("quantparam of concat is not found during propagation"); - - assert(concat_qparam->scale.size() == 1); - const auto scaling_factor = concat_qparam->scale[0]; - const auto zerop = concat_qparam->zerop[0]; - - auto new_const = luci::clone(const_node); - quant_const_values(new_const, scaling_factor, zerop, quant_type); - concat->values(i, new_const); - overwrite_quantparam(concat, new_const); - } - else - { - const auto succs = loco::succs(node); - if (succs.size() > 1) - continue; - - // Non-const input must have been quantized - assert(node->quantparam() != nullptr); - overwrite_quantparam(concat, node); - } - } -} - -/** - * tells if pad_v2 quantization should ignore padding value - * In that case padding const will be quantized with input parameters, and probably clipped - */ -bool ignore_pad_v2_const_quantization(luci::CirclePadV2 *pad) -{ - // This is a workaround to quantize pad generated from MaxPoolWithArgmax operation properly - // TODO use metadata hints to detect this case - auto const_value_node = dynamic_cast<luci::CircleConst *>(pad->arg(2)); - if (!const_value_node) - return false; - if (const_value_node->dtype() == loco::DataType::FLOAT32) - { - float const_value = const_value_node->at<loco::DataType::FLOAT32>(0); - if (const_value == std::numeric_limits<float>::lowest()) - return true; - } - return false; -} - -/** BEFORE - * - * [CircleNode] [CircleConst] [CircleConst] - * (U8 qparam1) (S32) (FP32) - * \ | / - * \ | / - * [CirclePadV2] - * (U8 qparam2) - * - * AFTER (case 1) - * - * By default qparam is propagated from output to inputs to meet backend requirements. - * - * [CircleNode] [CircleConst] [CircleConst] [CircleConst] <- Dead node - * (U8 qparam2) (S32) (U8 qparam2) (FP32) - * \ | / - * \ | / - * [CirclePadV2] - * (U8 qparam2) - * - * AFTER (case 2) - * - * In case padded value is the lowest float value - * Qparam is propagated from input to output and constant. 
- * - * This is a special case for optimization constructed pad, needed to guarantee that - * extremely large negative constant do not stretch output quantization range. - * - * [CircleNode] [CircleConst] [CircleConst] [CircleConst] <- Dead node - * (U8 qparam1) (S32) (U8 qparam1) (FP32) - * \ | / - * \ | / - * [CirclePadV2] - * (U8 qparam1) - */ -void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant_type) -{ - if (ignore_pad_v2_const_quantization(pad_v2)) - { - // propagate input quantization paramters from input to output and padding const value - auto pad_v2_input = loco::must_cast<luci::CircleNode *>(pad_v2->arg(0)); - overwrite_quantparam(pad_v2_input, pad_v2); - - auto const_value_node = loco::must_cast<luci::CircleConst *>( - pad_v2->arg(2)); // FIX ignore_pad_v2_const_quantization UNLESS - auto new_const = luci::clone(const_value_node); - - const auto pad_v2_input_qparam = pad_v2_input->quantparam(); - assert(pad_v2_input_qparam != nullptr); - assert(pad_v2_input_qparam->scale.size() == 1); - const auto scaling_factor = pad_v2_input_qparam->scale.at(0); - const auto zerop = pad_v2_input_qparam->zerop.at(0); - - quant_const_values(new_const, scaling_factor, zerop, quant_type); - overwrite_quantparam(pad_v2_input, new_const); - pad_v2->constant_values(new_const); - return; - } - - // Propagate quantization paramters from output to inputs, - // to fit both input and counstant_value in one quant range. 
- auto quant_input = [pad_v2, quant_type](void (CirclePadV2::*arg_setter)(loco::Node *), - uint32_t arg) { - auto node = loco::must_cast<luci::CircleNode *>(pad_v2->arg(arg)); - - // Quantize constant values - if (node->opcode() == luci::CircleOpcode::CIRCLECONST) - { - luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node); - if (is_quantized(const_node)) - return; - - if (const_node->dtype() != loco::DataType::FLOAT32) - throw std::runtime_error("Unsupported data type for constant input of PadV2 Op"); - - const auto pad_v2_qparam = pad_v2->quantparam(); - if (pad_v2_qparam == nullptr) - throw std::runtime_error("quantparam of PadV2 is not found during propagation"); - - assert(pad_v2_qparam->scale.size() == 1); - const auto scaling_factor = pad_v2_qparam->scale.at(0); - const auto zerop = pad_v2_qparam->zerop.at(0); - - auto new_const = luci::clone(const_node); - quant_const_values(new_const, scaling_factor, zerop, quant_type); - overwrite_quantparam(pad_v2, new_const); - (pad_v2->*arg_setter)(new_const); - } - // Subsequent PadV2 Ops quant params are not propagated - else if (node->opcode() == luci::CircleOpcode::PADV2) - { - return; - } - else - { - const auto succs = loco::succs(node); - if (succs.size() > 1) - return; - - // Non-const input must have been quantized - assert(node->quantparam() != nullptr); - overwrite_quantparam(pad_v2, node); - } - }; - - quant_input(&CirclePadV2::input, 0); - quant_input(&CirclePadV2::constant_values, 2); -} - void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const { auto inputs = g->inputs(); for (auto node : loco::input_nodes(g)) { auto input = loco::must_cast<luci::CircleInput *>(node); - if (input->dtype() == _input_type) + if (input->dtype() == _ctx->input_type) continue; // Bool type is not quantizable if (input->dtype() == loco::DataType::BOOL) continue; + if (input->dtype() == loco::DataType::S32) + continue; + if (input->dtype() == loco::DataType::S64) + continue; // Insert Quantize Op 
auto quant_op = create_quantize_op(input, input->dtype()); @@ -1552,22 +367,22 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const float nudged_min{0}; float nudged_max{0}; - if (_input_type == loco::DataType::U8) + if (_ctx->input_type == loco::DataType::U8) { compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max); } else { - assert(_input_type == loco::DataType::S16); + assert(_ctx->input_type == loco::DataType::S16); compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max); } - input->dtype(_input_type); + input->dtype(_ctx->input_type); input->quantparam()->scale[0] = scaling_factor; input->quantparam()->zerop[0] = zp; } auto graph_input = inputs->at(input->index()); - graph_input->dtype(_input_type); + graph_input->dtype(_ctx->input_type); } } @@ -1577,7 +392,7 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const for (auto node : loco::output_nodes(g)) { auto output = loco::must_cast<luci::CircleOutput *>(node); - if (output->dtype() == _output_type) + if (output->dtype() == _ctx->output_type) continue; // Bool type is not quantizable @@ -1591,7 +406,7 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const continue; // Insert Quantize Op - auto quant_op = create_quantize_op(from, _output_type); + auto quant_op = create_quantize_op(from, _ctx->output_type); loco::replace(from).with(quant_op); quant_op->input(from); @@ -1599,67 +414,165 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const luci::add_origin(quant_op, luci::get_origin(from)); auto graph_output = outputs->at(output->index()); - graph_output->dtype(_output_type); + graph_output->dtype(_ctx->output_type); } } +/** + * How QuantizeWithMinMax works? + * + * We categorized tensors into four groups + * - Activation: Feature maps (both Const/Non-const) + * - Weights: Const tensors of specific Ops (Conv, FC, ...) + * - Bias: Const tensors of specific Ops (Conv, FC, ...) 
+ * - Others: padding value, one_hot value, axis, .. + * + * Activation is quantized in different ways + * 1. For non-constant activation, quantize using recorded min/max + * 2. For constant activation, quantize using min/max of its value + * 3. For some Ops (ex: pad_v2), output qparam is used as input qparam (backward propagation) + * 4. For some Ops (ex: reshape), input qparam is used as output qparam (forward propagation) + * 5. For some Ops (ex: tanh), output qparam has pre-defined values + * + * Weights is quantized using min/max of its value + * + * Bias is quantized using input scale (s_i) and weights scale (s_w) + * - Activation and weights should be quantized earlier than bias + * + * Quantization Steps + * 1. Quantize Activation + * - Quantize using recorded min/max (QuantizeActivation) + * - Insert Quantize Ops for mixed-precision quantization (InsertQuantizeOp) + * - Remove redundant Quantize Ops (RemoveRedundantQuantizePass) + * - Propagate qparam backward (PropagateQParamBackwardPass) + * - Quantize const inputs (QuantizeConstInputActivation) + * - Quantize using pre-defined values (QuantizeSpecialActivation) + * - Propagate qparam forward (PropagateQParamForwardPass) + * 2. Quantize Weights + * 3. Quantize Bias + * 4. Set input dtype + * 5. Set output dtype + * + * Why quantization sequence was determined as above? + * - Activation and weights should be quantized before bias (1->2->3). Input/Output + * dtype can be updated at the end (4->5). + * - During activation quantization, + * - Backward propagation is performed earlier than forward propagation. This allows + * backward-propagated qpram to be overwritten during forward propagation. + * We made this decision as Ops for forward propagation (reshape, transpose, ..) + * are more common than backward propagation. TODO Check this decision is safe. + * - QuantizeSpecialActivation is called before forward propagation to make sure that + * the pre-defined qparam values are propagated. 
+ */ bool QuantizeWithMinMaxPass::run(loco::Graph *g) { LOGGER(l); INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl; + auto info_by_name = layer_info_map(g, _ctx->layers_info); + + auto quantize_dtype = [&](const luci::CircleNode *node) { + auto iter = info_by_name.find(node->name()); + + // Return designated quantization dtype + if (iter != info_by_name.end()) + return iter->second.dtype; + + // Return default quantization dtype + return _ctx->output_model_dtype; + }; + + auto quantize_granularity = [&](const luci::CircleNode *node) { + auto iter = info_by_name.find(node->name()); + + // Return designated quantization granularity + if (iter != info_by_name.end()) + return iter->second.granularity; + + // Return default quantization granularity + return _ctx->granularity; + }; + // Quantize activation for (auto node : loco::active_nodes(loco::output_nodes(g))) { - QuantizeActivation qa(_input_model_dtype, _output_model_dtype); auto circle_node = loco::must_cast<luci::CircleNode *>(node); + QuantizeActivation qa(_ctx->input_model_dtype, quantize_dtype(circle_node)); circle_node->accept(&qa); } - // Quantize weights + // Insert Quantize Op for (auto node : loco::active_nodes(loco::output_nodes(g))) { - QuantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity); auto circle_node = loco::must_cast<luci::CircleNode *>(node); - circle_node->accept(&qw); + auto op_dtype = quantize_dtype(circle_node); + if (op_dtype != _ctx->output_model_dtype) + { + InsertQuantizeOp iqo(_ctx->output_model_dtype, op_dtype); + circle_node->accept(&iqo); + } } - // Quantize bias + // Remove redundant Quantize Op + { + logo::Phase phase; + + phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>()); + + ProgressReporter prog(g, logo::PhaseStrategy::Saturate); + logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g}; + phase_runner.attach(&prog); + phase_runner.run(phase); + } + + // Backward propagation of activation qparam + { + 
PropagateQParamBackwardPass pqbp(_ctx->output_model_dtype); + pqbp.run(g); + } + + // Quantize const input activation for (auto node : loco::active_nodes(loco::output_nodes(g))) { - QuantizeBias qb(_input_model_dtype, _output_model_dtype, _granularity); auto circle_node = loco::must_cast<luci::CircleNode *>(node); - circle_node->accept(&qb); + QuantizeConstInputActivation qcia(quantize_dtype(circle_node)); + circle_node->accept(&qcia); } - // Propagate quantization parameters of concat Op + // Update qparam of output of special Ops for (auto node : loco::active_nodes(loco::output_nodes(g))) { - auto concat = dynamic_cast<luci::CircleConcatenation *>(node); - if (not concat) - continue; - - // Propagate qparam of concat to its inputs if - // (1) concat is uint8-quantized - // (2) concat has no fused activation function - // (3) the input is not concatenation Op - // (4) the input is not produced to Ops other than concat - propagate_concat_quantparam(concat, _output_model_dtype); + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + QuantizeSpecialActivation qsa(_ctx->input_model_dtype, quantize_dtype(circle_node)); + circle_node->accept(&qsa); } - // Quantize const inputs other than weights and bias + // Forward propagation of activation qparam + logo::Phase phase; + + phase.emplace_back(std::make_unique<luci::PropagateQParamForwardPass>(_ctx->TF_style_maxpool)); + + ProgressReporter prog(g, logo::PhaseStrategy::Saturate); + logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g}; + phase_runner.attach(&prog); + phase_runner.run(phase); + + // Quantize weights for (auto node : loco::active_nodes(loco::output_nodes(g))) { auto circle_node = loco::must_cast<luci::CircleNode *>(node); - quantize_const_inputs(circle_node, _output_model_dtype); + QuantizeWeights qw(_ctx->input_model_dtype, quantize_dtype(circle_node), + quantize_granularity(circle_node)); + circle_node->accept(&qw); } - // Update qparam of output of special Ops + // Quantize bias for 
(auto node : loco::active_nodes(loco::output_nodes(g))) { - QuantizeSpecialActivation qsa(_input_model_dtype, _output_model_dtype); auto circle_node = loco::must_cast<luci::CircleNode *>(node); - circle_node->accept(&qsa); + QuantizeBias qb(_ctx->input_model_dtype, quantize_dtype(circle_node), + quantize_granularity(circle_node)); + circle_node->accept(&qb); } // Update output dtype @@ -1667,11 +580,11 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g) for (auto node : loco::output_nodes(g)) { auto circle_node = loco::must_cast<luci::CircleOutput *>(node); - if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_model_dtype) + if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _ctx->output_model_dtype) { - circle_node->dtype(_output_model_dtype); + circle_node->dtype(_ctx->output_model_dtype); auto graph_output = graph_outputs->at(circle_node->index()); - graph_output->dtype(_output_model_dtype); + graph_output->dtype(_ctx->output_model_dtype); } } diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp index 75ec0cfd8..d5fa21ffd 100644 --- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp +++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp @@ -16,8 +16,41 @@ #include "luci/Pass/QuantizeWithMinMaxPass.h" +#include <luci/IR/CircleNodes.h> + #include <gtest/gtest.h> +class SimpleConcatGraph +{ +public: + SimpleConcatGraph(loco::DataType quant_type) + { + concat_node = g.nodes()->create<luci::CircleConcatenation>(2); + input_1 = g.nodes()->create<luci::CircleConst>(); + input_2 = g.nodes()->create<luci::CircleConst>(); + + concat_node->dtype(quant_type); + concat_node->fusedActivationFunction(luci::FusedActFunc::NONE); + input_1->dtype(quant_type); + input_2->dtype(quant_type); + + concat_node->values(0, input_1); + concat_node->values(1, input_2); + } + + ~SimpleConcatGraph() + { + concat_node->values(0, nullptr); + concat_node->values(1, 
nullptr); + } + +public: + loco::Graph g; + luci::CircleConcatenation *concat_node = nullptr; + luci::CircleConst *input_1 = nullptr; + luci::CircleConst *input_2 = nullptr; +}; + TEST(QuantizeWithMinMaxPassTest, name) { luci::QuantizeWithMinMaxPass pass(loco::DataType::FLOAT32, loco::DataType::U8, @@ -25,3 +58,19 @@ TEST(QuantizeWithMinMaxPassTest, name) auto const name = pass.name(); ASSERT_NE(nullptr, name); } + +// Test concat of integer tensors +// Integer tensors are not quantized +TEST(QuantizeWithMinMaxPassTest, int_concat) +{ + SimpleConcatGraph g(loco::DataType::S32); + + luci::QuantizeWithMinMaxPass qwmm(loco::DataType::FLOAT32, loco::DataType::U8, + luci::QuantizationGranularity::LayerWise); + + qwmm.run(&g.g); + + EXPECT_EQ(nullptr, g.concat_node->quantparam()); + EXPECT_EQ(nullptr, g.input_1->quantparam()); + EXPECT_EQ(nullptr, g.input_2->quantparam()); +} diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.cpp index f02301ed1..684d5d48a 100644 --- a/compiler/luci/pass/src/QuantizedModelVerifier.cpp +++ b/compiler/luci/pass/src/QuantizedModelVerifier.cpp @@ -15,10 +15,10 @@ #include "QuantizedModelVerifier.h" -#include "VerifyQuantizedNodeLayerWiseGranularity.h" -#include "VerifyQuantizedNodeChannelWiseGranularity.h" -#include "VerifyQuantizedNodeU8Type.h" -#include "VerifyQuantizedNodeS16Type.h" +#include "VerifyQuantizedNodeGranularity.h" +#include "VerifyQuantizedNodeType.h" +#include "VerifyQuantizedBiasScale.h" +#include "helpers/LayerInfoMap.h" #include <luci/IR/CircleNodes.h> #include <luci/IR/CircleNodeVisitor.h> @@ -28,12 +28,33 @@ namespace luci void QuantizedModelVerifier::verify(loco::Graph *g) { - if (_quantized_dtype != Type::U8 && _quantized_dtype != Type::S16) - throw std::runtime_error("Unsupported quantized dtype"); - - if (_granularity != Granularity::ChannelWise && _granularity != Granularity::LayerWise) + if (_ctx->granularity != Granularity::ChannelWise && 
_ctx->granularity != Granularity::LayerWise) throw std::runtime_error("Unsupported granularity"); + auto info_by_name = layer_info_map(g, _ctx->layers_info); + + auto quantize_dtype = [&](const luci::CircleNode *node) { + auto iter = info_by_name.find(node->name()); + + // Return designated quantization dtype + if (iter != info_by_name.end()) + return iter->second.dtype; + + // Return default quantization dtype + return _ctx->output_model_dtype; + }; + + auto quantize_granularity = [&](const luci::CircleNode *node) { + auto iter = info_by_name.find(node->name()); + + // Return designated quantization granularity + if (iter != info_by_name.end()) + return iter->second.granularity; + + // Return default quantization granularity + return _ctx->granularity; + }; + for (auto node : loco::active_nodes(loco::output_nodes(g))) { auto circle_node = loco::must_cast<luci::CircleNode *>(node); @@ -46,32 +67,17 @@ void QuantizedModelVerifier::verify(loco::Graph *g) }; // Verify Type - if (_quantized_dtype == Type::U8) - { - VerifyQuantizedNodeU8Type vt; - if (!circle_node->accept(&vt)) - throw std::runtime_error("Wrong data type detected in " + node_name()); - } - else if (_quantized_dtype == Type::S16) - { - VerifyQuantizedNodeS16Type vt; - if (!circle_node->accept(&vt)) - throw std::runtime_error("Wrong data type detected in " + node_name()); - } + if (!VerifyQuantizedNodeType::create(quantize_dtype(circle_node))->verify(circle_node)) + throw std::runtime_error("Wrong data type detected in " + node_name()); // Verify Granularity - if (_granularity == Granularity::LayerWise) - { - VerifyQuantizedNodeLayerWiseGranularity vg; - if (!circle_node->accept(&vg)) - throw std::runtime_error("Wrong granularity detected in " + node_name()); - } - else if (_granularity == Granularity::ChannelWise) - { - VerifyQuantizedNodeChannelWiseGranularity vg; - if (!circle_node->accept(&vg)) - throw std::runtime_error("Wrong granularity detected in " + node_name()); - } + if (!circle_node->accept( 
+ VerifyQuantizedNodeGranularity::create(quantize_granularity(circle_node)).get())) + throw std::runtime_error("Wrong granularity detected in " + node_name()); + + // Verify Bias scale + if (!VerifyQuantizedBiasScale::create()->verify(circle_node)) + throw std::runtime_error("Wrong bias scale detected in " + node_name()); } } diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.h b/compiler/luci/pass/src/QuantizedModelVerifier.h index d5fbb8e74..7409a51d7 100644 --- a/compiler/luci/pass/src/QuantizedModelVerifier.h +++ b/compiler/luci/pass/src/QuantizedModelVerifier.h @@ -21,6 +21,8 @@ #include <loco.h> +#include <memory> + namespace luci { @@ -31,18 +33,40 @@ namespace luci */ struct QuantizedModelVerifier { +public: + struct Context + { + loco::DataType output_model_dtype = loco::DataType::Unknown; + QuantizationGranularity granularity = QuantizationGranularity::ChannelWise; + loco::DataType input_type = loco::DataType::Unknown; + loco::DataType output_type = loco::DataType::Unknown; + bool TF_style_maxpool = false; + std::vector<LayerInfo> layers_info; + }; public: QuantizedModelVerifier(loco::DataType quantized_dtype, QuantizationGranularity granularity) - : _quantized_dtype(quantized_dtype), _granularity(granularity) { + _ctx = std::make_unique<Context>(); + { + _ctx->output_model_dtype = quantized_dtype; + _ctx->granularity = granularity; + _ctx->input_type = quantized_dtype; + _ctx->output_type = quantized_dtype; + _ctx->TF_style_maxpool = false; + } + } + +public: + QuantizedModelVerifier(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)} + { + // DO NOTHING } void verify(loco::Graph *g); private: - loco::DataType _quantized_dtype; - QuantizationGranularity _granularity; + std::unique_ptr<Context> _ctx; }; } // namespace luci diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp index 3a6d86c33..cebafd32b 100644 --- a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp +++ 
b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp @@ -17,6 +17,7 @@ #include "QuantizedModelVerifier.h" #include "luci/Pass/QuantizeWithMinMaxPass.h" +#include "luci/Pass/QuantizationParameters.h" #include <luci/test/TestIOGraph.h> @@ -112,57 +113,77 @@ void quantize_and_verify(loco::Graph *g, Type quantized_dtype, Granularity granu verifier.verify(g); } -// Helper function to reduce duplicate test codes -// Assumption: g->output()->from() is the target node -void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype, - Granularity granularity, Type wrong_dtype) +void quantize_and_verify_with_layer_info(loco::Graph *g, Type quantized_dtype, + Granularity granularity) { - luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity); - pass.run(g->g()); - - auto node = loco::must_cast<luci::CircleNode *>(g->output()->from()); - node->dtype(wrong_dtype); + // A layer named "test" has dtype different from quantized_dtype + luci::LayerInfo info; + { + info.name = "test"; + // dtype is different from quantized_dtype + info.dtype = quantized_dtype == Type::U8 ? 
Type::S16 : Type::U8; + info.granularity = Granularity::ChannelWise; + } - luci::QuantizedModelVerifier verifier(quantized_dtype, granularity); - verifier.verify(g->g()); -} + // Do quantization + { + auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>(); + { + ctx->input_model_dtype = Type::FLOAT32; + ctx->output_model_dtype = quantized_dtype; + ctx->granularity = granularity; + ctx->input_type = quantized_dtype; + ctx->output_type = quantized_dtype; + ctx->TF_style_maxpool = false; + ctx->layers_info.push_back(info); + } -void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype, - Granularity granularity, Type wrong_dtype, - luci::CircleNode *target) -{ - luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity); - pass.run(g->g()); + luci::QuantizeWithMinMaxPass pass(std::move(ctx)); + pass.run(g); + } - target->dtype(wrong_dtype); + // Do verification + { + auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>(); + { + ctx->output_model_dtype = quantized_dtype; + ctx->granularity = granularity; + ctx->input_type = quantized_dtype; + ctx->output_type = quantized_dtype; + ctx->TF_style_maxpool = false; + ctx->layers_info.push_back(info); + } - luci::QuantizedModelVerifier verifier(quantized_dtype, granularity); - verifier.verify(g->g()); + luci::QuantizedModelVerifier verifier(std::move(ctx)); + verifier.verify(g); + } } // Helper function to reduce duplicate test codes // Assumption: g->output()->from() is the target node -void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype, - Granularity granularity) +void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype, + Granularity granularity, Type wrong_dtype) { luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity); pass.run(g->g()); auto node = loco::must_cast<luci::CircleNode *>(g->output()->from()); - insert_scale_zp(node, 1.0, 1); + 
node->dtype(wrong_dtype); luci::QuantizedModelVerifier verifier(quantized_dtype, granularity); verifier.verify(g->g()); } // Helper function to reduce duplicate test codes +// Assumption: g->output()->from() is the target node void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype, - Granularity granularity, luci::CircleNode *target) + Granularity granularity) { luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity); pass.run(g->g()); - insert_scale_zp(target, 1.0, 1); + auto node = loco::must_cast<luci::CircleNode *>(g->output()->from()); + insert_scale_zp(node, 1.0, 1); luci::QuantizedModelVerifier verifier(quantized_dtype, granularity); verifier.verify(g->g()); @@ -230,6 +251,8 @@ public: _instnorm->input(input()); _instnorm->gamma(_gamma); _instnorm->beta(_beta); + _instnorm->fusedActivationFunction(luci::FusedActFunc::NONE); + _instnorm->name("test"); } output()->from(_instnorm); @@ -256,6 +279,7 @@ public: _logistic = g()->nodes()->create<luci::CircleLogistic>(); { _logistic->x(input()); + _logistic->name("test"); } output()->from(_logistic); @@ -275,6 +299,7 @@ public: _lrn = g()->nodes()->create<luci::CircleLocalResponseNormalization>(); { _lrn->input(input()); + _lrn->name("test"); } output()->from(_lrn); @@ -295,6 +320,7 @@ public: { _softmax->logits(input()); _softmax->beta(0.1); + _softmax->name("test"); } output()->from(_softmax); @@ -324,6 +350,7 @@ public: _stob->input(input()); _stob->block_shape(_block_shape); _stob->paddings(_paddings); + _stob->name("test"); } output()->from(_stob); @@ -346,6 +373,7 @@ public: { _stod->input(input()); _stod->block_size(2); + _stod->name("test"); } output()->from(_stod); @@ -375,6 +403,7 @@ public: _slice->input(input()); _slice->begin(_begin); _slice->size(_size); + _slice->name("test"); } output()->from(_slice); @@ -472,6 +501,7 @@ public: _slice->begin(_begin); _slice->end(_end); _slice->strides(_strides); + _slice->name("test"); } 
output()->from(_slice); @@ -499,6 +529,7 @@ public: { _reshape->tensor(input()); _reshape->shape(_shape); + _reshape->name("test"); } output()->from(_reshape); @@ -519,6 +550,7 @@ public: _tanh = g()->nodes()->create<luci::CircleTanh>(); { _tanh->x(input()); + _tanh->name("test"); } output()->from(_tanh); @@ -538,6 +570,7 @@ public: _floor = g()->nodes()->create<luci::CircleFloor>(); { _floor->x(input()); + _floor->name("test"); } output()->from(_floor); @@ -601,6 +634,7 @@ public: _btos->input(input()); _btos->block_shape(_block_shape); _btos->crops(_crops); + _btos->name("test"); } output()->from(_btos); @@ -623,6 +657,7 @@ public: { _dtos->input(input()); _dtos->block_size(2); + _dtos->name("test"); } output()->from(_dtos); @@ -645,6 +680,7 @@ public: _pack->values(0, input()); _pack->values(1, _param); _pack->axis(0); + _pack->name("test"); } output()->from(_pack); @@ -680,6 +716,7 @@ public: { _pad->input(input()); _pad->paddings(_paddings); + _pad->name("test"); } output()->from(_pad); @@ -707,6 +744,7 @@ public: _pad->input(input()); _pad->paddings(_paddings); _pad->constant_values(_constant_values); + _pad->name("test"); } output()->from(_pad); @@ -735,6 +773,7 @@ public: _mirror_pad->input(input()); _mirror_pad->paddings(_paddings); _mirror_pad->mode(luci::MirrorPadMode::REFLECT); + _mirror_pad->name("test"); } output()->from(_mirror_pad); @@ -761,6 +800,7 @@ public: { _transpose->a(input()); _transpose->perm(_perm); + _transpose->name("test"); } output()->from(_transpose); @@ -784,6 +824,8 @@ public: _concat->values(0, input()); _concat->values(1, _param); _concat->axis(0); + _concat->fusedActivationFunction(luci::FusedActFunc::NONE); + _concat->name("test"); } output()->from(_concat); @@ -795,6 +837,54 @@ private: luci::CircleConst *_param = nullptr; }; +template <Type indexT> class OneHotTestGraph final : public SimpleTestGraph +{ +public: + void init(void) override + { + TestIOGraph::init({32}, {32, 10}); + { + // input dtype is float by default, but 
OneHot's input should have indexType (s32/s64) + input()->dtype(indexT); + } + + _depth = g()->nodes()->template create<luci::CircleConst>(); + { + _depth->dtype(loco::DataType::S32); + } + + _on_value = g()->nodes()->template create<luci::CircleConst>(); + { + _on_value->dtype(loco::DataType::FLOAT32); + } + + _off_value = g()->nodes()->template create<luci::CircleConst>(); + { + _off_value->dtype(loco::DataType::FLOAT32); + } + + _one_hot = g()->nodes()->template create<luci::CircleOneHot>(); + { + _one_hot->indices(input()); + _one_hot->depth(_depth); + _one_hot->on_value(_on_value); + _one_hot->off_value(_off_value); + _one_hot->axis(-1); + _one_hot->dtype(loco::DataType::FLOAT32); + _one_hot->name("test"); + } + output()->from(_one_hot); + + set_minmax_to_non_const(g(), -1, 1); + } + +private: + luci::CircleOneHot *_one_hot = nullptr; + luci::CircleConst *_depth = nullptr; + luci::CircleConst *_on_value = nullptr; + luci::CircleConst *_off_value = nullptr; +}; + // Test graph for comparison Ops // GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, EQUAL, NOT_EQUAL template <class Op> class ComparisonOpTestGraph final : public SimpleTestGraph @@ -866,6 +956,7 @@ public: { _div->x(input()); _div->y(_const); + _div->name("test"); } output()->from(_div); @@ -893,6 +984,7 @@ public: { _floor_div->x(input()); _floor_div->y(_const); + _floor_div->name("test"); } output()->from(_floor_div); @@ -917,6 +1009,7 @@ public: _rsqrt = g()->nodes()->create<luci::CircleRsqrt>(); { _rsqrt->x(input()); + _rsqrt->name("test"); } output()->from(_rsqrt); @@ -936,6 +1029,7 @@ public: _sqrt = g()->nodes()->create<luci::CircleSqrt>(); { _sqrt->x(input()); + _sqrt->name("test"); } output()->from(_sqrt); @@ -955,6 +1049,7 @@ public: _elu = g()->nodes()->create<luci::CircleElu>(); { _elu->features(input()); + _elu->name("test"); } output()->from(_elu); @@ -977,6 +1072,7 @@ public: { _pow->x(input()); _pow->y(_const); + _pow->name("test"); } output()->from(_pow); @@ -1004,6 +1100,7 @@ public: { 
_resize_bilinear->input(input()); _resize_bilinear->size(_size); + _resize_bilinear->name("test"); } output()->from(_resize_bilinear); @@ -1027,6 +1124,7 @@ public: { _resize_nearest_neighbor->input(input()); _resize_nearest_neighbor->size(_size); + _resize_nearest_neighbor->name("test"); } output()->from(_resize_nearest_neighbor); @@ -1067,6 +1165,62 @@ private: luci::CircleConst *_unpack_dim = nullptr; }; +class MulTestGraph final : public SimpleTestGraph +{ +public: + void init(void) override + { + TestIOGraph::init({32}, {32}); + + _const = create_dummy_const<Type::FLOAT32>(g(), {32}); + _mul = g()->nodes()->create<luci::CircleMul>(); + { + _mul->x(input()); + _mul->y(_const); + _mul->fusedActivationFunction(luci::FusedActFunc::NONE); + _mul->name("test"); + } + output()->from(_mul); + + set_minmax_to_non_const(g(), -1, 1); + } + + loco::Node *x() { return _mul->x(); } + loco::Node *y() { return _mul->y(); } + +private: + luci::CircleMul *_mul = nullptr; + luci::CircleConst *_const = nullptr; +}; + +class AddTestGraph final : public SimpleTestGraph +{ +public: + void init(void) override + { + TestIOGraph::init({32}, {32}); + + _const = create_dummy_const<Type::FLOAT32>(g(), {32}); + _add = g()->nodes()->create<luci::CircleAdd>(); + { + _add->x(input()); + _add->y(_const); + _add->fusedActivationFunction(luci::FusedActFunc::NONE); + _add->name("test"); + } + output()->from(_add); + + set_minmax_to_non_const(g(), -1, 1); + } + + loco::Node *x() { return _add->x(); } + loco::Node *y() { return _add->y(); } + +private: + luci::CircleAdd *_add = nullptr; + luci::CircleConst *_const = nullptr; +}; + } // namespace // Quantize and verify with given configurations @@ -1078,6 +1232,15 @@ private: EXPECT_NO_THROW(quantize_and_verify(g.g(), type, granularity)); \ } while (0) +// Quantize and verify with layer info +#define TEST_WITH_LAYER_INFO(graph, type, granularity) \ + do \ + { \ + graph g; \ + g.init(); \ + EXPECT_NO_THROW(quantize_and_verify_with_layer_info(g.g(), 
type, granularity)); \ + } while (0) + // Quantize and verify with wrong type #define TEST_WITH_WRONG_TYPE(graph, type, granularity, wrong_dtype) \ do \ @@ -1098,25 +1261,34 @@ private: // Quantize and verify with wrong type // Users can specify the test target -#define TEST_WITH_WRONG_TYPE_TARGET(graph, type, granularity, wrong_dtype, target) \ - do \ - { \ - graph g; \ - g.init(); \ - auto node = loco::must_cast<luci::CircleNode *>(target); \ - EXPECT_ANY_THROW( \ - quantize_and_verify_with_wrong_type(&g, type, granularity, wrong_dtype, node)); \ +#define TEST_WITH_WRONG_TYPE_TARGET(graph, type, granularity, wrong_dtype, target) \ + do \ + { \ + graph g; \ + g.init(); \ + auto node = loco::must_cast<luci::CircleNode *>(target); \ + luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, type, granularity); \ + pass.run(g.g()); \ + auto after_node = loco::must_cast<luci::CircleNode *>(target); \ + after_node->dtype(wrong_dtype); \ + luci::QuantizedModelVerifier verifier(type, granularity); \ + EXPECT_ANY_THROW(verifier.verify(g.g())); \ } while (0) // Quantize and verify with wrong granularity // Users can specify the test target -#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity, target) \ - do \ - { \ - graph g; \ - g.init(); \ - auto node = loco::must_cast<luci::CircleNode *>(target); \ - EXPECT_ANY_THROW(quantize_and_verify_with_wrong_granularity(&g, type, granularity, node)); \ +#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity, target) \ + do \ + { \ + graph g; \ + g.init(); \ + auto node = loco::must_cast<luci::CircleNode *>(target); \ + luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, type, granularity); \ + pass.run(g.g()); \ + auto after_node = loco::must_cast<luci::CircleNode *>(target); \ + insert_scale_zp(after_node, 1.0, 1); \ + luci::QuantizedModelVerifier verifier(type, granularity); \ + EXPECT_ANY_THROW(verifier.verify(g.g())); \ } while (0) // Test a local helper function @@ -1145,6 +1317,10 @@ 
TEST(QuantizedModelVerifierTest, InstanceNorm) TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1169,6 +1345,10 @@ TEST(QuantizedModelVerifierTest, LocalResponseNormalization) TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1199,6 +1379,10 @@ TEST(QuantizedModelVerifierTest, Logistic) TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(LogisticTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1223,6 +1407,10 @@ TEST(QuantizedModelVerifierTest, Softmax) TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(SoftmaxTestGraph, Type::S16, 
Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1247,6 +1435,10 @@ TEST(QuantizedModelVerifierTest, SpaceToBatchND) TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1271,6 +1463,10 @@ TEST(QuantizedModelVerifierTest, SpaceToDepth) TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1299,6 +1495,14 @@ TEST(QuantizedModelVerifierTest, Slice) TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise); + + 
TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1379,6 +1583,10 @@ TEST(QuantizedModelVerifierTest, StridedSlice) TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1463,6 +1671,10 @@ TEST(QuantizedModelVerifierTest, BatchToSpaceND) TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1487,6 +1699,10 @@ TEST(QuantizedModelVerifierTest, DepthToSpace) TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1511,6 +1727,10 @@ 
TEST(QuantizedModelVerifierTest, Concatenation) TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1557,6 +1777,10 @@ TEST(QuantizedModelVerifierTest, Reshape) TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(ReshapeTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1581,6 +1805,10 @@ TEST(QuantizedModelVerifierTest, Tanh) TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(TanhTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(TanhTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(TanhTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(TanhTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1606,6 +1834,10 @@ TEST(QuantizedModelVerifierTest, Pack) TEST_WITH_GRAPH(PackTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(PackTestGraph, Type::S16, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(PackTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(PackTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(PackTestGraph, Type::S16, Granularity::ChannelWise); + 
// Test if Pack's qparam is propagated to the input { PackTestGraph g; @@ -1640,6 +1872,10 @@ TEST(QuantizedModelVerifierTest, Pad) TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(PadTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(PadTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(PadTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(PadTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1664,6 +1900,10 @@ TEST(QuantizedModelVerifierTest, PadV2) TEST_WITH_GRAPH(PadV2TestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(PadV2TestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(PadV2TestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1688,6 +1928,10 @@ TEST(QuantizedModelVerifierTest, MirrorPad) TEST_WITH_GRAPH(MirrorPadTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(MirrorPadTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(MirrorPadTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1712,6 +1956,10 @@ TEST(QuantizedModelVerifierTest, Transpose) TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(TransposeTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::U8, Granularity::LayerWise); + 
TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1736,6 +1984,10 @@ TEST(QuantizedModelVerifierTest, Floor) TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(FloorTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(FloorTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(FloorTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(FloorTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1869,11 +2121,59 @@ TEST(QuantizedModelVerifierTest, NotEqual_wrong_granularity_NEG) SUCCEED(); } +TEST(QuantizedModelVerifierTest, OneHot) +{ + TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise); + TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise); + TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise); + + TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise); + TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise); + TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, OneHot_wrong_input_type_NEG) +{ + TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, 
Type::U8, Granularity::LayerWise, Type::S16); + TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise, Type::S16); + TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise, Type::U8); + + TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise, Type::S16); + TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise, Type::S16); + TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise, Type::U8); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, OneHot_wrong_granularity_NEG) +{ + TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise); + TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise); + TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise); + + TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise); + TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise); + TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise); + SUCCEED(); +} + TEST(QuantizedModelVerifierTest, Div) { TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(DivTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(DivTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(DivTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(DivTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1902,6 +2202,10 @@ TEST(QuantizedModelVerifierTest, FloorDiv) TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(FloorDivTestGraph, Type::S16, Granularity::ChannelWise); + + 
TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1930,6 +2234,10 @@ TEST(QuantizedModelVerifierTest, Rsqrt) TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(RsqrtTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1954,6 +2262,10 @@ TEST(QuantizedModelVerifierTest, Sqrt) TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(SqrtTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -1978,6 +2290,10 @@ TEST(QuantizedModelVerifierTest, Elu) TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(EluTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(EluTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(EluTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(EluTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -2002,6 +2318,10 @@ TEST(QuantizedModelVerifierTest, Pow) TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(PowTestGraph, Type::S16, 
Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(PowTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(PowTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(PowTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -2030,6 +2350,10 @@ TEST(QuantizedModelVerifierTest, ResizeBilinear) TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -2054,6 +2378,10 @@ TEST(QuantizedModelVerifierTest, ResizeNearestNeighbor) TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::U8, Granularity::LayerWise); TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::U8, Granularity::ChannelWise); TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise); SUCCEED(); } @@ -2099,6 +2427,93 @@ TEST(QuantizedModelVerifierTest, Unpack_wrong_granularity_NEG) SUCCEED(); } +TEST(QuantizedModelVerifierTest, Add) +{ + TEST_WITH_GRAPH(AddTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_GRAPH(AddTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_GRAPH(AddTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(AddTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(AddTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(AddTestGraph, Type::S16, 
Granularity::ChannelWise); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, Add_wrong_type_NEG) +{ + TEST_WITH_WRONG_TYPE(AddTestGraph, Type::U8, Granularity::LayerWise, Type::S16); + TEST_WITH_WRONG_TYPE(AddTestGraph, Type::U8, Granularity::ChannelWise, Type::S16); + TEST_WITH_WRONG_TYPE(AddTestGraph, Type::S16, Granularity::ChannelWise, Type::U8); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, Add_wrong_granularity_NEG) +{ + TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::LayerWise, g.x()); + TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::ChannelWise, g.x()); + TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::S16, Granularity::ChannelWise, g.x()); + + TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::LayerWise, g.y()); + TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::ChannelWise, g.y()); + TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::S16, Granularity::ChannelWise, g.y()); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, Mul) +{ + TEST_WITH_GRAPH(MulTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_GRAPH(MulTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_GRAPH(MulTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(MulTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(MulTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(MulTestGraph, Type::S16, Granularity::ChannelWise); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, Mul_wrong_type_NEG) +{ + TEST_WITH_WRONG_TYPE(MulTestGraph, Type::U8, Granularity::LayerWise, Type::S16); + TEST_WITH_WRONG_TYPE(MulTestGraph, Type::U8, Granularity::ChannelWise, Type::S16); + TEST_WITH_WRONG_TYPE(MulTestGraph, Type::S16, Granularity::ChannelWise, Type::U8); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, Mul_wrong_granularity_NEG) +{ + TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::LayerWise, g.x()); 
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::ChannelWise, g.x()); + TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::S16, Granularity::ChannelWise, g.x()); + + TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::LayerWise, g.y()); + TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::ChannelWise, g.y()); + TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::S16, Granularity::ChannelWise, g.y()); + SUCCEED(); +} + +// TODO Add following testcases +// +// CircleConv2D +// +// CircleDepthwiseConv2D +// +// CirclePRelu +// +// CircleTransposeConv +// +// CircleFullyConnected +// +// CircleAveragePool2D +// +// CircleMaxPool2D +// +// CircleMean +// +// CircleRelu +// +// CircleCast +// + #undef TEST_WITH_GRAPH #undef TEST_WITH_WRONG_TYPE #undef TEST_WITH_WRONG_GRANULARITY diff --git a/compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp b/compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp new file mode 100644 index 000000000..8a10ad4a0 --- /dev/null +++ b/compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/RemoveRedundantQuantizePass.h" + +#include <luci/IR/CircleNode.h> + +/** + * Remove redundant quantize operations. 
For subsequent Quantize Ops, + * only the last Quantize Op is valid, so we can remove the rest of the Quantize Op. + * + * BEFORE + * [CircleNode_1] + * | + * [CircleQuantize, dtype_1, scale_1, zero_point_1] + * | + * [CircleQuantize, dtype_2, scale_2, zero_point_2] + * | + * [CircleNode_2] + * + * AFTER + * [CircleNode_1] + * / \ + * / \ + * / \ + * / \ + * / \ + * [CircleQuantize, dtype_2, scale_2, zero_point_2] [CircleQuantize, dtype_1, scale_1, zero_point_1] + * | + * [CircleNode_2] + * + */ + +namespace +{ + +bool remove_redundant_quantize(luci::CircleQuantize *node) +{ + auto pred_node = loco::must_cast<luci::CircleNode *>(node->input()); + + if (node->quantparam() == nullptr or pred_node->quantparam() == nullptr) + return false; + + if (node->quantparam()->scale.size() != 1 or node->quantparam()->zerop.size() != 1 or + pred_node->quantparam()->scale.size() != 1 or pred_node->quantparam()->zerop.size() != 1) + { + return false; + } + + if (node->dtype() != pred_node->dtype() or + pred_node->quantparam()->scale.at(0) != node->quantparam()->scale.at(0) or + pred_node->quantparam()->zerop.at(0) != node->quantparam()->zerop.at(0)) + { + return false; + } + + replace(node).with(pred_node); + + return true; +} + +bool remove_redundant_subsequent_quantize(luci::CircleQuantize *node) +{ + auto pred_node = dynamic_cast<luci::CircleQuantize *>(node->input()); + if (pred_node == nullptr) + return remove_redundant_quantize(node); + + node->input(pred_node->input()); + return true; +} + +} // namespace + +namespace luci +{ + +bool RemoveRedundantQuantizePass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::postorder_traversal(loco::output_nodes(g))) + { + if (auto quantize_node = dynamic_cast<luci::CircleQuantize *>(node)) + { + if (remove_redundant_subsequent_quantize(quantize_node)) + changed = true; + } + } + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp 
b/compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp new file mode 100644 index 000000000..d0166bd20 --- /dev/null +++ b/compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/RemoveRedundantQuantizePass.h" + +#include <luci/IR/CircleNodes.h> + +#include <luci/test/TestIOGraph.h> + +#include <gtest/gtest.h> + +namespace +{ + +using namespace luci::test; + +class QuantizeGraphlet +{ +public: + QuantizeGraphlet() = default; + +public: + void init(loco::Graph *g) + { + _first_quantize = g->nodes()->create<luci::CircleQuantize>(); + _first_quantize->dtype(loco::DataType::U8); + { + auto quantize_param = std::make_unique<luci::CircleQuantParam>(); + quantize_param->scale = {0.5}; + quantize_param->zerop = {0}; + _first_quantize->quantparam(std::move(quantize_param)); + } + _first_quantize->name("first_quantize"); + + _second_quantize = g->nodes()->create<luci::CircleQuantize>(); + _second_quantize->dtype(loco::DataType::U8); + { + auto quantize_param = std::make_unique<luci::CircleQuantParam>(); + quantize_param->scale = {0.5}; + quantize_param->zerop = {0}; + _second_quantize->quantparam(std::move(quantize_param)); + } + _second_quantize->name("second_quantize"); + } + +protected: + luci::CircleQuantize *_first_quantize = nullptr; + luci::CircleQuantize *_second_quantize = 
nullptr; +}; + +class RedundantSubsequentQuantizeGraph : public TestIOGraph, public QuantizeGraphlet +{ +public: + RedundantSubsequentQuantizeGraph() = default; + +public: + void init(void) + { + TestIOGraph::init({1}, {1}); + QuantizeGraphlet::init(g()); + + input()->dtype(loco::DataType::U8); + { + auto quantize_param = std::make_unique<luci::CircleQuantParam>(); + quantize_param->scale = {1}; + quantize_param->zerop = {1}; + input()->quantparam(std::move(quantize_param)); + } + + _first_quantize->input(input()); + _second_quantize->input(_first_quantize); + + output()->from(_second_quantize); + output()->dtype(loco::DataType::U8); + } +}; + +class RedundantQuantizeGraph : public TestIOGraph, public QuantizeGraphlet +{ +public: + RedundantQuantizeGraph() = default; + +public: + void init(void) + { + TestIOGraph::init({1}, {1}); + QuantizeGraphlet::init(g()); + + input()->dtype(loco::DataType::U8); + { + auto quantize_param = std::make_unique<luci::CircleQuantParam>(); + quantize_param->scale = {0.5}; + quantize_param->zerop = {0}; + input()->quantparam(std::move(quantize_param)); + } + + _first_quantize->input(input()); + + output()->from(_first_quantize); + output()->dtype(loco::DataType::U8); + } +}; + +} // namespace + +TEST(RemoveRedundantQuantizePass, name) +{ + luci::RemoveRedundantQuantizePass pass; + auto const name = pass.name(); + ASSERT_NE(nullptr, name); +} + +TEST(RemoveRedundantQuantizePass, remove_subsequent_quantize) +{ + RedundantSubsequentQuantizeGraph g; + luci::RemoveRedundantQuantizePass pass; + + g.init(); + + EXPECT_TRUE(pass.run(g.g())); + + int count = 0; + for (auto node : loco::active_nodes(loco::output_nodes(g.g()))) + { + if (dynamic_cast<luci::CircleQuantize *>(node)) + { + count++; + } + } + + ASSERT_EQ(1, count); +} + +TEST(RemoveRedundantQuantizePass, remove_quantize) +{ + RedundantQuantizeGraph g; + luci::RemoveRedundantQuantizePass pass; + + g.init(); + + EXPECT_TRUE(pass.run(g.g())); + + int count = 0; + for (auto node : 
loco::active_nodes(loco::output_nodes(g.g()))) + { + if (dynamic_cast<luci::CircleQuantize *>(node)) + { + count++; + } + } + + ASSERT_EQ(0, count); +} diff --git a/compiler/luci/pass/src/RemoveRedundantTransposePass.cpp b/compiler/luci/pass/src/RemoveRedundantTransposePass.cpp index 71c51ecda..75cf72795 100644 --- a/compiler/luci/pass/src/RemoveRedundantTransposePass.cpp +++ b/compiler/luci/pass/src/RemoveRedundantTransposePass.cpp @@ -71,7 +71,7 @@ bool remove_consecutive_transpose_function(luci::CircleTranspose *target_node) for (uint32_t i = 0; i < pred_perm->size<loco::DataType::S32>(); i++) { new_const_node->at<loco::DataType::S32>(i) = - target_perm->at<loco::DataType::S32>(pred_perm->at<loco::DataType::S32>(i)); + pred_perm->at<loco::DataType::S32>(target_perm->at<loco::DataType::S32>(i)); } new_const_node->name(name + "/Transpose/perm"); diff --git a/compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp b/compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp index e80623499..bb8e292d4 100644 --- a/compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp +++ b/compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp @@ -271,6 +271,31 @@ TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type2) ASSERT_EQ(2, perm->at<loco::DataType::S32>(3)); } +TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type3) +{ + auto graph = loco::make_graph(); + create_redundunt_transpose(graph.get(), {0, 3, 2, 1}, {0, 2, 3, 1}); + + luci::RemoveRedundantTransposePass pass; + while (pass.run(graph.get())) + ; + luci::CircleTranspose *transpose_node = nullptr; + for (auto node : loco::active_nodes(loco::output_nodes(graph.get()))) + { + auto trans = dynamic_cast<luci::CircleTranspose *>(node); + if (not trans) + continue; + transpose_node = trans; + break; + } + ASSERT_NE(nullptr, transpose_node); + auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm()); + ASSERT_EQ(0, 
perm->at<loco::DataType::S32>(0)); + ASSERT_EQ(2, perm->at<loco::DataType::S32>(1)); + ASSERT_EQ(1, perm->at<loco::DataType::S32>(2)); + ASSERT_EQ(3, perm->at<loco::DataType::S32>(3)); +} + /** * @brief Test case that first transpose output become input of operations more than one. */ diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp index 3f0c4ee82..fb46f490d 100644 --- a/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp +++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp @@ -58,6 +58,25 @@ bool remove_no_effect_reshape(luci::CircleNode *node) namespace luci { +/** + * BEFORE + * [CircleNode] + * | + * [CircleReshape] + * | + * [CircleNode] + * + * AFTER + * [CircleNode] + * | \ + * | [CircleReshape] + * | + * [CircleNode] + * + * NOTE + * This pass will remove Reshape when input and output has same shape + */ + bool RemoveUnnecessaryReshapePass::run(loco::Graph *g) { bool changed = false; diff --git a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp index a0cc0194f..bca0a9483 100644 --- a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp +++ b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp @@ -26,8 +26,17 @@ namespace luci::CircleConst *create_weights_from_gamma(luci::CircleConst *gamma) { - assert(gamma->rank() == 1); - auto channel_size = gamma->dim(0).value(); + assert(gamma->rank() == 1 or gamma->rank() == 4); + + uint32_t channel_idx = gamma->rank() - 1; + uint32_t channel_size = gamma->dim(channel_idx).value(); + + // Gamma should be broadcastable in the channel direction + for (uint32_t i = 0; i < gamma->rank(); i++) + { + if (i != channel_idx) + assert(gamma->dim(i).value() == 1); // FIX is_batchnorm_mul UNLESS + } auto name = gamma->name(); assert(name.length() > 0); @@ -53,8 +62,17 @@ luci::CircleConst *create_weights_from_gamma(luci::CircleConst *gamma) 
luci::CircleConst *create_bias_from_beta(luci::CircleConst *beta) { - assert(beta->rank() == 1); - auto channel_size = beta->dim(0).value(); + assert(beta->rank() == 1 or beta->rank() == 4); + + uint32_t channel_idx = beta->rank() - 1; + uint32_t channel_size = beta->dim(channel_idx).value(); + + // Beta should be broadcastable in the channel direction + for (uint32_t i = 0; i < beta->rank(); i++) + { + if (i != channel_idx) + assert(beta->dim(i).value() == 1); // FIX is_batchnorm_add UNLESS + } auto name = beta->name(); assert(name.length() > 0); diff --git a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp index 903d4dcc9..bac033112 100644 --- a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp +++ b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp @@ -141,6 +141,37 @@ TEST(ReplaceMulAddWithDepthwiseConv, simple) } } +TEST(ReplaceMulAddWithDepthwiseConv, simple_rank4) +{ + SimpleGraph g; + + const uint32_t channel_size = 16; + g.gamma->shape({1, 1, 1, channel_size}); + g.beta->shape({1, 1, 1, channel_size}); + + luci::ReplaceMulAddWithDepthwiseConvPass pass; + while (pass.run(&g.g)) + ; + + auto dwconv = dynamic_cast<luci::CircleDepthwiseConv2D *>(g.output->from()); + EXPECT_NE(nullptr, dwconv); + + auto weights = dynamic_cast<luci::CircleConst *>(dwconv->filter()); + auto bias = dynamic_cast<luci::CircleConst *>(dwconv->bias()); + EXPECT_NE(nullptr, weights); + EXPECT_EQ(4, weights->rank()); + EXPECT_EQ(channel_size, weights->dim(3).value()); + EXPECT_NE(nullptr, bias); + EXPECT_EQ(1, bias->rank()); + EXPECT_EQ(channel_size, bias->dim(0).value()); + + for (int i = 0; i < channel_size; i++) + { + EXPECT_FLOAT_EQ(i, weights->at<loco::DataType::FLOAT32>(i)); + EXPECT_FLOAT_EQ(i, bias->at<loco::DataType::FLOAT32>(i)); + } +} + TEST(ReplaceMulAddWithDepthwiseConv, wrong_op_NEG) { SimpleGraph g; @@ -154,3 +185,18 @@ 
TEST(ReplaceMulAddWithDepthwiseConv, wrong_op_NEG) EXPECT_EQ(false, changed); } + +TEST(ReplaceMulAddWithDepthwiseConv, rank3_NEG) +{ + SimpleGraph g; + + g.input->shape({4, 4, 16}); + g.mul->shape({4, 4, 16}); + g.add->shape({4, 4, 16}); + g.output->shape({4, 4, 16}); + + luci::ReplaceMulAddWithDepthwiseConvPass pass; + auto changed = pass.run(&g.g); + + EXPECT_EQ(false, changed); +} diff --git a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp index 9cba9a9e7..57c386d99 100644 --- a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp +++ b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp @@ -24,15 +24,6 @@ namespace { -void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src) -{ - auto q = src->quantparam(); - if (q == nullptr) - dst->quantparam(nullptr); - else - dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q)); -} - // SplitV is substituted to Split if the contents of size_splits are all same // For example, // size_splits = [32, 32] -> substitute @@ -67,7 +58,7 @@ bool resolve_splitv(luci::CircleSplitV *sv) split_node->split_dim(sv->split_dim()); split_node->num_split(sv->num_split()); split_node->name(sv->name()); - copy_quantparam(split_node, sv); + copy_quantparam(sv, split_node); luci::add_origin(split_node, luci::get_origin(sv)); auto succs = loco::succs(sv); @@ -78,7 +69,7 @@ bool resolve_splitv(luci::CircleSplitV *sv) so_node->input(split_node); so_node->index(svo->index()); so_node->name(svo->name()); - copy_quantparam(so_node, svo); + copy_quantparam(svo, so_node); luci::add_origin(so_node, luci::get_origin(svo)); replace(svo).with(so_node); diff --git a/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp index f48763782..df7266df9 100644 --- a/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp +++ b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp @@ -76,18 +76,6 @@ 
std::vector<uint32_t> node_shape(const luci::CircleNode *input) } /** - * @brief copy quantparam of src to dst - */ -void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src) -{ - auto q = src->quantparam(); - if (q == nullptr) - dst->quantparam(nullptr); - else - dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q)); -} - -/** * @brief return CircleConst ptr with values of new_shape */ luci::CircleConst *create_shape_const(loco::Graph *graph, const std::vector<uint32_t> &new_shape) @@ -142,7 +130,7 @@ bool substitute_squeeze_to_reshape(luci::CircleSqueeze *squeeze) auto graph = squeeze->graph(); auto reshape = graph->nodes()->create<luci::CircleReshape>(); auto shape_const = create_shape_const(graph, reshape_shape); - copy_quantparam(reshape, squeeze); + copy_quantparam(squeeze, reshape); reshape->name(name + "/Reshape"); luci::add_origin(reshape, luci::get_origin(squeeze)); shape_const->name(name + "/Reshape/shape"); diff --git a/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp b/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp index f50f2f54f..9e1c5a4a3 100644 --- a/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp +++ b/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp @@ -124,7 +124,7 @@ bool substitute_strided_slice_to_reshape(luci::CircleStridedSlice *ss_node) std::bitset<32> end_mask(ss_node->end_mask()); std::bitset<32> shrink_axis_mask(ss_node->shrink_axis_mask()); - uint input_rank = input_node->rank(); + uint32_t input_rank = input_node->rank(); for (uint32_t i = 0; i < input_rank; i++) { if (!input_node->dim(i).known()) diff --git a/compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp b/compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp new file mode 100644 index 000000000..e65d576cd --- /dev/null +++ b/compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "VerifyQuantizedBiasScale.h" + +#include <cmath> + +// This macro is undef at the end of the file +#define RETURN_FALSE_UNLESS(ARG) \ + if (not(ARG)) \ + { \ + return false; \ + } + +namespace +{ + +bool same(float a, float b) +{ + constexpr float epsilon = 1e-10; + return abs(a - b) < epsilon; +} + +// Check bias scale = input scale * weight scale +// This function checks both LWQ and CWQ +bool check_bias_scale(const loco::Node *input, const loco::Node *weights, const loco::Node *bias) +{ + auto input_node = loco::must_cast<const luci::CircleNode *>(input); + auto input_qparam = input_node->quantparam(); + RETURN_FALSE_UNLESS(input_qparam != nullptr); + + auto weights_node = loco::must_cast<const luci::CircleNode *>(weights); + auto weights_qparam = weights_node->quantparam(); + RETURN_FALSE_UNLESS(weights_qparam != nullptr); + + auto bias_node = loco::must_cast<const luci::CircleNode *>(bias); + auto bias_qparam = bias_node->quantparam(); + RETURN_FALSE_UNLESS(bias_qparam != nullptr); + + RETURN_FALSE_UNLESS(input_qparam->scale.size() == 1); + RETURN_FALSE_UNLESS(weights_qparam->scale.size() == bias_qparam->scale.size()); + + auto input_scale = input_qparam->scale[0]; + for (uint32_t i = 0; i < weights_qparam->scale.size(); i++) + { + auto weights_scale = weights_qparam->scale[i]; + auto bias_scale = bias_qparam->scale[i]; + RETURN_FALSE_UNLESS(same(bias_scale, 
input_scale * weights_scale)); + } + return true; +} + +} // namespace + +namespace luci +{ + +bool VerifyQuantizedBiasScale::visit(const luci::CircleConv2D *node) +{ + RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->filter(), node->bias())); + return true; +} + +bool VerifyQuantizedBiasScale::visit(const luci::CircleDepthwiseConv2D *node) +{ + RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->filter(), node->bias())); + return true; +} + +bool VerifyQuantizedBiasScale::visit(const luci::CircleFullyConnected *node) +{ + luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + if (bias != nullptr) + { + RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->weights(), node->bias())); + } + return true; +} + +bool VerifyQuantizedBiasScale::visit(const luci::CircleTransposeConv *node) +{ + luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + if (bias != nullptr) + { + RETURN_FALSE_UNLESS(check_bias_scale(node->outBackprop(), node->filter(), node->bias())); + } + return true; +} + +} // namespace luci + +#undef RETURN_FALSE_UNLESS diff --git a/compiler/luci/pass/src/VerifyQuantizedBiasScale.h b/compiler/luci/pass/src/VerifyQuantizedBiasScale.h new file mode 100644 index 000000000..b41f78eca --- /dev/null +++ b/compiler/luci/pass/src/VerifyQuantizedBiasScale.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__ +#define __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__ + +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> + +#include <memory> + +namespace luci +{ + +/** + * @brief Verify the scale of quantized bias node + * @details + * + * Bias of CONV, DCONV, TCONV, FC layers should meet the following condition. + * + * bias scale = input scale * weights scale + */ +class VerifyQuantizedBiasScale : public luci::CircleNodeVisitor<bool> +{ +public: + static std::shared_ptr<VerifyQuantizedBiasScale> create() + { + return std::make_shared<VerifyQuantizedBiasScale>(); + }; + +public: + bool verify(luci::CircleNode *node) { return node->accept(this); } + +private: + // Operators with bias + bool visit(const luci::CircleConv2D *node); + bool visit(const luci::CircleDepthwiseConv2D *node); + bool visit(const luci::CircleFullyConnected *node); + bool visit(const luci::CircleTransposeConv *node); + + bool visit(const luci::CircleNode *) { return true; } +}; + +} // namespace luci + +#endif // __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__ diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp new file mode 100644 index 000000000..8697090a7 --- /dev/null +++ b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "VerifyQuantizedNodeGranularity.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/Pass/QuantizationParameters.h> + +#include <memory> + +namespace luci +{ + +std::shared_ptr<VerifyQuantizedNodeGranularity> +VerifyQuantizedNodeGranularity::create(Granularity granularity) +{ + if (granularity == Granularity::ChannelWise) + return std::make_shared<VerifyQuantizedNodeChannelWiseGranularity>(); + else if (granularity == Granularity::LayerWise) + return std::make_shared<VerifyQuantizedNodeLayerWiseGranularity>(); + else + throw std::domain_error("Not supported Granularity type"); +} + +} // namespace luci diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h index bf3ff2e8a..442183c18 100644 --- a/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h +++ b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h @@ -1,5 +1,6 @@ /* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -13,13 +14,15 @@ * limitations under the License. 
*/ -#ifndef __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__ -#define __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__ +#ifndef __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__ +#define __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__ #include <luci/IR/CircleNodes.h> #include <luci/IR/CircleNodeVisitor.h> #include <luci/Pass/QuantizationParameters.h> +#include <memory> + using Granularity = luci::QuantizationGranularity; // This macro is undef at the end of the file @@ -33,16 +36,19 @@ namespace luci { /** - * @brief Verify the granualrity of channel-wise quantized node + * @brief Verify the granualrity of quantized node * @details * * Targets to verify * - node's output (i.e., node itself) * - node's inputs */ -struct VerifyQuantizedNodeChannelWiseGranularity final : public luci::CircleNodeVisitor<bool> +class VerifyQuantizedNodeGranularity : public luci::CircleNodeVisitor<bool> { -private: +public: + static std::shared_ptr<VerifyQuantizedNodeGranularity> create(Granularity granularity); + +protected: bool is_lwq(const loco::Node *node) { auto circle_node = loco::must_cast<const luci::CircleNode *>(node); @@ -59,48 +65,15 @@ private: return true; } - uint32_t rank(const loco::Node *node) - { - auto circle_node = loco::must_cast<const luci::CircleNode *>(node); - return circle_node->rank(); - } - - bool is_cwq_const(const loco::Node *node, uint32_t channel_dim) - { - auto circle_node = loco::must_cast<const luci::CircleConst *>(node); - - assert(channel_dim < circle_node->rank()); // FIX_CALLER_UNLESS - auto channel_size = circle_node->dim(channel_dim).value(); - - if (circle_node->quantparam() == nullptr) - return false; - - if (circle_node->quantparam()->quantized_dimension != static_cast<int32_t>(channel_dim)) - return false; - - if (circle_node->quantparam()->scale.size() != channel_size) - return false; - - if (circle_node->quantparam()->zerop.size() != channel_size) - return false; - - return true; - } - private: - bool visit(const luci::CircleConv2D 
*node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0)) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - if (bias != nullptr) - RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1)) - return true; - } + virtual bool visit(const luci::CircleConv2D *node) = 0; bool visit(const luci::CircleConcatenation *node) { + // Skip granularity check for concatenation of indices + if (node->dtype() == loco::DataType::S32 or node->dtype() == loco::DataType::S64) + return true; + RETURN_FALSE_UNLESS(is_lwq(node)) for (uint32_t i = 0; i < node->numValues(); i++) { @@ -116,25 +89,9 @@ private: return true; } - bool visit(const luci::CircleDepthwiseConv2D *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 3)) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - if (bias != nullptr) - RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1)) - return true; - } + virtual bool visit(const luci::CircleDepthwiseConv2D *node) = 0; - bool visit(const luci::CircleInstanceNorm *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - RETURN_FALSE_UNLESS(is_cwq_const(node->gamma(), rank(node->gamma()) - 1)) - RETURN_FALSE_UNLESS(is_cwq_const(node->beta(), rank(node->beta()) - 1)) - return true; - } + virtual bool visit(const luci::CircleInstanceNorm *node) = 0; bool visit(const luci::CirclePack *node) { @@ -168,37 +125,11 @@ private: return true; } - bool visit(const luci::CirclePRelu *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - RETURN_FALSE_UNLESS(is_cwq_const(node->alpha(), rank(node->alpha()) - 1)) - return true; - } - - bool visit(const luci::CircleTransposeConv *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - 
RETURN_FALSE_UNLESS(is_lwq(node->outBackprop())) - RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0)) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - if (bias != nullptr) - RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1)) + virtual bool visit(const luci::CirclePRelu *node) = 0; - return true; - } + virtual bool visit(const luci::CircleTransposeConv *node) = 0; - bool visit(const luci::CircleFullyConnected *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - RETURN_FALSE_UNLESS(is_cwq_const(node->weights(), 0)) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - // Bias is optional (it can be CircleOutputExclude) - if (bias != nullptr) - RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1)) - return true; - } + virtual bool visit(const luci::CircleFullyConnected *node) = 0; bool visit(const luci::CircleAdd *node) { @@ -258,6 +189,14 @@ private: return true; } + bool visit(const luci::CircleOneHot *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)); + RETURN_FALSE_UNLESS(is_lwq(node->off_value())); + RETURN_FALSE_UNLESS(is_lwq(node->on_value())); + return true; + } + bool visit(const luci::CircleRelu *node) { RETURN_FALSE_UNLESS(is_lwq(node)); @@ -480,8 +419,186 @@ private: bool visit(const luci::CircleNode *) { return true; } }; +class VerifyQuantizedNodeChannelWiseGranularity final : public VerifyQuantizedNodeGranularity +{ +private: + uint32_t rank(const loco::Node *node) + { + auto circle_node = loco::must_cast<const luci::CircleNode *>(node); + return circle_node->rank(); + } + + bool is_cwq_const(const loco::Node *node, uint32_t channel_dim) + { + auto circle_node = loco::must_cast<const luci::CircleConst *>(node); + + assert(channel_dim < circle_node->rank()); // FIX_CALLER_UNLESS + auto channel_size = circle_node->dim(channel_dim).value(); + + if (circle_node->quantparam() == nullptr) + return false; + + if 
(circle_node->quantparam()->quantized_dimension != static_cast<int32_t>(channel_dim)) + return false; + + if (circle_node->quantparam()->scale.size() != channel_size) + return false; + + if (circle_node->quantparam()->zerop.size() != channel_size) + return false; + + return true; + } + +private: + bool visit(const luci::CircleConv2D *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + RETURN_FALSE_UNLESS(is_lwq(node->input())) + RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0)) + luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + if (bias != nullptr) + RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1)) + return true; + } + + bool visit(const luci::CircleDepthwiseConv2D *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + RETURN_FALSE_UNLESS(is_lwq(node->input())) + RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 3)) + luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + if (bias != nullptr) + RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1)) + return true; + } + + bool visit(const luci::CircleInstanceNorm *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + RETURN_FALSE_UNLESS(is_lwq(node->input())) + RETURN_FALSE_UNLESS(is_cwq_const(node->gamma(), rank(node->gamma()) - 1)) + RETURN_FALSE_UNLESS(is_cwq_const(node->beta(), rank(node->beta()) - 1)) + return true; + } + + bool visit(const luci::CirclePRelu *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + RETURN_FALSE_UNLESS(is_lwq(node->input())) + RETURN_FALSE_UNLESS(is_cwq_const(node->alpha(), rank(node->alpha()) - 1)) + return true; + } + + bool visit(const luci::CircleTransposeConv *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + RETURN_FALSE_UNLESS(is_lwq(node->outBackprop())) + RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0)) + luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + if (bias != nullptr) + RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1)) + + return true; + } + + 
bool visit(const luci::CircleFullyConnected *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + RETURN_FALSE_UNLESS(is_lwq(node->input())) + RETURN_FALSE_UNLESS(is_cwq_const(node->weights(), 0)) + luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + // Bias is optional (it can be CircleOutputExclude) + if (bias != nullptr) + RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1)) + return true; + } +}; + +class VerifyQuantizedNodeLayerWiseGranularity final : public VerifyQuantizedNodeGranularity +{ +private: + bool is_lwq_const(const loco::Node *node) + { + auto circle_node = loco::must_cast<const luci::CircleConst *>(node); + + if (circle_node->quantparam() == nullptr) + return false; + + if (circle_node->quantparam()->scale.size() != 1) + return false; + + if (circle_node->quantparam()->zerop.size() != 1) + return false; + + return true; + } + +private: + bool visit(const luci::CircleConv2D *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + RETURN_FALSE_UNLESS(is_lwq(node->input())) + RETURN_FALSE_UNLESS(is_lwq_const(node->filter())) + luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + if (bias != nullptr) + RETURN_FALSE_UNLESS(is_lwq_const(node->bias())) + return true; + } + + bool visit(const luci::CircleDepthwiseConv2D *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + RETURN_FALSE_UNLESS(is_lwq(node->input())) + RETURN_FALSE_UNLESS(is_lwq_const(node->filter())) + luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + if (bias != nullptr) + RETURN_FALSE_UNLESS(is_lwq_const(node->bias())) + return true; + } + + bool visit(const luci::CircleInstanceNorm *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + RETURN_FALSE_UNLESS(is_lwq(node->input())) + RETURN_FALSE_UNLESS(is_lwq_const(node->gamma())) + RETURN_FALSE_UNLESS(is_lwq_const(node->beta())) + return true; + } + + bool visit(const luci::CirclePRelu *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + 
RETURN_FALSE_UNLESS(is_lwq(node->input())) + RETURN_FALSE_UNLESS(is_lwq_const(node->alpha())) + return true; + } + + bool visit(const luci::CircleTransposeConv *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + RETURN_FALSE_UNLESS(is_lwq(node->outBackprop())) + RETURN_FALSE_UNLESS(is_lwq_const(node->filter())) + luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + if (bias != nullptr) + RETURN_FALSE_UNLESS(is_lwq_const(node->bias())) + return true; + } + + bool visit(const luci::CircleFullyConnected *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)) + RETURN_FALSE_UNLESS(is_lwq(node->input())) + RETURN_FALSE_UNLESS(is_lwq_const(node->weights())) + luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + if (bias != nullptr) + RETURN_FALSE_UNLESS(is_lwq_const(node->bias())) + return true; + } +}; + } // namespace luci #undef RETURN_FALSE_UNLESS -#endif // __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__ +#endif // __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__ diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h deleted file mode 100644 index 9bc8b31df..000000000 --- a/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h +++ /dev/null @@ -1,473 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__ -#define __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__ - -#include <luci/IR/CircleNodes.h> -#include <luci/IR/CircleNodeVisitor.h> -#include <luci/Pass/QuantizationParameters.h> - -using Granularity = luci::QuantizationGranularity; - -// This macro is undef at the end of the file -#define RETURN_FALSE_UNLESS(ARG) \ - if (not(ARG)) \ - { \ - return false; \ - } - -namespace luci -{ - -/** - * @brief Verify the granualrity of layer-wise quantized node - * @details - * - * Targets to verify - * - node's output (i.e., node itself) - * - node's inputs - */ -struct VerifyQuantizedNodeLayerWiseGranularity final : public luci::CircleNodeVisitor<bool> -{ -private: - bool is_lwq(const loco::Node *node) - { - auto circle_node = loco::must_cast<const luci::CircleNode *>(node); - - if (circle_node->quantparam() == nullptr) - return false; - - if (circle_node->quantparam()->scale.size() != 1) - return false; - - if (circle_node->quantparam()->zerop.size() != 1) - return false; - - return true; - } - - bool is_lwq_const(const loco::Node *node) - { - auto circle_node = loco::must_cast<const luci::CircleConst *>(node); - - if (circle_node->quantparam() == nullptr) - return false; - - if (circle_node->quantparam()->scale.size() != 1) - return false; - - if (circle_node->quantparam()->zerop.size() != 1) - return false; - - return true; - } - -private: - bool visit(const luci::CircleConv2D *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - RETURN_FALSE_UNLESS(is_lwq_const(node->filter())) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - if (bias != nullptr) - RETURN_FALSE_UNLESS(is_lwq_const(node->bias())) - return true; - } - - bool visit(const luci::CircleConcatenation *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - for (uint32_t i = 0; i < node->numValues(); i++) - { - RETURN_FALSE_UNLESS(is_lwq(node->values(i))); - } - return 
true; - } - - bool visit(const luci::CircleDepthToSpace *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - return true; - } - - bool visit(const luci::CircleDepthwiseConv2D *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - RETURN_FALSE_UNLESS(is_lwq_const(node->filter())) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - if (bias != nullptr) - RETURN_FALSE_UNLESS(is_lwq_const(node->bias())) - return true; - } - - bool visit(const luci::CircleInstanceNorm *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - RETURN_FALSE_UNLESS(is_lwq_const(node->gamma())) - RETURN_FALSE_UNLESS(is_lwq_const(node->beta())) - return true; - } - - bool visit(const luci::CirclePack *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - for (uint32_t i = 0; i < node->values_count(); i++) - { - RETURN_FALSE_UNLESS(is_lwq(node->values(i))); - } - return true; - } - - bool visit(const luci::CirclePad *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - return true; - } - - bool visit(const luci::CirclePadV2 *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - RETURN_FALSE_UNLESS(is_lwq(node->constant_values())) - return true; - } - - bool visit(const luci::CircleMirrorPad *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - return true; - } - - bool visit(const luci::CirclePRelu *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - RETURN_FALSE_UNLESS(is_lwq_const(node->alpha())) - return true; - } - - bool visit(const luci::CircleTransposeConv *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->outBackprop())) - RETURN_FALSE_UNLESS(is_lwq_const(node->filter())) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - if (bias != 
nullptr) - RETURN_FALSE_UNLESS(is_lwq_const(node->bias())) - return true; - } - - bool visit(const luci::CircleFullyConnected *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())) - RETURN_FALSE_UNLESS(is_lwq_const(node->weights())) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - if (bias != nullptr) - RETURN_FALSE_UNLESS(is_lwq_const(node->bias())) - return true; - } - - bool visit(const luci::CircleAdd *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->x())); - RETURN_FALSE_UNLESS(is_lwq(node->y())); - return true; - } - - bool visit(const luci::CircleAveragePool2D *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->value())); - return true; - } - - bool visit(const luci::CircleLogicalOr *) - { - // Logical OR has bool-type inputs and output - // Nothing to be checked - return true; - } - - bool visit(const luci::CircleMaxPool2D *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->value())); - return true; - } - - bool visit(const luci::CircleLocalResponseNormalization *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleMean *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleMul *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->x())); - RETURN_FALSE_UNLESS(is_lwq(node->y())); - return true; - } - - bool visit(const luci::CircleNotEqual *node) - { - RETURN_FALSE_UNLESS(is_lwq(node->x())); - RETURN_FALSE_UNLESS(is_lwq(node->y())); - return true; - } - - bool visit(const luci::CircleRelu *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)) - RETURN_FALSE_UNLESS(is_lwq(node->features())); - return true; - } - - bool visit(const luci::CircleReshape *node) - { - auto input = loco::must_cast<const 
luci::CircleNode *>(node->tensor()); - bool input_quantized = input->quantparam() != nullptr; - bool node_quantized = node->quantparam() != nullptr; - RETURN_FALSE_UNLESS(input_quantized == node_quantized); - RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node)) - RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input)); - return true; - } - - bool visit(const luci::CircleLogistic *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->x())); - return true; - } - - bool visit(const luci::CircleSoftmax *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->logits())); - return true; - } - - bool visit(const luci::CircleSpaceToBatchND *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleSpaceToDepth *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleSlice *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleSplit *node) - { - // node's output is the input of CircleSplitOut, thus not quantized - RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleSplitOut *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - return true; - } - - bool visit(const luci::CircleSplitV *node) - { - // node's output is the input of CircleSplitVOut, thus not quantized - RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleSplitVOut *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - return true; - } - - bool visit(const luci::CircleStridedSlice *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleArgMax *node) - { - // node's output is index, thus not quantized - 
RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleBatchToSpaceND *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleTanh *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->x())); - return true; - } - - bool visit(const luci::CircleTranspose *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->a())); - return true; - } - - bool visit(const luci::CircleFloor *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->x())); - return true; - } - - bool visit(const luci::CircleGreater *node) - { - RETURN_FALSE_UNLESS(is_lwq(node->x())); - RETURN_FALSE_UNLESS(is_lwq(node->y())); - return true; - } - - bool visit(const luci::CircleGreaterEqual *node) - { - RETURN_FALSE_UNLESS(is_lwq(node->x())); - RETURN_FALSE_UNLESS(is_lwq(node->y())); - return true; - } - - bool visit(const luci::CircleDiv *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->x())); - RETURN_FALSE_UNLESS(is_lwq(node->y())); - return true; - } - - bool visit(const luci::CircleFloorDiv *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->x())); - RETURN_FALSE_UNLESS(is_lwq(node->y())); - return true; - } - - bool visit(const luci::CircleRsqrt *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->x())); - return true; - } - - bool visit(const luci::CircleSqrt *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->x())); - return true; - } - - bool visit(const luci::CircleElu *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->features())); - return true; - } - - bool visit(const luci::CirclePow *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->x())); - RETURN_FALSE_UNLESS(is_lwq(node->y())); - return true; 
- } - - bool visit(const luci::CircleResizeBilinear *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleResizeNearestNeighbor *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - RETURN_FALSE_UNLESS(is_lwq(node->input())); - return true; - } - - bool visit(const luci::CircleUnpack *node) - { - // node's output is the input of CircleUnpackOut, thus not quantized - RETURN_FALSE_UNLESS(is_lwq(node->value())); - return true; - } - - bool visit(const luci::CircleUnpackOut *node) - { - RETURN_FALSE_UNLESS(is_lwq(node)); - return true; - } - - bool visit(const luci::CircleCast *node) - { - auto input = loco::must_cast<const luci::CircleNode *>(node->x()); - bool input_quantized = input->quantparam() != nullptr; - bool node_quantized = node->quantparam() != nullptr; - RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input)); - RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node)); - return true; - } - - // TODO: Implement more Ops - - bool visit(const luci::CircleNode *) { return true; } -}; - -} // namespace luci - -#undef RETURN_FALSE_UNLESS - -#endif // __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__ diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h b/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h deleted file mode 100644 index eeec7b82b..000000000 --- a/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h +++ /dev/null @@ -1,516 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__ -#define __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__ - -#include <luci/IR/CircleNodes.h> -#include <luci/IR/CircleNodeVisitor.h> - -#include <cmath> - -using Type = loco::DataType; - -// This macro is undef at the end of the file -#define RETURN_FALSE_UNLESS(ARG) \ - if (not(ARG)) \ - { \ - return false; \ - } - -namespace luci -{ - -/** - * @brief Verify the data type of INT16 quantized node - * @details - * - * Targets to verify - * - node's output (i.e., node itself) - * - node's inputs - */ -struct VerifyQuantizedNodeS16Type final : public luci::CircleNodeVisitor<bool> -{ -private: - bool has_type(const loco::Node *node, Type dtype) - { - auto circle_node = loco::must_cast<const luci::CircleNode *>(node); - return circle_node->dtype() == dtype; - } - -private: - bool visit(const luci::CircleConv2D *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64)) - return true; - } - - bool visit(const luci::CircleConcatenation *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - for (uint32_t i = 0; i < node->numValues(); i++) - { - RETURN_FALSE_UNLESS(has_type(node->values(i), Type::S16)) - } - return true; - } - - bool visit(const luci::CircleDepthToSpace *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - return true; - } - - bool visit(const 
luci::CircleDepthwiseConv2D *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64)) - return true; - } - - bool visit(const luci::CircleInstanceNorm *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->gamma(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->beta(), Type::S16)) - return true; - } - - bool visit(const luci::CirclePack *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - for (uint32_t i = 0; i < node->values_count(); i++) - { - RETURN_FALSE_UNLESS(has_type(node->values(i), Type::S16)) - } - return true; - } - - bool visit(const luci::CirclePad *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32)) - return true; - } - - bool visit(const luci::CirclePadV2 *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32)) - RETURN_FALSE_UNLESS(has_type(node->constant_values(), Type::S16)) - return true; - } - - bool visit(const luci::CircleMirrorPad *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32)) - return true; - } - - bool visit(const luci::CirclePRelu *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->alpha(), Type::S16)) - return true; - } - - bool visit(const luci::CircleTransposeConv *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->outBackprop(), 
Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16)) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - if (bias != nullptr) - RETURN_FALSE_UNLESS(has_type(bias, Type::S64)) - return true; - } - - bool visit(const luci::CircleFullyConnected *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->weights(), Type::S16)) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - if (bias != nullptr) - RETURN_FALSE_UNLESS(has_type(bias, Type::S64)) - return true; - } - - bool visit(const luci::CircleAdd *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16)) - return true; - } - - bool visit(const luci::CircleAveragePool2D *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16)) - return true; - } - - bool visit(const luci::CircleLogicalOr *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::BOOL)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::BOOL)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::BOOL)) - return true; - } - - bool visit(const luci::CircleMaxPool2D *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16)) - return true; - } - - bool visit(const luci::CircleLocalResponseNormalization *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - return true; - } - - bool visit(const luci::CircleMean *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), Type::S32)) - return true; - } - - bool visit(const luci::CircleMul *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - 
RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16)) - return true; - } - - bool visit(const luci::CircleNotEqual *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::BOOL)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16)) - return true; - } - - bool visit(const luci::CircleRelu *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->features(), Type::S16)) - return true; - } - - bool visit(const luci::CircleReshape *node) - { - if (node->quantparam()) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->tensor(), Type::S16)) - } - else - { - RETURN_FALSE_UNLESS(has_type(node->tensor(), node->dtype())) - } - luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape()); - if (shape != nullptr) - RETURN_FALSE_UNLESS(has_type(shape, Type::S32)) - return true; - } - - bool visit(const luci::CircleLogistic *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0); - return true; - } - - bool visit(const luci::CircleSoftmax *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->logits(), Type::S16)) - - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32767.0f); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0); - return true; - } - - bool visit(const luci::CircleSpaceToBatchND *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - return true; - } - - bool visit(const luci::CircleSpaceToDepth *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - 
RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - return true; - } - - bool visit(const luci::CircleSlice *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->begin(), Type::S32) || has_type(node->begin(), Type::S64)) - RETURN_FALSE_UNLESS(has_type(node->size(), Type::S32) || has_type(node->size(), Type::S64)) - return true; - } - - bool visit(const luci::CircleSplit *node) - { - // node's output is the input of CircleSplitOut, thus not quantized - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - return true; - } - - bool visit(const luci::CircleSplitOut *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - - // SplitOut has the same qparam with the input of Split - auto split = loco::must_cast<luci::CircleSplit *>(node->input()); - auto input = loco::must_cast<luci::CircleNode *>(split->input()); - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); - return true; - } - - bool visit(const luci::CircleSplitV *node) - { - // node's output is the input of CircleSplitVOut, thus not quantized - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - return true; - } - - bool visit(const luci::CircleSplitVOut *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - - // SplitVOut has the same qparam with the input of SplitV - auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input()); - auto input = loco::must_cast<luci::CircleNode *>(splitv->input()); - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); - return true; - } - - bool visit(const luci::CircleStridedSlice *node) - { - 
RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - - auto input = loco::must_cast<luci::CircleNode *>(node->input()); - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); - return true; - } - - bool visit(const luci::CircleArgMax *node) - { - RETURN_FALSE_UNLESS(has_type(node, node->output_type())) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->dimension(), Type::S32) || - has_type(node->dimension(), Type::S64)) - return true; - } - - bool visit(const luci::CircleBatchToSpaceND *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - return true; - } - - bool visit(const luci::CircleTanh *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0); - return true; - } - - bool visit(const luci::CircleTranspose *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->a(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->perm(), Type::S32)) - return true; - } - - bool visit(const luci::CircleFloor *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - - // This checks the value of scale is an integer - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]); - return true; - } - - bool visit(const luci::CircleGreater *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::BOOL)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - 
RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16)) - return true; - } - - bool visit(const luci::CircleGreaterEqual *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::BOOL)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16)) - return true; - } - - bool visit(const luci::CircleDiv *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16)) - return true; - } - - bool visit(const luci::CircleFloorDiv *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16)) - - // This checks the value of scale is an integer - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]); - return true; - } - - bool visit(const luci::CircleRsqrt *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - return true; - } - - bool visit(const luci::CircleSqrt *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - return true; - } - - bool visit(const luci::CircleElu *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->features(), Type::S16)) - return true; - } - - bool visit(const luci::CirclePow *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16)) - return true; - } - - bool visit(const luci::CircleResizeBilinear *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - return true; - } - - bool visit(const luci::CircleResizeNearestNeighbor *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - 
RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) - return true; - } - - bool visit(const luci::CircleUnpack *node) - { - // node's output is the input of CircleUnpackOut, thus not quantized - RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16)) - return true; - } - - bool visit(const luci::CircleUnpackOut *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - - // UnpackOut has the same qparam with the input of Unpack - auto Unpack = loco::must_cast<luci::CircleUnpack *>(node->input()); - auto input = loco::must_cast<luci::CircleNode *>(Unpack->value()); - RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); - return true; - } - - bool visit(const luci::CircleCast *node) - { - auto *input = loco::must_cast<luci::CircleNode *>(node->x()); - RETURN_FALSE_UNLESS(has_type(input, node->in_data_type())) - - bool input_quantized = input->quantparam() != nullptr; - if (input_quantized) - RETURN_FALSE_UNLESS(has_type(input, Type::S16)) - - RETURN_FALSE_UNLESS(has_type(node, node->out_data_type())) - - bool node_quantized = node->quantparam() != nullptr; - if (node_quantized) - RETURN_FALSE_UNLESS(has_type(node, Type::S16)) - return true; - } - - // TODO: Implement more Ops - - bool visit(const luci::CircleNode *) { return true; } -}; - -} // namespace luci - -#undef RETURN_FALSE_UNLESS - -#endif // __LUCI_VERIFY_QUNTIZED_NODE_S16_TYPE_H__ diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp new file mode 100644 index 000000000..4e1c062c0 --- /dev/null +++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "VerifyQuantizedNodeType.h" + +#include <cmath> +#include <memory> + +// This macro is undef at the end of the file +#define RETURN_FALSE_UNLESS(ARG) \ + if (not(ARG)) \ + { \ + return false; \ + } + +namespace luci +{ + +std::shared_ptr<VerifyQuantizedNodeType> VerifyQuantizedNodeType::create(loco::DataType dtype) +{ + if (dtype == loco::DataType::U8) + return std::make_shared<VerifyQuantizedNodeU8Type>(); + else if (dtype == loco::DataType::S16) + return std::make_shared<VerifyQuantizedNodeS16Type>(); + else + throw std::domain_error("Not supported Quantized type"); +} + +} // namespace luci + +namespace luci +{ + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleAdd *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleArgMax *node) +{ + RETURN_FALSE_UNLESS(has_type(node, node->output_type())) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->dimension(), loco::DataType::S32) || + has_type(node->dimension(), loco::DataType::S64)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleAveragePool2D *node) +{ + return group_has_type(node, 
Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleBatchToSpaceND *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleCast *node) +{ + auto *input = loco::must_cast<luci::CircleNode *>(node->x()); + bool input_quantized = input->quantparam() != nullptr; + if (input_quantized) + { + RETURN_FALSE_UNLESS(has_type(input, node->in_data_type())) + RETURN_FALSE_UNLESS(has_type(input, Qtype)) + } + + bool node_quantized = node->quantparam() != nullptr; + if (node_quantized) + { + RETURN_FALSE_UNLESS(has_type(node, node->out_data_type())) + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + } + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleConv2D *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->bias(), Btype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleConcatenation *node) +{ + // Allow concatenation of indices + if (group_has_type(node, loco::DataType::S32) or group_has_type(node, loco::DataType::S64)) + return true; + + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDepthToSpace *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDepthwiseConv2D *node) +{ + RETURN_FALSE_UNLESS(has_type(node, 
Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->bias(), Btype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDiv *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleElu *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFloor *node) +{ + RETURN_FALSE_UNLESS(group_has_type(node, Qtype)); + + // This checks the value of scale is an integer + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]); + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFloorDiv *node) +{ + RETURN_FALSE_UNLESS(group_has_type(node, Qtype)); + + // This checks the value of scale is an integer + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]); + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFullyConnected *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->weights(), Qtype)) + luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + if (bias != nullptr) + RETURN_FALSE_UNLESS(has_type(bias, Btype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleGreater *node) +{ 
+ RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL)) + RETURN_FALSE_UNLESS(has_type(node->x(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->y(), Qtype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleGreaterEqual *node) +{ + RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL)) + RETURN_FALSE_UNLESS(has_type(node->x(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->y(), Qtype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleInstanceNorm *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit( + const luci::CircleLocalResponseNormalization *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleLogicalOr *node) +{ + return group_has_type(node, loco::DataType::BOOL); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMaxPool2D *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMean *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), loco::DataType::S32)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMirrorPad *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32)) + return true; +} + 
+template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMul *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleNotEqual *node) +{ + RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL)) + RETURN_FALSE_UNLESS(has_type(node->x(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->y(), Qtype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleOneHot *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)); + RETURN_FALSE_UNLESS(has_type(node->indices(), loco::DataType::S32) || + has_type(node->indices(), loco::DataType::S64)); + RETURN_FALSE_UNLESS(has_type(node->depth(), loco::DataType::S32)); + RETURN_FALSE_UNLESS(has_type(node->on_value(), Qtype)); + RETURN_FALSE_UNLESS(has_type(node->off_value(), Qtype)); + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePack *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePad *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePadV2 *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32)) + RETURN_FALSE_UNLESS(has_type(node->constant_values(), Qtype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType 
Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePRelu *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePow *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleRelu *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleReshape *node) +{ + if (node->quantparam()) + { + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->tensor(), Qtype)) + } + else + { + RETURN_FALSE_UNLESS(has_type(node->tensor(), node->dtype())) + } + luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape()); + if (shape != nullptr) + RETURN_FALSE_UNLESS(has_type(shape, loco::DataType::S32)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleResizeBilinear *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleResizeNearestNeighbor *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleRsqrt *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSlice *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + 
RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->begin(), loco::DataType::S32) || + has_type(node->begin(), loco::DataType::S64)) + RETURN_FALSE_UNLESS(has_type(node->size(), loco::DataType::S32) || + has_type(node->size(), loco::DataType::S64)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSpaceToBatchND *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSpaceToDepth *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplit *node) +{ + // node's output is the input of CircleSplitOut, thus not quantized + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitOut *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + + // SplitOut has the same qparam with the input of Split + auto split = loco::must_cast<luci::CircleSplit *>(node->input()); + auto input = loco::must_cast<luci::CircleNode *>(split->input()); + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitV *node) +{ + // node's output is the input of CircleSplitVOut, thus not quantized + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + return true; +} + +template <loco::DataType 
Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitVOut *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + + // SplitVOut has the same qparam with the input of SplitV + auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input()); + auto input = loco::must_cast<luci::CircleNode *>(splitv->input()); + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSqrt *node) +{ + return group_has_type(node, Qtype); +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleStridedSlice *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + + auto input = loco::must_cast<luci::CircleNode *>(node->input()); + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleTranspose *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->a(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->perm(), loco::DataType::S32)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleTransposeConv *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype)) + 
luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); + if (bias != nullptr) + RETURN_FALSE_UNLESS(has_type(bias, Btype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleUnpack *node) +{ + // node's output is the input of CircleUnpackOut, thus not quantized + RETURN_FALSE_UNLESS(has_type(node->value(), Qtype)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleUnpackOut *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + + // UnpackOut has the same qparam with the input of Unpack + auto Unpack = loco::must_cast<luci::CircleUnpack *>(node->input()); + auto input = loco::must_cast<luci::CircleNode *>(Unpack->value()); + RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam()); + RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); + return true; +} + +} // namespace luci + +namespace luci +{ + +bool VerifyQuantizedNodeU8Type::visit(const luci::CircleTanh *node) +{ + RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8)); + + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 2.0f / 256.0f); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 128); + return true; +} + +bool VerifyQuantizedNodeU8Type::visit(const luci::CircleLogistic *node) +{ + RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8)); + + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 256.0f); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0); + return true; +} + +bool VerifyQuantizedNodeU8Type::visit(const luci::CircleSoftmax *node) +{ + RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8)); + + RETURN_FALSE_UNLESS(node->quantparam()); 
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 255.0f); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0); + return true; +} + +} // namespace luci + +namespace luci +{ + +bool VerifyQuantizedNodeS16Type::visit(const luci::CircleTanh *node) +{ + RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16)); + + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0); + return true; +} + +bool VerifyQuantizedNodeS16Type::visit(const luci::CircleLogistic *node) +{ + RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16)); + + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0); + return true; +} + +bool VerifyQuantizedNodeS16Type::visit(const luci::CircleSoftmax *node) +{ + RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16)); + + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32767.0f); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0); + return true; +} + +} // namespace luci + +#undef RETURN_FALSE_UNLESS diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.h b/compiler/luci/pass/src/VerifyQuantizedNodeType.h new file mode 100644 index 000000000..ff1acbd6f --- /dev/null +++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__ +#define __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__ + +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> + +namespace luci +{ + +/** + * @brief Verify the data type of quantized node + * @details + * + * Targets to verify + * - node's output (i.e., node itself) + * - node's inputs + */ +class VerifyQuantizedNodeType +{ +public: + static std::shared_ptr<VerifyQuantizedNodeType> create(loco::DataType dtype); + +public: + virtual bool verify(luci::CircleNode *node) = 0; +}; + +/** + * @brief Verify using quantization type of a node and bias + * + * @tparam Qtype Quantization type for a node (e.g. Q8, Q16, ...) + * @tparam Btype Bias quantization type (e.g. 
For Q8, S32 is used) + */ +template <loco::DataType Qtype, loco::DataType Btype> +class VerifyQuantizedNodeTypeBase : public luci::CircleNodeVisitor<bool>, + public VerifyQuantizedNodeType +{ +public: + bool verify(luci::CircleNode *node) { return node->accept(this); } + +protected: + bool has_type(const loco::Node *node, loco::DataType dtype) + { + auto circle_node = loco::must_cast<const luci::CircleNode *>(node); + return circle_node->dtype() == dtype; + } + + // Check whether a node and all of its inputs have dtype or not + bool group_has_type(const loco::Node *node, loco::DataType dtype) + { + if (!has_type(node, dtype)) + return false; + + for (uint32_t i = 0; i < node->arity(); ++i) + if (!has_type(node->arg(i), dtype)) + return false; + + return true; + } + +private: + bool visit(const luci::CircleAdd *node); + bool visit(const luci::CircleArgMax *node); + bool visit(const luci::CircleAveragePool2D *node); + bool visit(const luci::CircleBatchToSpaceND *node); + bool visit(const luci::CircleCast *node); + bool visit(const luci::CircleConv2D *node); + bool visit(const luci::CircleConcatenation *node); + bool visit(const luci::CircleDepthToSpace *node); + bool visit(const luci::CircleDepthwiseConv2D *node); + bool visit(const luci::CircleDiv *node); + bool visit(const luci::CircleElu *node); + bool visit(const luci::CircleFloor *node); + bool visit(const luci::CircleFloorDiv *node); + bool visit(const luci::CircleFullyConnected *node); + bool visit(const luci::CircleGreater *node); + bool visit(const luci::CircleGreaterEqual *node); + bool visit(const luci::CircleInstanceNorm *node); + bool visit(const luci::CircleLocalResponseNormalization *node); + bool visit(const luci::CircleLogicalOr *node); + bool visit(const luci::CircleMaxPool2D *node); + bool visit(const luci::CircleMean *node); + bool visit(const luci::CircleMirrorPad *node); + bool visit(const luci::CircleMul *node); + bool visit(const luci::CircleNotEqual *node); + bool visit(const 
luci::CircleOneHot *node); + bool visit(const luci::CirclePack *node); + bool visit(const luci::CirclePad *node); + bool visit(const luci::CirclePadV2 *node); + bool visit(const luci::CirclePRelu *node); + bool visit(const luci::CirclePow *node); + bool visit(const luci::CircleRelu *node); + bool visit(const luci::CircleReshape *node); + bool visit(const luci::CircleResizeBilinear *node); + bool visit(const luci::CircleResizeNearestNeighbor *node); + bool visit(const luci::CircleRsqrt *node); + bool visit(const luci::CircleSlice *node); + bool visit(const luci::CircleSpaceToBatchND *node); + bool visit(const luci::CircleSpaceToDepth *node); + bool visit(const luci::CircleSplit *node); + bool visit(const luci::CircleSplitOut *node); + bool visit(const luci::CircleSplitV *node); + bool visit(const luci::CircleSplitVOut *node); + bool visit(const luci::CircleSqrt *node); + bool visit(const luci::CircleStridedSlice *node); + bool visit(const luci::CircleTranspose *node); + bool visit(const luci::CircleTransposeConv *node); + bool visit(const luci::CircleUnpack *node); + bool visit(const luci::CircleUnpackOut *node); + + // NOTE below nodes has differnent implementation for Qtype/Btype and + // implementations exist in VerifyQuantizedNodeU8Type, VerifyQuantizedNodeS16Type + // bool visit(const luci::CircleLogistic *node); + // bool visit(const luci::CircleSoftmax *node); + // bool visit(const luci::CircleTanh *node); + + // TODO: Implement more Ops + + bool visit(const luci::CircleNode *) { return true; } +}; + +class VerifyQuantizedNodeU8Type + : public VerifyQuantizedNodeTypeBase<loco::DataType::U8, loco::DataType::S32> +{ +private: + bool visit(const luci::CircleLogistic *node); + bool visit(const luci::CircleSoftmax *node); + bool visit(const luci::CircleTanh *node); +}; + +class VerifyQuantizedNodeS16Type + : public VerifyQuantizedNodeTypeBase<loco::DataType::S16, loco::DataType::S64> +{ +private: + bool visit(const luci::CircleLogistic *node); + bool visit(const 
luci::CircleSoftmax *node); + bool visit(const luci::CircleTanh *node); +}; + +} // namespace luci + +#endif // __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__ diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h b/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h deleted file mode 100644 index e7dd1b072..000000000 --- a/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h +++ /dev/null @@ -1,518 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__ -#define __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__ - -#include <luci/IR/CircleNodes.h> -#include <luci/IR/CircleNodeVisitor.h> - -#include <cmath> - -using Type = loco::DataType; - -// This macro is undef at the end of the file -#define RETURN_FALSE_UNLESS(ARG) \ - if (not(ARG)) \ - { \ - return false; \ - } - -namespace luci -{ - -/** - * @brief Verify the data type of UINT8 quantized node - * @details - * - * Targets to verify - * - node's output (i.e., node itself) - * - node's inputs - */ -struct VerifyQuantizedNodeU8Type final : public luci::CircleNodeVisitor<bool> -{ -private: - bool has_type(const loco::Node *node, Type dtype) - { - auto circle_node = loco::must_cast<const luci::CircleNode *>(node); - return circle_node->dtype() == dtype; - } - -private: - bool visit(const luci::CircleConv2D *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32)) - return true; - } - - bool visit(const luci::CircleConcatenation *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - for (uint32_t i = 0; i < node->numValues(); i++) - { - RETURN_FALSE_UNLESS(has_type(node->values(i), Type::U8)) - } - return true; - } - - bool visit(const luci::CircleDepthToSpace *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - return true; - } - - bool visit(const luci::CircleDepthwiseConv2D *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32)) - return true; - } - - bool visit(const luci::CircleInstanceNorm *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), 
Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->gamma(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->beta(), Type::U8)) - return true; - } - - bool visit(const luci::CirclePack *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - for (uint32_t i = 0; i < node->values_count(); i++) - { - RETURN_FALSE_UNLESS(has_type(node->values(i), Type::U8)) - } - return true; - } - - bool visit(const luci::CirclePad *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32)) - return true; - } - - bool visit(const luci::CirclePadV2 *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32)) - RETURN_FALSE_UNLESS(has_type(node->constant_values(), Type::U8)) - return true; - } - - bool visit(const luci::CircleMirrorPad *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32)) - return true; - } - - bool visit(const luci::CirclePRelu *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->alpha(), Type::U8)) - return true; - } - - bool visit(const luci::CircleTransposeConv *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8)) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - if (bias != nullptr) - RETURN_FALSE_UNLESS(has_type(bias, Type::S32)) - return true; - } - - bool visit(const luci::CircleFullyConnected *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->weights(), 
Type::U8)) - luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias()); - if (bias != nullptr) - RETURN_FALSE_UNLESS(has_type(bias, Type::S32)) - return true; - } - - bool visit(const luci::CircleAdd *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8)) - return true; - } - - bool visit(const luci::CircleAveragePool2D *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8)) - return true; - } - - bool visit(const luci::CircleBatchToSpaceND *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - return true; - } - - bool visit(const luci::CircleLogicalOr *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::BOOL)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::BOOL)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::BOOL)) - return true; - } - - bool visit(const luci::CircleMaxPool2D *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8)) - return true; - } - - bool visit(const luci::CircleLocalResponseNormalization *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - return true; - } - - bool visit(const luci::CircleMean *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), Type::S32)) - return true; - } - - bool visit(const luci::CircleMul *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8)) - return true; - } - - bool visit(const luci::CircleNotEqual *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::BOOL)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - 
RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8)) - return true; - } - - bool visit(const luci::CircleRelu *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->features(), Type::U8)) - return true; - } - - bool visit(const luci::CircleReshape *node) - { - if (node->quantparam()) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->tensor(), Type::U8)) - } - else - { - RETURN_FALSE_UNLESS(has_type(node->tensor(), node->dtype())) - } - luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape()); - if (shape != nullptr) - RETURN_FALSE_UNLESS(has_type(shape, Type::S32)) - return true; - } - - bool visit(const luci::CircleLogistic *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 256.0f); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0); - return true; - } - - bool visit(const luci::CircleSoftmax *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->logits(), Type::U8)) - - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 255.0f); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0); - return true; - } - - bool visit(const luci::CircleSpaceToBatchND *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - return true; - } - - bool visit(const luci::CircleSpaceToDepth *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - return true; - } - - bool visit(const luci::CircleSlice *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->begin(), Type::S32) || has_type(node->begin(), Type::S64)) 
- RETURN_FALSE_UNLESS(has_type(node->size(), Type::S32) || has_type(node->size(), Type::S64)) - return true; - } - - bool visit(const luci::CircleSplit *node) - { - // node's output is the input of CircleSplitOut, thus not quantized - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - return true; - } - - bool visit(const luci::CircleSplitOut *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - - // SplitOut has the same qparam with the input of Split - auto split = loco::must_cast<luci::CircleSplit *>(node->input()); - auto input = loco::must_cast<luci::CircleNode *>(split->input()); - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); - return true; - } - - bool visit(const luci::CircleSplitV *node) - { - // node's output is the input of CircleSplitVOut, thus not quantized - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - return true; - } - - bool visit(const luci::CircleSplitVOut *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - - // SplitVOut has the same qparam with the input of SplitV - auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input()); - auto input = loco::must_cast<luci::CircleNode *>(splitv->input()); - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); - return true; - } - - bool visit(const luci::CircleStridedSlice *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - - auto input = loco::must_cast<luci::CircleNode *>(node->input()); - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 
input->quantparam()->zerop[0]); - return true; - } - - bool visit(const luci::CircleArgMax *node) - { - RETURN_FALSE_UNLESS(has_type(node, node->output_type())) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->dimension(), Type::S32) || - has_type(node->dimension(), Type::S64)) - return true; - } - - bool visit(const luci::CircleTanh *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 2.0f / 256.0f); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 128); - return true; - } - - bool visit(const luci::CircleTranspose *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->a(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->perm(), Type::S32)) - return true; - } - - bool visit(const luci::CircleFloor *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - - // This checks the value of scale is an integer - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]); - return true; - } - - bool visit(const luci::CircleGreater *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::BOOL)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8)) - return true; - } - - bool visit(const luci::CircleGreaterEqual *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::BOOL)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8)) - return true; - } - - bool visit(const luci::CircleDiv *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8)) - return true; - } - - bool visit(const 
luci::CircleFloorDiv *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8)) - - // This checks the value of scale is an integer - RETURN_FALSE_UNLESS(node->quantparam()); - RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]); - return true; - } - - bool visit(const luci::CircleRsqrt *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - return true; - } - - bool visit(const luci::CircleSqrt *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - return true; - } - - bool visit(const luci::CircleElu *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->features(), Type::U8)) - return true; - } - - bool visit(const luci::CirclePow *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8)) - return true; - } - - bool visit(const luci::CircleResizeBilinear *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - return true; - } - - bool visit(const luci::CircleResizeNearestNeighbor *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) - return true; - } - - bool visit(const luci::CircleUnpack *node) - { - // node's output is the input of CircleUnpackOut, thus not quantized - RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8)) - return true; - } - - bool visit(const luci::CircleUnpackOut *node) - { - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - - // UnpackOut has the same qparam with the input of Unpack - auto Unpack = loco::must_cast<luci::CircleUnpack *>(node->input()); - auto input = loco::must_cast<luci::CircleNode 
*>(Unpack->value()); - RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam()); - RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); - RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); - return true; - } - - bool visit(const luci::CircleCast *node) - { - auto *input = loco::must_cast<luci::CircleNode *>(node->x()); - bool input_quantized = input->quantparam() != nullptr; - if (input_quantized) - { - RETURN_FALSE_UNLESS(has_type(input, node->in_data_type())) - RETURN_FALSE_UNLESS(has_type(input, Type::U8)) - } - - bool node_quantized = node->quantparam() != nullptr; - if (node_quantized) - { - RETURN_FALSE_UNLESS(has_type(node, node->out_data_type())) - RETURN_FALSE_UNLESS(has_type(node, Type::U8)) - } - return true; - } - - // TODO: Implement more Ops - - bool visit(const luci::CircleNode *) { return true; } -}; - -} // namespace luci - -#undef RETURN_FALSE_UNLESS - -#endif // __LUCI_VERIFY_QUNTIZED_NODE_U8_TYPE_H__ diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.cpp b/compiler/luci/pass/src/helpers/LayerInfoMap.cpp new file mode 100644 index 000000000..ac07f9ec9 --- /dev/null +++ b/compiler/luci/pass/src/helpers/LayerInfoMap.cpp @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "LayerInfoMap.h" + +#include <luci/IR/CircleNode.h> + +#include <cassert> + +namespace luci +{ +namespace +{ + +bool is_multiple_output_node(const luci::CircleNode *node) +{ + switch (node->opcode()) + { + // The following nodes have multiple outputs. Output tensors are not produced by themselves but + // by the corresponding *Out nodes. + case luci::CircleOpcode::SPLIT: + case luci::CircleOpcode::SPLIT_V: + case luci::CircleOpcode::TOPK_V2: + case luci::CircleOpcode::UNIQUE: + case luci::CircleOpcode::UNPACK: + return true; + // TODO: Support ops + case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM: + case luci::CircleOpcode::CUSTOM: + case luci::CircleOpcode::IF: + case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4: + case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5: + case luci::CircleOpcode::WHILE: + throw std::runtime_error("Unsupported op now"); + default: + return false; + } +} + +const luci::CircleNode *get_multi_output_node(const luci::CircleNode *node) +{ + if (is_multiple_output_node(node)) + return node; + + switch (node->opcode()) + { + // The following nodes denote outputs of multiple-output nodes. 
+ case luci::CircleOpcode::CIRCLESPLITOUT: + { + const auto split_out = loco::must_cast<const CircleSplitOut *>(node); + return loco::must_cast<luci::CircleNode *>(split_out->input()); + } + case luci::CircleOpcode::CIRCLESPLITVOUT: + { + const auto splitv_out = loco::must_cast<const CircleSplitVOut *>(node); + return loco::must_cast<luci::CircleNode *>(splitv_out->input()); + } + case luci::CircleOpcode::CIRCLETOPKV2OUT: + { + const auto top_kv2_out = loco::must_cast<const CircleTopKV2Out *>(node); + return loco::must_cast<luci::CircleNode *>(top_kv2_out->input()); + } + case luci::CircleOpcode::CIRCLEUNIQUEOUT: + { + const auto unique_out = loco::must_cast<const CircleUniqueOut *>(node); + return loco::must_cast<luci::CircleNode *>(unique_out->input()); + } + case luci::CircleOpcode::CIRCLEUNPACKOUT: + { + const auto unpack_out = loco::must_cast<const CircleUnpackOut *>(node); + return loco::must_cast<luci::CircleNode *>(unpack_out->input()); + } + // TODO: Support these ops + case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT: + case luci::CircleOpcode::CIRCLECUSTOMOUT: + case luci::CircleOpcode::CIRCLEIFOUT: + case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT: + case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT: + case luci::CircleOpcode::CIRCLEWHILEOUT: + throw std::runtime_error("Unsupported op now"); + default: + return nullptr; + } +} + +bool same_setting(const LayerInfo &left, const LayerInfo &right) +{ + return left.dtype == right.dtype and left.granularity == right.granularity; +} + +void add_multi_output_node(LayerInfoMap &info_by_name, LayerInfo &layer_info, + const luci::CircleNode *node) +{ + assert(is_multiple_output_node(node)); // FIX_CALLER_UNLESS + + const auto succs_nodes = loco::succs(node); + const auto name = node->name(); + + if (info_by_name.find(name) != info_by_name.end()) + { + // Check that all outputs have equal dtype and granularity + for (const auto succs_node : succs_nodes) + { + const auto succs_circle_node = 
loco::must_cast<luci::CircleNode *>(succs_node); + + const auto it = info_by_name.find(succs_circle_node->name()); + if (it != info_by_name.end() and not same_setting(layer_info, (it->second))) + throw std::runtime_error("Outputs of multiple-output nodes should have equal dtype and " + "granularity. Check the quantization configuration file"); + } + return; + } + + // Add multiple output node to info_by_name + info_by_name[name] = {name, layer_info.dtype, layer_info.granularity}; + + // Add outputs node to info_by_name + for (const auto succs_node : succs_nodes) + { + const auto succs_circle_node = loco::must_cast<luci::CircleNode *>(succs_node); + const auto succs_circle_node_name = succs_circle_node->name(); + info_by_name[succs_circle_node_name] = {succs_circle_node_name, layer_info.dtype, + layer_info.granularity}; + } +} + +} // namespace + +LayerInfoMap layer_info_map(loco::Graph *g, std::vector<LayerInfo> &layers_info) +{ + LayerInfoMap info_by_name; + + for (auto &&info : layers_info) + { + auto name = info.name; + bool found = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto cnode = loco::must_cast<luci::CircleNode *>(node); + if (cnode->opcode() == luci::CircleOpcode::CIRCLEOUTPUT) + continue; + + if (cnode->name() == name) + { + // Check and add multiple-output node and its outputs to info_by_name + if (const auto multi_output = get_multi_output_node(cnode)) + { + add_multi_output_node(info_by_name, info, multi_output); + found = true; + continue; + } + + if (info_by_name.find(name) != info_by_name.end()) + { + throw std::runtime_error("Duplicate layer name " + name + + ". Check layer names in the quantization configuration file."); + } + + info_by_name[name] = info; + found = true; + continue; + } + } + + if (not found) + throw std::runtime_error("No such layer named " + name + + ". 
Check layer names in the quantization configuration file."); + } + + // TODO Check all names in layers_info exist in the info_by_name + // TODO Check names in info_by_name but not in layers_info are from virtual outputs + + return info_by_name; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.h b/compiler/luci/pass/src/helpers/LayerInfoMap.h new file mode 100644 index 000000000..bb4724a50 --- /dev/null +++ b/compiler/luci/pass/src/helpers/LayerInfoMap.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__ +#define __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__ + +#include <luci/Pass/QuantizationParameters.h> + +#include <unordered_map> + +namespace luci +{ + +using LayerInfoMap = std::unordered_map<std::string, luci::LayerInfo>; + +LayerInfoMap layer_info_map(loco::Graph *g, std::vector<LayerInfo> &layers_info); + +} // namespace luci + +#endif // __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__ diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp b/compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp new file mode 100644 index 000000000..2ed28eda4 --- /dev/null +++ b/compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "LayerInfoMap.h" + +#include <luci/IR/CircleNode.h> +#include <luci/test/TestIOGraph.h> + +#include <gtest/gtest.h> + +namespace +{ + +class SoftmaxTestGraph : public luci::test::TestIOGraph +{ +public: + void init(void) + { + TestIOGraph::init({32}, {32}); + _softmax = g()->nodes()->create<luci::CircleSoftmax>(); + { + _softmax->logits(input()); + _softmax->beta(0.1); + _softmax->name("test"); + } + output()->from(_softmax); + } + +private: + luci::CircleSoftmax *_softmax = nullptr; +}; + +class SplitAddTestGraph : public luci::test::TestIOGraph +{ +public: + void init(void) + { + TestIOGraph::init({6, 1, 2}, {3, 1, 2}); + _split_dim = g()->nodes()->create<luci::CircleConst>(); + { + _split_dim->rank(1); + _split_dim->dtype(loco::DataType::S32); + _split_dim->size<loco::DataType::S32>(1); + _split_dim->at<loco::DataType::S32>(0); + _split_dim->shape({1}); + _split_dim->name("split_dim"); + } + + _split = g()->nodes()->create<luci::CircleSplit>(); + { + _split->input(input()); + _split->num_split(2); + _split->split_dim(_split_dim); + _split->name("split0"); + } + + _split_out_1 = g()->nodes()->create<luci::CircleSplitOut>(); + { + _split_out_1->input(_split); + _split_out_1->index(0); + _split_out_1->name("split0"); + } + + _split_out_2 = g()->nodes()->create<luci::CircleSplitOut>(); + { + _split_out_2->input(_split); + _split_out_2->index(1); + 
_split_out_2->name("split1"); + } + + _add = g()->nodes()->create<luci::CircleAdd>(); + { + _add->x(_split_out_1); + _add->y(_split_out_2); + _add->name("add"); + } + output()->from(_add); + } + +private: + luci::CircleSplit *_split = nullptr; + luci::CircleSplitOut *_split_out_1 = nullptr; + luci::CircleSplitOut *_split_out_2 = nullptr; + luci::CircleConst *_split_dim = nullptr; + luci::CircleAdd *_add = nullptr; +}; + +} // namespace + +TEST(LayerInfoMapTest, simple_test) +{ + SoftmaxTestGraph g; + g.init(); + + luci::LayerInfo info; + { + info.name = "test"; + info.dtype = loco::DataType::U8; + info.granularity = luci::QuantizationGranularity::ChannelWise; + } + std::vector<luci::LayerInfo> v; + v.emplace_back(info); + auto map = luci::layer_info_map(g.g(), v); + + EXPECT_EQ("test", map["test"].name); + EXPECT_EQ(loco::DataType::U8, map["test"].dtype); + EXPECT_EQ(luci::QuantizationGranularity::ChannelWise, map["test"].granularity); +} + +TEST(LayerInfoMapTest, multiple_output_node_test) +{ + SplitAddTestGraph g; + g.init(); + + luci::LayerInfo info; + { + info.name = "split0"; + info.dtype = loco::DataType::U8; + info.granularity = luci::QuantizationGranularity::ChannelWise; + } + std::vector<luci::LayerInfo> v; + v.emplace_back(info); + auto map = luci::layer_info_map(g.g(), v); + + EXPECT_EQ(map.size(), 2); + EXPECT_EQ("split0", map["split0"].name); + EXPECT_EQ("split1", map["split1"].name); + + EXPECT_EQ(loco::DataType::U8, map["split0"].dtype); + EXPECT_EQ(luci::QuantizationGranularity::ChannelWise, map["split0"].granularity); +} + +TEST(LayerInfoMapTest, invalid_layer_info_multiple_output_node_NEG) +{ + SplitAddTestGraph g; + g.init(); + + luci::LayerInfo info_0; + { + info_0.name = "split0"; + info_0.dtype = loco::DataType::U8; + info_0.granularity = luci::QuantizationGranularity::ChannelWise; + } + luci::LayerInfo info_1; + { + info_1.name = "split1"; + info_1.dtype = loco::DataType::S16; + info_1.granularity = luci::QuantizationGranularity::ChannelWise; 
+ } + std::vector<luci::LayerInfo> v; + v.emplace_back(info_0); + v.emplace_back(info_1); + + EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v)); +} + +TEST(LayerInfoMapTest, duplicate_name_NEG) +{ + SoftmaxTestGraph g; + g.init(); + g.input()->name("test"); + + luci::LayerInfo info; + { + info.name = "test"; + info.dtype = loco::DataType::U8; + info.granularity = luci::QuantizationGranularity::ChannelWise; + } + std::vector<luci::LayerInfo> v; + v.emplace_back(info); + EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v)); +} + +TEST(LayerInfoMapTest, no_name_NEG) +{ + SoftmaxTestGraph g; + g.init(); + + luci::LayerInfo info; + { + info.name = "noname"; + info.dtype = loco::DataType::U8; + info.granularity = luci::QuantizationGranularity::ChannelWise; + } + std::vector<luci::LayerInfo> v; + v.emplace_back(info); + EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v)); +} diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake index 3ccc58128..e896188be 100644 --- a/compiler/luci/requires.cmake +++ b/compiler/luci/requires.cmake @@ -4,8 +4,8 @@ require("loco") require("locop") require("logo") require("logo-core") -require("mio-circle") -require("mio-tflite") +require("mio-circle04") +require("mio-tflite280") require("oops") require("hermes") require("hermes-std") diff --git a/compiler/luci/service/CMakeLists.txt b/compiler/luci/service/CMakeLists.txt index 0e6097f96..24bdfc152 100644 --- a/compiler/luci/service/CMakeLists.txt +++ b/compiler/luci/service/CMakeLists.txt @@ -10,7 +10,6 @@ add_library(luci_service ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_service PRIVATE src) target_include_directories(luci_service PUBLIC include) target_link_libraries(luci_service PUBLIC luci_lang) -target_link_libraries(luci_service PUBLIC mio_circle) target_link_libraries(luci_service PUBLIC logo_core) target_link_libraries(luci_service PRIVATE luci_log) target_link_libraries(luci_service PRIVATE luci_logex) diff --git 
a/compiler/luci/service/include/luci/Service/CircleShapeInference.h b/compiler/luci/service/include/luci/Service/CircleShapeInference.h index ead12d074..2c1120941 100644 --- a/compiler/luci/service/include/luci/Service/CircleShapeInference.h +++ b/compiler/luci/service/include/luci/Service/CircleShapeInference.h @@ -17,11 +17,12 @@ #ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_H__ #define __LUCI_CIRCLE_SHAPE_INFERENCE_H__ -#include <loco/IR/Nodes.h> - +#include <luci/Service/CircleShapeInferenceRule.h> #include <luci/IR/CircleNodes.h> #include <luci/IR/CircleNodeVisitor.h> -#include <luci/Service/CircleShapeInferenceRule.h> + +#include <loco/IR/NodeShape.h> +#include <loco/IR/TensorShape.h> namespace luci { diff --git a/compiler/luci/service/include/luci/Service/CircleTypeInference.h b/compiler/luci/service/include/luci/Service/CircleTypeInference.h index d62731380..e0ceabeac 100644 --- a/compiler/luci/service/include/luci/Service/CircleTypeInference.h +++ b/compiler/luci/service/include/luci/Service/CircleTypeInference.h @@ -17,13 +17,11 @@ #ifndef __LUCI_CIRCLE_TYPE_INFERENCE_H__ #define __LUCI_CIRCLE_TYPE_INFERENCE_H__ -#include <loco/IR/Nodes.h> - -#include <mio/circle/schema_generated.h> - +#include <luci/Service/CircleTypeInferenceRule.h> #include <luci/IR/CircleNodes.h> #include <luci/IR/CircleNodeVisitor.h> -#include <luci/Service/CircleTypeInferenceRule.h> + +#include <loco/IR/DataType.h> namespace luci { diff --git a/compiler/luci/service/src/CircleCloneNode.h b/compiler/luci/service/src/CircleCloneNode.h index 3926147f5..99e4561b3 100644 --- a/compiler/luci/service/src/CircleCloneNode.h +++ b/compiler/luci/service/src/CircleCloneNode.h @@ -208,6 +208,7 @@ public: luci::CircleNode *visit(const luci::CircleSquaredDifference *) final; luci::CircleNode *visit(const luci::CircleSqueeze *) final; luci::CircleNode *visit(const luci::CircleStridedSlice *) final; + luci::CircleNode *visit(const luci::CircleSVDF *) final; luci::CircleNode *visit(const luci::CircleSub *) 
final; luci::CircleNode *visit(const luci::CircleSum *) final; luci::CircleNode *visit(const luci::CircleTanh *) final; @@ -269,6 +270,7 @@ public: luci::CircleNode *visit(const luci::CircleTopKV2Out *) final; luci::CircleNode *visit(const luci::CircleUniqueOut *) final; luci::CircleNode *visit(const luci::CircleUnpackOut *) final; + luci::CircleNode *visit(const luci::CircleVariable *) final; luci::CircleNode *visit(const luci::CircleWhileOut *) final; // Handle in CircleNode diff --git a/compiler/luci/service/src/CircleNodeClone.cpp b/compiler/luci/service/src/CircleNodeClone.cpp index d2033dd0c..220c6096c 100644 --- a/compiler/luci/service/src/CircleNodeClone.cpp +++ b/compiler/luci/service/src/CircleNodeClone.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "luci/IR/CircleQuantParam.h" #include "luci/Service/CircleNodeClone.h" #include "CircleCloneNode.h" @@ -45,18 +46,7 @@ void copy_common_attributes(const luci::CircleNode *src, luci::CircleNode *dst) dst->shape_status(src->shape_status()); // quantparam - const auto *quantparam = src->quantparam(); - if (quantparam != nullptr) - { - auto qparam = std::make_unique<luci::CircleQuantParam>(); - qparam->scale = quantparam->scale; - qparam->zerop = quantparam->zerop; - qparam->min = quantparam->min; - qparam->max = quantparam->max; - qparam->quantized_dimension = quantparam->quantized_dimension; - - dst->quantparam(std::move(qparam)); - } + copy_quantparam(src, dst); // sparsity const auto *sparsity = src->sparsityparam(); diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp index 5d6a31050..9d156f3e2 100644 --- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -196,23 +197,18 @@ template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node) return loco::NodeShape{output_shape}; } -template <class CIRCLENODE> loco::NodeShape use_inputs(const CIRCLENODE *node) -{ - auto inputs_shape = luci::shape_get(node->inputs()).template as<loco::TensorShape>(); - return loco::NodeShape{inputs_shape}; -} +#define DECLARE_USE_SINGLE(NAME) \ + template <class CIRCLENODE> loco::NodeShape use_##NAME(const CIRCLENODE *node) \ + { \ + auto inputs_shape = luci::shape_get(node->NAME()).template as<loco::TensorShape>(); \ + return loco::NodeShape{inputs_shape}; \ + } -template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node) -{ - auto x_shape = luci::shape_get(node->x()).template as<loco::TensorShape>(); - return loco::NodeShape{x_shape}; -} +DECLARE_USE_SINGLE(inputs); +DECLARE_USE_SINGLE(x); +DECLARE_USE_SINGLE(logits); -template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node) -{ - auto shape = luci::shape_get(node->logits()).template as<loco::TensorShape>(); - return loco::NodeShape{shape}; -} +#undef DECLARE_USE_SINGLE template <class CIRCLENODE> loco::NodeShape use_paddings(const CIRCLENODE *node, const luci::CircleConst *paddings) @@ -721,6 +717,8 @@ loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node) auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); auto weights_shape = luci::shape_get(node->weights()).as<loco::TensorShape>(); +// TODO Remove following unused code +#if 0 // Checking shape capability for fully connected layer // Input: a tensor of at least rank 2 [D1, D2, ... 
Dn] // Weight: [# of units, K] @@ -741,6 +739,40 @@ loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node) out_shape.rank(2); out_shape.dim(0) = batch_size; out_shape.dim(1) = weights_shape.dim(0); +#endif + + loco::TensorShape out_shape; + + // NOTE Some recipes in some repositories are using rank 4 input for FullyConnected. + // Until they are all fixed, disable following assert. + // TODO Enable following assert after related fixes are applied + // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L194 + // LUCI_ASSERT(input_shape.rank() == 2 || input_shape.rank() == 3, + // "Input rank of FullyConnected should be 2 or 3"); + + // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L225 + LUCI_ASSERT(weights_shape.rank() == 2, "Weights of FullyConnected should be 2"); + + // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L353-L367 + if (node->keep_num_dims()) + { + out_shape.rank(input_shape.rank()); + for (uint32_t i = 0; i < input_shape.rank(); ++i) + out_shape.dim(i) = input_shape.dim(i); + out_shape.dim(out_shape.rank() - 1) = weights_shape.dim(0); + } + else + { + uint32_t input_size = 1; + for (uint32_t i = 0; i < input_shape.rank(); i++) + { + input_size = input_size * input_shape.dim(i).value(); + } + const uint32_t batch_size = input_size / weights_shape.dim(1).value(); + out_shape.rank(2); + out_shape.dim(0) = batch_size; + out_shape.dim(1) = weights_shape.dim(0); + } return loco::NodeShape{out_shape}; } @@ -1554,6 +1586,30 @@ loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node) return loco::NodeShape{output_shape}; } +loco::NodeShape infer_svdf(const luci::CircleSVDF *node) +{ + const auto ifm_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); + const auto 
weight_feature_shape = luci::shape_get(node->weight_feature()).as<loco::TensorShape>(); + + assert(ifm_shape.rank() == 2); + assert(weight_feature_shape.rank() == 2); + + assert(ifm_shape.dim(1) == weight_feature_shape.dim(1)); + assert(weight_feature_shape.dim(0).known()); + + const auto rank = node->svdf_rank(); + const auto num_filters = weight_feature_shape.dim(0).value(); + assert(num_filters % rank == 0); + const auto num_units = num_filters / rank; + + loco::TensorShape ofm_shape; + ofm_shape.rank(2); + ofm_shape.dim(0) = ifm_shape.dim(0); + ofm_shape.dim(1) = num_units; + + return loco::NodeShape{ofm_shape}; +} + loco::NodeShape infer_tile(const luci::CircleTile *node) { const loco::DataType S32 = loco::DataType::S32; @@ -2393,6 +2449,8 @@ public: return loco::NodeShape{output_shape}; } + loco::NodeShape visit(const luci::CircleSVDF *node) final { return infer_svdf(node); } + loco::NodeShape visit(const luci::CircleTanh *node) final { return use_x(node); } loco::NodeShape visit(const luci::CircleTile *node) final { return infer_tile(node); } @@ -2486,6 +2544,8 @@ public: loco::NodeShape visit(const luci::CircleUnpackOut *node) final { return infer_unpack_out(node); } + loco::NodeShape visit(const luci::CircleVariable *node) final { return use_own(node); } + loco::NodeShape visit(const luci::CircleWhileOut *node) final { return infer_while_out(node); } }; diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp index 5f6d46f2b..438c4a364 100644 --- a/compiler/luci/service/src/CircleTypeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp @@ -478,6 +478,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT loco::DataType visit(const luci::CircleSum *node) final { return luci::dtype_get(node->input()); } + loco::DataType visit(const luci::CircleSVDF *node) final + { + return luci::dtype_get(node->input()); + } + loco::DataType visit(const 
luci::CircleTanh *node) final { return luci::dtype_get(node->x()); } loco::DataType visit(const luci::CircleTile *node) final @@ -605,6 +610,8 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT return loco::DataType::S32; } + loco::DataType visit(const luci::CircleVariable *node) final { return node->dtype(); } + loco::DataType visit(const luci::CircleUniqueOut *node) final { if (node->index() == 0) diff --git a/compiler/luci/service/src/Nodes/CircleSVDF.cpp b/compiler/luci/service/src/Nodes/CircleSVDF.cpp new file mode 100644 index 000000000..d4c3ce88f --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleSVDF.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CircleCloneNode.h" + +namespace luci +{ + +luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSVDF *node) +{ + if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED) + return nullptr; + + auto *cloned = _graph->nodes()->create<luci::CircleSVDF>(); + if (cloned != nullptr) + { + cloned->fusedActivationFunction(node->fusedActivationFunction()); + cloned->asymmetric_quantize_inputs(node->asymmetric_quantize_inputs()); + cloned->svdf_rank(node->svdf_rank()); + } + return cloned; +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/service/src/Nodes/CircleSVDF.test.cpp new file mode 100644 index 000000000..d6edaf1cc --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleSVDF.test.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Service/CircleNodeClone.h" + +#include <gtest/gtest.h> + +TEST(CloneNodeTest, clone_SVDF) +{ + auto g = loco::make_graph(); + auto node_svdf = g->nodes()->create<luci::CircleSVDF>(); + node_svdf->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto gc = loco::make_graph(); + auto cloned = luci::clone_node(node_svdf, gc.get()); + ASSERT_NE(nullptr, cloned); + ASSERT_EQ(gc.get(), cloned->graph()); + + auto cloned_svdf = dynamic_cast<luci::CircleSVDF *>(cloned); + ASSERT_NE(nullptr, cloned_svdf); + ASSERT_EQ(node_svdf->asymmetric_quantize_inputs(), cloned_svdf->asymmetric_quantize_inputs()); + ASSERT_EQ(node_svdf->svdf_rank(), cloned_svdf->svdf_rank()); +} + +TEST(CloneNodeTest, clone_SVDF_NEG) +{ + auto g = loco::make_graph(); + auto node_svdf = g->nodes()->create<luci::CircleSVDF>(); + node_svdf->fusedActivationFunction(luci::FusedActFunc::UNDEFINED); + + auto gc = loco::make_graph(); + auto cloned = luci::clone_node(node_svdf, gc.get()); + ASSERT_EQ(nullptr, cloned); +} diff --git a/compiler/luci/service/src/Nodes/CircleVariable.cpp b/compiler/luci/service/src/Nodes/CircleVariable.cpp new file mode 100644 index 000000000..c1430bd3a --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleVariable.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CircleCloneNode.h" + +namespace luci +{ + +luci::CircleNode *CloneNode::visit(const luci::CircleVariable *) +{ + return _graph->nodes()->create<luci::CircleVariable>(); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleVariable.test.cpp b/compiler/luci/service/src/Nodes/CircleVariable.test.cpp new file mode 100644 index 000000000..7d29438be --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleVariable.test.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Service/CircleNodeClone.h" + +#include <gtest/gtest.h> + +TEST(CloneNodeTest, clone_Variable) +{ + auto g = loco::make_graph(); + auto node_dummy = g->nodes()->create<luci::CircleVariable>(); + + auto gc = loco::make_graph(); + auto cloned = luci::clone_node(node_dummy, gc.get()); + ASSERT_NE(nullptr, cloned); + ASSERT_EQ(gc.get(), cloned->graph()); + + auto cloned_variable = dynamic_cast<luci::CircleVariable *>(cloned); + ASSERT_NE(nullptr, cloned_variable); +} diff --git a/compiler/luci/tests/CMakeLists.txt b/compiler/luci/tests/CMakeLists.txt index c03835823..1333efb7d 100644 --- a/compiler/luci/tests/CMakeLists.txt +++ b/compiler/luci/tests/CMakeLists.txt @@ -1,3 +1,14 @@ +set(CIRCLECHEF_FILE_PATH $<TARGET_FILE:circlechef-file>) +set(TFLCHEF_FILE_PATH $<TARGET_FILE:tflchef-file>) +set(TFLITE2CIRCLE_PATH $<TARGET_FILE:tflite2circle>) +if(DEFINED ENV{BUILD_HOST_EXEC}) + # TODO use better way to represent path for host executable + set(CIRCLECHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/file/circlechef-file) + set(TFLCHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/file/tflchef-file) + set(TFLITE2CIRCLE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflite2circle/tflite2circle) + message(STATUS "TFLITE2CIRCLE_PATH = ${TFLITE2CIRCLE_PATH}") +endif(DEFINED ENV{BUILD_HOST_EXEC}) + # TODO use local test.recipe files for small networks file(GLOB RECIPES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.recipe") @@ -17,14 +28,14 @@ foreach(RECIPE IN ITEMS ${RECIPES}) # Generate .tflite add_custom_command(OUTPUT "${RECIPE_OUTPUT_FILE}" - COMMAND tflchef-file "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}" - DEPENDS tflchef-file "${RECIPE_SOURCE_FILE}" + COMMAND ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}" + DEPENDS ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" COMMENT "Generating ${RECIPE_OUTPUT_FILE}") # Generate .circle add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}" - COMMAND tflite2circle 
"${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}" - DEPENDS tflite2circle "${RECIPE_OUTPUT_FILE}" + COMMAND ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}" + DEPENDS ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}" COMMENT "Generating ${CIRCLE_OUTPUT_FILE}") list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}") @@ -52,14 +63,14 @@ foreach(RECIPE IN ITEMS ${RECIPES}) # Generate .tflite add_custom_command(OUTPUT "${RECIPE_OUTPUT_FILE}" - COMMAND tflchef-file "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}" - DEPENDS tflchef-file "${RECIPE_SOURCE_FILE}" + COMMAND ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}" + DEPENDS ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" COMMENT "Generating ${RECIPE_OUTPUT_FILE}") # Generate .circle add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}" - COMMAND tflite2circle "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}" - DEPENDS tflite2circle "${RECIPE_OUTPUT_FILE}" + COMMAND ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}" + DEPENDS ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}" COMMENT "Generating ${CIRCLE_OUTPUT_FILE}") list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}") @@ -87,8 +98,8 @@ foreach(RECIPE IN ITEMS ${RECIPES2}) # Generate .circle add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}" - COMMAND circlechef-file "${RECIPE_SOURCE_FILE}" "${CIRCLE_OUTPUT_FILE}" - DEPENDS circlechef-file "${RECIPE_SOURCE_FILE}" + COMMAND ${CIRCLECHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${CIRCLE_OUTPUT_FILE}" + DEPENDS ${CIRCLECHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" COMMENT "Generating ${CIRCLE_OUTPUT_FILE}") list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}") @@ -111,6 +122,8 @@ include("test.lst") # Read "test.local.lst" if exists include("test.local.lst" OPTIONAL) +# NOTE $<TARGET_FILE:luci_readtester> is used as-is as test itself should +# run in target device for cross build also add_test(NAME luci_unit_readtest COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/readverify.sh" "${CMAKE_CURRENT_BINARY_DIR}" diff --git 
a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst index 28ddcf672..94e723f21 100644 --- a/compiler/luci/tests/test.lst +++ b/compiler/luci/tests/test.lst @@ -180,6 +180,8 @@ addread(Sub_000) addread(Sub_U8_000) addread(Sum_000) addread(Sum_001) +addread(SVDF_000) +addread(SVDF_001) addread(Tanh_000) addread(Tanh_U8_000) addread(Tile_000) @@ -403,6 +405,8 @@ addwrite(Sub_000) addwrite(Sub_U8_000) addwrite(Sum_000) addwrite(Sum_001) +addwrite(SVDF_000) +addwrite(SVDF_001) addwrite(Tanh_000) addwrite(Tanh_U8_000) addwrite(Tile_000) diff --git a/compiler/mio-circle/CMakeLists.txt b/compiler/mio-circle/CMakeLists.txt index fa05ef0fa..d24717343 100644 --- a/compiler/mio-circle/CMakeLists.txt +++ b/compiler/mio-circle/CMakeLists.txt @@ -1,13 +1,14 @@ -nnas_find_package(FlatBuffers EXACT 1.10 QUIET) +nnas_find_package(FlatBuffers EXACT 2.0 QUIET) if(NOT FlatBuffers_FOUND) + message(STATUS "mio-circle skip: FlatBuffers 2.0 NOT FOUND") return() endif(NOT FlatBuffers_FOUND) message(STATUS "Build mio-circle: TRUE") # TODO Find a better way -set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs") +set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.3/circle_schema.fbs") # NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs" add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs" @@ -26,3 +27,10 @@ FlatBuffers_Target(mio_circle # This example shows how to use "mio-circle" library add_executable(mio_circle_example example.cpp) target_link_libraries(mio_circle_example mio_circle) + +file(GLOB_RECURSE SOURCES "src/*.cpp") + +add_library(mio_circle_helper STATIC ${SOURCES}) +target_include_directories(mio_circle_helper PRIVATE src) +target_include_directories(mio_circle_helper PUBLIC include) +target_link_libraries(mio_circle_helper mio_circle) diff --git a/compiler/mio-circle/include/mio_circle/Helper.h b/compiler/mio-circle/include/mio_circle/Helper.h new 
file mode 100644 index 000000000..c0f8115fe --- /dev/null +++ b/compiler/mio-circle/include/mio_circle/Helper.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MIO_CIRCLE_HELPER_H__ +#define __MIO_CIRCLE_HELPER_H__ + +#include <mio/circle/schema_generated.h> + +namespace mio +{ +namespace circle +{ + +bool is_valid(const ::circle::OperatorCode *opcode); +bool is_custom(const ::circle::OperatorCode *opcode); +std::string opcode_name(const ::circle::OperatorCode *opcode); +const char *tensor_type(const ::circle::Tensor *tensor); +const char *tensor_name(const ::circle::Tensor *tensor); + +} // namespace circle +} // namespace mio + +#endif // __MIO_CIRCLE_HELPER_H__ diff --git a/compiler/mio-circle/src/Helper.cpp b/compiler/mio-circle/src/Helper.cpp new file mode 100644 index 000000000..6f30c8c10 --- /dev/null +++ b/compiler/mio-circle/src/Helper.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mio_circle/Helper.h" + +#include <sstream> + +namespace mio +{ +namespace circle +{ + +bool is_valid(const ::circle::OperatorCode *opcode) +{ + ::circle::BuiltinOperator code = opcode->builtin_code(); + return (::circle::BuiltinOperator_MIN <= code && code <= ::circle::BuiltinOperator_MAX); +} + +bool is_custom(const ::circle::OperatorCode *opcode) +{ + ::circle::BuiltinOperator code = opcode->builtin_code(); + return (code == ::circle::BuiltinOperator_CUSTOM); +} + +std::string opcode_name(const ::circle::OperatorCode *opcode) +{ + assert(opcode); + + if (!is_valid(opcode)) + { + std::ostringstream oss; + oss << "(invalid)"; + return oss.str(); + } + + if (is_custom(opcode)) + { + if (!opcode->custom_code()) + return "(invalid custom)"; + + std::string custom_op = "CUSTOM("; + custom_op += opcode->custom_code()->c_str(); + custom_op += ")"; + return custom_op; + } + + ::circle::BuiltinOperator code = opcode->builtin_code(); + return ::circle::EnumNameBuiltinOperator(code); +} + +const char *tensor_type(const ::circle::Tensor *tensor) +{ + return ::circle::EnumNameTensorType(tensor->type()); +} + +const char *tensor_name(const ::circle::Tensor *tensor) +{ + static const char *kEmptyTensorName = "(noname)"; + + auto name = tensor->name(); + if (name) + return name->c_str(); + + return kEmptyTensorName; +} + +} // namespace circle +} // namespace mio diff --git a/compiler/mio-circle04/CMakeLists.txt b/compiler/mio-circle04/CMakeLists.txt new file mode 100644 index 000000000..8ee6da44c --- /dev/null +++ 
b/compiler/mio-circle04/CMakeLists.txt @@ -0,0 +1,52 @@ +nnas_find_package(FlatBuffers EXACT 2.0 QUIET) + +if(NOT FlatBuffers_FOUND) + message(STATUS "mio-circle04 skip: FlatBuffers 2.0 NOT FOUND") + return() +endif(NOT FlatBuffers_FOUND) + +message(STATUS "Build mio-circle04: TRUE") + +# TODO Find a better way +# TODO use nnpackage +# set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs") +set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.4/circle_schema.fbs") + +# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs" +add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs" + COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + DEPENDS "${SCHEMA_FILE}" +) + +FlatBuffers_Target(mio_circle04 + OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/circle" + INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen" + SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}" + SCHEMA_FILES "schema.fbs" +) + +# This example shows how to use "mio-circle04" library +add_executable(mio_circle04_example example.cpp) +target_link_libraries(mio_circle04_example mio_circle04) + +file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) + +add_library(mio_circle04_helper STATIC ${SOURCES}) +set_target_properties(mio_circle04_helper PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_include_directories(mio_circle04_helper PRIVATE src) +target_include_directories(mio_circle04_helper PUBLIC include) +target_link_libraries(mio_circle04_helper mio_circle04) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(mio_circle04_helper_test ${TESTS}) +target_include_directories(mio_circle04_helper_test PRIVATE src) +target_link_libraries(mio_circle04_helper_test mio_circle04) +target_link_libraries(mio_circle04_helper_test 
mio_circle04_helper) diff --git a/compiler/mio-circle04/README.md b/compiler/mio-circle04/README.md new file mode 100644 index 000000000..d12dd78ff --- /dev/null +++ b/compiler/mio-circle04/README.md @@ -0,0 +1,3 @@ +# mio-circle04 + +Let's make it easy to read and write Circle models. diff --git a/compiler/mio-circle04/example.cpp b/compiler/mio-circle04/example.cpp new file mode 100644 index 000000000..1970f4066 --- /dev/null +++ b/compiler/mio-circle04/example.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// +// This example shows how to include and use "mio-circle04" +// +#include <mio/circle/schema_generated.h> + +#include <fstream> +#include <iostream> +#include <vector> + +int main(int argc, char **argv) +{ + std::ifstream ifs(argv[1], std::ios_base::binary); + std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{}); + + flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()}; + + if (!circle::VerifyModelBuffer(verifier)) + { + std::cout << "Fail" << std::endl; + return 255; + } + + std::cout << "Pass" << std::endl; + return 0; +} diff --git a/compiler/mio-circle04/include/mio_circle/Helper.h b/compiler/mio-circle04/include/mio_circle/Helper.h new file mode 100644 index 000000000..d3ffc23e5 --- /dev/null +++ b/compiler/mio-circle04/include/mio_circle/Helper.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MIO_CIRCLE04_HELPER_H__ +#define __MIO_CIRCLE04_HELPER_H__ + +#include <mio/circle/schema_generated.h> + +namespace mio +{ +namespace circle +{ + +::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode); +bool is_valid(const ::circle::OperatorCode *opcode); +bool is_custom(const ::circle::OperatorCode *opcode); +std::string opcode_name(const ::circle::OperatorCode *opcode); +const char *tensor_type(const ::circle::Tensor *tensor); +const char *tensor_name(const ::circle::Tensor *tensor); + +} // namespace circle +} // namespace mio + +#endif // __MIO_CIRCLE04_HELPER_H__ diff --git a/compiler/mio-circle04/src/Helper.cpp b/compiler/mio-circle04/src/Helper.cpp new file mode 100644 index 000000000..8b8737a2d --- /dev/null +++ b/compiler/mio-circle04/src/Helper.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mio_circle/Helper.h" + +#include <algorithm> +#include <sstream> + +namespace mio +{ +namespace circle +{ + +/** + * This will provide v3/v3a/v3b format neutral BuiltinOperator + * NOTE circle has minus value opcode (252~254 as uint8_t) + * we cannot use std::max() like tflite as deprecated_builtin_code can be + * minus and builtin_code being 0 for v0.3 files. 
+ */ +::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode) +{ + assert(opcode != nullptr); + if (opcode->deprecated_builtin_code() == 127) + { + assert(opcode->builtin_code() >= 127); + return opcode->builtin_code(); + } + // There was no 255(-1) value in v0.3 + assert(opcode->deprecated_builtin_code() != -1); + return static_cast<::circle::BuiltinOperator>(opcode->deprecated_builtin_code()); +} + +bool is_valid(const ::circle::OperatorCode *opcode) +{ + // Valid Range : BuiltinOperator_MIN <= deprecated_builtin_code <= 127 + const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code(); + if (deprecated_builtin_code < ::circle::BuiltinOperator_MIN) + return false; + // There was no 255(-1) value in v0.3 + if (deprecated_builtin_code == -1) + return false; + + const ::circle::BuiltinOperator builtin_code = opcode->builtin_code(); + if (!(::circle::BuiltinOperator_MIN <= builtin_code && + builtin_code <= ::circle::BuiltinOperator_MAX)) + return false; + + return true; +} + +bool is_custom(const ::circle::OperatorCode *opcode) +{ + ::circle::BuiltinOperator code = builtin_code_neutral(opcode); + return (code == ::circle::BuiltinOperator_CUSTOM); +} + +std::string opcode_name(const ::circle::OperatorCode *opcode) +{ + assert(opcode); + + if (!is_valid(opcode)) + { + std::ostringstream oss; + oss << "(invalid)"; + return oss.str(); + } + + if (is_custom(opcode)) + { + if (!opcode->custom_code()) + return "(invalid custom)"; + + std::string custom_op = "CUSTOM("; + custom_op += opcode->custom_code()->c_str(); + custom_op += ")"; + return custom_op; + } + + ::circle::BuiltinOperator code = builtin_code_neutral(opcode); + return ::circle::EnumNameBuiltinOperator(code); +} + +const char *tensor_type(const ::circle::Tensor *tensor) +{ + return ::circle::EnumNameTensorType(tensor->type()); +} + +const char *tensor_name(const ::circle::Tensor *tensor) +{ + if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty()) 
+ return "(noname)"; + + return tensor->name()->c_str(); +} + +} // namespace circle +} // namespace mio diff --git a/compiler/mio-circle04/src/Helper.test.cpp b/compiler/mio-circle04/src/Helper.test.cpp new file mode 100644 index 000000000..20fce0843 --- /dev/null +++ b/compiler/mio-circle04/src/Helper.test.cpp @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mio_circle/Helper.h" + +#include <flatbuffers/flatbuffers.h> +#include <gtest/gtest.h> + +#include <vector> + +class mio_circle04_helper_test : public ::testing::Test +{ +protected: + void initialization_finish(void) + { + _fbb.Finish(circle::CreateModelDirect(_fbb, 0, &_opcodes_vec)); + } + +protected: + void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code, + circle::BuiltinOperator builtin_code) + { + _opcodes_vec.push_back(circle::CreateOperatorCodeDirect( + _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code)); + } + + const circle::OperatorCode *get_operator_code(uint8_t idx) + { + return circle::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx); + } + +private: + flatbuffers::FlatBufferBuilder _fbb; + std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec; +}; + +TEST_F(mio_circle04_helper_test, v04) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CONV_2D = 3 + add_operator_code(3, "", 
circle::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CONV_2D); + ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle04_helper_test, v04_custom_old) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CUSTOM = 32 + add_operator_code(32, "custom", circle::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CUSTOM); + ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle04_helper_test, v04_NEG) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", circle::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} + +TEST_F(mio_circle04_helper_test, v04_under127) +{ + // BuiltinOperator_CONV_2D = 3 + add_operator_code(3, "", circle::BuiltinOperator_CONV_2D); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CONV_2D); + ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle04_helper_test, v04_under127_NEG) +{ + // BuiltinOperator_CONV_2D = 3 + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", circle::BuiltinOperator_CONV_2D); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} + +TEST_F(mio_circle04_helper_test, v04_custom) +{ + // BuiltinOperator_CUSTOM = 32 + add_operator_code(32, "custom", circle::BuiltinOperator_CUSTOM); + initialization_finish(); + + 
ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CUSTOM); + ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle04_helper_test, v04_custom_NEG) +{ + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "custom", circle::BuiltinOperator_CUSTOM); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} + +TEST_F(mio_circle04_helper_test, v04_over127) +{ + // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127 + // BuiltinOperator_CUMSUM = 128 + add_operator_code(127, "", circle::BuiltinOperator_CUMSUM); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CUMSUM); + ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle04_helper_test, v04_over127_NEG) +{ + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", circle::BuiltinOperator_CUMSUM); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} diff --git a/compiler/mio-tflite/CMakeLists.txt b/compiler/mio-tflite/CMakeLists.txt index 4660e4003..90187b037 100644 --- a/compiler/mio-tflite/CMakeLists.txt +++ b/compiler/mio-tflite/CMakeLists.txt @@ -1,4 +1,4 @@ -nnas_find_package(FlatBuffers EXACT 1.10 QUIET) +nnas_find_package(FlatBuffers EXACT 2.0 QUIET) if(NOT FlatBuffers_FOUND) message(STATUS "Build mio-tflite: FAILED (missing Flatbuffers)") diff --git a/compiler/mio-tflite260/CMakeLists.txt b/compiler/mio-tflite260/CMakeLists.txt index 39f4d9a31..f2cfeafcc 100644 --- a/compiler/mio-tflite260/CMakeLists.txt +++ b/compiler/mio-tflite260/CMakeLists.txt @@ -1,7 +1,7 @@ -nnas_find_package(FlatBuffers EXACT 1.12 QUIET) 
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET) if(NOT FlatBuffers_FOUND) - message(STATUS "Build mio-tflite260: FAILED (missing Flatbuffers 1.12)") + message(STATUS "Build mio-tflite260: FAILED (missing Flatbuffers 2.0)") return() endif(NOT FlatBuffers_FOUND) @@ -47,3 +47,23 @@ endif(NOT TensorFlowGEMMLowpSource_FOUND) add_library(mio_tflite260_inc INTERFACE) target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}") target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}") + +file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) + +add_library(mio_tflite260_helper STATIC ${SOURCES}) +target_include_directories(mio_tflite260_helper PRIVATE src) +target_include_directories(mio_tflite260_helper PUBLIC include) +target_link_libraries(mio_tflite260_helper mio_tflite260) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(mio_tflite260_helper_test ${TESTS}) +target_include_directories(mio_tflite260_helper_test PRIVATE src) +target_link_libraries(mio_tflite260_helper_test mio_tflite260) +target_link_libraries(mio_tflite260_helper_test mio_tflite260_helper) diff --git a/compiler/mio-tflite260/include/mio_tflite260/Helper.h b/compiler/mio-tflite260/include/mio_tflite260/Helper.h new file mode 100644 index 000000000..cb027e604 --- /dev/null +++ b/compiler/mio-tflite260/include/mio_tflite260/Helper.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MIO_TFLITE260_HELPER_H__ +#define __MIO_TFLITE260_HELPER_H__ + +#include <mio/tflite/schema_generated.h> + +namespace mio +{ +namespace tflite +{ + +::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode); +bool is_valid(const ::tflite::OperatorCode *opcode); +bool is_custom(const ::tflite::OperatorCode *opcode); +std::string opcode_name(const ::tflite::OperatorCode *opcode); +const char *tensor_type(const ::tflite::Tensor *tensor); +const char *tensor_name(const ::tflite::Tensor *tensor); + +} // namespace tflite +} // namespace mio + +#endif // __MIO_TFLITE260_HELPER_H__ diff --git a/compiler/mio-tflite260/src/Helper.cpp b/compiler/mio-tflite260/src/Helper.cpp new file mode 100644 index 000000000..9669058ea --- /dev/null +++ b/compiler/mio-tflite260/src/Helper.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mio_tflite260/Helper.h" + +#include <sstream> + +namespace mio +{ +namespace tflite +{ + +/** + * This will provide v3/v3a format neutral BuiltinOperator + * + * This function referenced + * https://github.com/tensorflow/tensorflow/blob/7d12007d7800d3714a02e05059f3ea602d1aec78/tensorflow/lite/schema/schema_utils.cc + */ +::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode) +{ + assert(opcode != nullptr); + return std::max(opcode->builtin_code(), + static_cast<::tflite::BuiltinOperator>(opcode->deprecated_builtin_code())); +} + +bool is_valid(const ::tflite::OperatorCode *opcode) +{ + // Valid Range : 0 <= deprecated_builtin_code <= 127 + const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code(); + if (deprecated_builtin_code < 0) + return false; + + const ::tflite::BuiltinOperator builtin_code = opcode->builtin_code(); + if (!(::tflite::BuiltinOperator_MIN <= builtin_code && + builtin_code <= ::tflite::BuiltinOperator_MAX)) + return false; + + return true; +} + +bool is_custom(const ::tflite::OperatorCode *opcode) +{ + ::tflite::BuiltinOperator code = builtin_code_neutral(opcode); + return (code == ::tflite::BuiltinOperator_CUSTOM); +} + +std::string opcode_name(const ::tflite::OperatorCode *opcode) +{ + assert(opcode); + + if (!is_valid(opcode)) + { + std::ostringstream oss; + oss << "(invalid)"; + return oss.str(); + } + + if (is_custom(opcode)) + { + if (!opcode->custom_code()) + return "(invalid custom)"; + + std::string custom_op = "CUSTOM("; + custom_op += opcode->custom_code()->c_str(); + custom_op += ")"; + return custom_op; + } + + ::tflite::BuiltinOperator code = builtin_code_neutral(opcode); + return ::tflite::EnumNameBuiltinOperator(code); +} + +const char *tensor_type(const ::tflite::Tensor *tensor) +{ + return ::tflite::EnumNameTensorType(tensor->type()); +} + +const char *tensor_name(const ::tflite::Tensor *tensor) +{ + static const char *kEmptyTensorName = "(noname)"; + + auto name = 
tensor->name(); + if (name) + return name->c_str(); + + return kEmptyTensorName; +} + +} // namespace tflite +} // namespace mio diff --git a/compiler/mio-tflite260/src/Helper.test.cpp b/compiler/mio-tflite260/src/Helper.test.cpp new file mode 100644 index 000000000..e1ef04ca7 --- /dev/null +++ b/compiler/mio-tflite260/src/Helper.test.cpp @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mio_tflite260/Helper.h" + +#include <flatbuffers/flatbuffers.h> +#include <gtest/gtest.h> + +#include <vector> + +class mio_tflite260_helper_test : public ::testing::Test +{ +protected: + void initialization_finish(void) + { + _fbb.Finish(tflite::CreateModelDirect(_fbb, 0, &_opcodes_vec)); + } + +protected: + void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code, + tflite::BuiltinOperator builtin_code) + { + _opcodes_vec.push_back(tflite::CreateOperatorCodeDirect( + _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code)); + } + + const tflite::OperatorCode *get_operator_code(uint8_t idx) + { + return tflite::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx); + } + +private: + flatbuffers::FlatBufferBuilder _fbb; + std::vector<flatbuffers::Offset<tflite::OperatorCode>> _opcodes_vec; +}; + +/** + * Extended 'builtin_code' is not in TFLite schema v3. 
+ * + * Thus it is filled with 0(BuiltinOperator_ADD) in schame v3. Please refer to + * https://github.com/tensorflow/tensorflow/blob/1ab788fa8d08430be239ab970980b891ad7af494/tensorflow/lite/schema/schema_utils.cc#L28-L31 + */ +TEST_F(mio_tflite260_helper_test, v3) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CONV_2D = 3 + add_operator_code(3, "", tflite::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)), + tflite::BuiltinOperator_CONV_2D); + ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0))); +} + +TEST_F(mio_tflite260_helper_test, v3_custom) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CUSTOM = 32 + add_operator_code(32, "custom", tflite::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)), + tflite::BuiltinOperator_CUSTOM); + ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0))); +} + +TEST_F(mio_tflite260_helper_test, v3_NEG) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", tflite::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0))); +} + +TEST_F(mio_tflite260_helper_test, v3a_under127) +{ + // BuiltinOperator_CONV_2D = 3 + add_operator_code(3, "", tflite::BuiltinOperator_CONV_2D); + initialization_finish(); + + ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)), + tflite::BuiltinOperator_CONV_2D); + ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0))); +} + +TEST_F(mio_tflite260_helper_test, v3a_under127_NEG) +{ + // BuiltinOperator_CONV_2D = 3 + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + 
add_operator_code(128, "", tflite::BuiltinOperator_CONV_2D); + initialization_finish(); + + ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0))); +} + +TEST_F(mio_tflite260_helper_test, v3a_custom) +{ + // BuiltinOperator_CUSTOM = 32 + add_operator_code(32, "custom", tflite::BuiltinOperator_CUSTOM); + initialization_finish(); + + ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)), + tflite::BuiltinOperator_CUSTOM); + ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0))); +} + +TEST_F(mio_tflite260_helper_test, v3a_custom_NEG) +{ + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "custom", tflite::BuiltinOperator_CUSTOM); + initialization_finish(); + + ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0))); +} + +TEST_F(mio_tflite260_helper_test, v3a_over127) +{ + // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127 + // BuiltinOperator_CUMSUM = 128 + add_operator_code(127, "", tflite::BuiltinOperator_CUMSUM); + initialization_finish(); + + ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)), + tflite::BuiltinOperator_CUMSUM); + ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0))); +} + +TEST_F(mio_tflite260_helper_test, v3a_over127_NEG) +{ + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", tflite::BuiltinOperator_CUMSUM); + initialization_finish(); + + ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0))); +} diff --git a/compiler/mio-tflite280/CMakeLists.txt b/compiler/mio-tflite280/CMakeLists.txt new file mode 100644 index 000000000..f48711eb7 --- /dev/null +++ b/compiler/mio-tflite280/CMakeLists.txt @@ -0,0 +1,69 @@ +nnas_find_package(FlatBuffers EXACT 2.0 QUIET) + +if(NOT FlatBuffers_FOUND) + message(STATUS "Build mio-tflite280: FAILED (missing Flatbuffers 2.0)") 
+ return() +endif(NOT FlatBuffers_FOUND) + +nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET) + +if(NOT TensorFlowSource_FOUND) + message(STATUS "Build mio-tflite280: FAILED (missing TensorFlowSource 2.8.0)") + return() +endif(NOT TensorFlowSource_FOUND) + +message(STATUS "Build mio-tflite280: TRUE") + +set(SCHEMA_FILE "${TensorFlowSource_DIR}/tensorflow/lite/schema/schema.fbs") + +# NOTE Use copy of schema.fbs as to provide unified way for circle also +add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs" + COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + DEPENDS "${SCHEMA_FILE}" +) + +FlatBuffers_Target(mio_tflite280 + OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/tflite" + INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen" + SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}" + SCHEMA_FILES "schema.fbs" +) + +add_executable(mio_tflite280_example example.cpp) +target_link_libraries(mio_tflite280_example mio_tflite280) + +# Temporay tflite validation tool to replace nnkit-tflite +# TODO provide full tflite validation with runtime/interpreter +add_executable(mio_tflite280_validate example.cpp) +target_link_libraries(mio_tflite280_validate mio_tflite280) + +nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET) + +if(NOT TensorFlowGEMMLowpSource_FOUND) + return() +endif(NOT TensorFlowGEMMLowpSource_FOUND) + +add_library(mio_tflite280_inc INTERFACE) +target_include_directories(mio_tflite280_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}") +target_include_directories(mio_tflite280_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}") + +file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) + +add_library(mio_tflite280_helper STATIC ${SOURCES}) +target_include_directories(mio_tflite280_helper PRIVATE src) +target_include_directories(mio_tflite280_helper PUBLIC include) +target_link_libraries(mio_tflite280_helper mio_tflite280) + 
+if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(mio_tflite280_helper_test ${TESTS}) +target_include_directories(mio_tflite280_helper_test PRIVATE src) +target_link_libraries(mio_tflite280_helper_test mio_tflite280) +target_link_libraries(mio_tflite280_helper_test mio_tflite280_helper) diff --git a/compiler/mio-tflite280/README.md b/compiler/mio-tflite280/README.md new file mode 100644 index 000000000..73219a7df --- /dev/null +++ b/compiler/mio-tflite280/README.md @@ -0,0 +1,3 @@ +# mio-tflite280 + +_mio-tflite280_ provides a library to access TensorFlow lite model files with V2.8.0. diff --git a/compiler/mio-tflite280/example.cpp b/compiler/mio-tflite280/example.cpp new file mode 100644 index 000000000..83356b943 --- /dev/null +++ b/compiler/mio-tflite280/example.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// +// This example shows how to include and use "mio-tflite280" +// +#include <mio/tflite/schema_generated.h> + +#include <fstream> +#include <iostream> +#include <vector> + +int main(int argc, char **argv) +{ + std::ifstream ifs(argv[1], std::ios_base::binary); + std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{}); + + flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()}; + + if (!tflite::VerifyModelBuffer(verifier)) + { + std::cout << "Fail" << std::endl; + return 255; + } + + std::cout << "Pass" << std::endl; + return 0; +} diff --git a/compiler/mio-tflite280/include/mio_tflite280/Helper.h b/compiler/mio-tflite280/include/mio_tflite280/Helper.h new file mode 100644 index 000000000..b0fb0ace7 --- /dev/null +++ b/compiler/mio-tflite280/include/mio_tflite280/Helper.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MIO_TFLITE280_HELPER_H__ +#define __MIO_TFLITE280_HELPER_H__ + +#include <mio/tflite/schema_generated.h> + +namespace mio +{ +namespace tflite +{ + +::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode); +bool is_valid(const ::tflite::OperatorCode *opcode); +bool is_custom(const ::tflite::OperatorCode *opcode); +std::string opcode_name(const ::tflite::OperatorCode *opcode); +const char *tensor_type(const ::tflite::Tensor *tensor); +const char *tensor_name(const ::tflite::Tensor *tensor); + +} // namespace tflite +} // namespace mio + +#endif // __MIO_TFLITE280_HELPER_H__ diff --git a/compiler/mio-tflite280/src/Helper.cpp b/compiler/mio-tflite280/src/Helper.cpp new file mode 100644 index 000000000..ebf0bd140 --- /dev/null +++ b/compiler/mio-tflite280/src/Helper.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mio_tflite280/Helper.h" + +#include <sstream> + +namespace mio +{ +namespace tflite +{ + +/** + * This will provide v3/v3a format neutral BuiltinOperator + * + * This function referenced + * https://github.com/tensorflow/tensorflow/blob/7d12007d7800d3714a02e05059f3ea602d1aec78/tensorflow/lite/schema/schema_utils.cc + */ +::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode) +{ + assert(opcode != nullptr); + return std::max(opcode->builtin_code(), + static_cast<::tflite::BuiltinOperator>(opcode->deprecated_builtin_code())); +} + +bool is_valid(const ::tflite::OperatorCode *opcode) +{ + // Valid Range : 0 <= deprecated_builtin_code <= 127 + const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code(); + if (deprecated_builtin_code < 0) + return false; + + const ::tflite::BuiltinOperator builtin_code = opcode->builtin_code(); + if (!(::tflite::BuiltinOperator_MIN <= builtin_code && + builtin_code <= ::tflite::BuiltinOperator_MAX)) + return false; + + return true; +} + +bool is_custom(const ::tflite::OperatorCode *opcode) +{ + ::tflite::BuiltinOperator code = builtin_code_neutral(opcode); + return (code == ::tflite::BuiltinOperator_CUSTOM); +} + +std::string opcode_name(const ::tflite::OperatorCode *opcode) +{ + assert(opcode); + + if (!is_valid(opcode)) + { + std::ostringstream oss; + oss << "(invalid)"; + return oss.str(); + } + + if (is_custom(opcode)) + { + if (!opcode->custom_code()) + return "(invalid custom)"; + + std::string custom_op = "CUSTOM("; + custom_op += opcode->custom_code()->c_str(); + custom_op += ")"; + return custom_op; + } + + ::tflite::BuiltinOperator code = builtin_code_neutral(opcode); + return ::tflite::EnumNameBuiltinOperator(code); +} + +const char *tensor_type(const ::tflite::Tensor *tensor) +{ + return ::tflite::EnumNameTensorType(tensor->type()); +} + +const char *tensor_name(const ::tflite::Tensor *tensor) +{ + static const char *kEmptyTensorName = "(noname)"; + + auto name = 
tensor->name(); + if (name) + return name->c_str(); + + return kEmptyTensorName; +} + +} // namespace tflite +} // namespace mio diff --git a/compiler/mio-tflite280/src/Helper.test.cpp b/compiler/mio-tflite280/src/Helper.test.cpp new file mode 100644 index 000000000..df573bf44 --- /dev/null +++ b/compiler/mio-tflite280/src/Helper.test.cpp @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mio_tflite280/Helper.h" + +#include <flatbuffers/flatbuffers.h> +#include <gtest/gtest.h> + +#include <vector> + +class mio_tflite280_helper_test : public ::testing::Test +{ +protected: + void initialization_finish(void) + { + _fbb.Finish(tflite::CreateModelDirect(_fbb, 0, &_opcodes_vec)); + } + +protected: + void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code, + tflite::BuiltinOperator builtin_code) + { + _opcodes_vec.push_back(tflite::CreateOperatorCodeDirect( + _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code)); + } + + const tflite::OperatorCode *get_operator_code(uint8_t idx) + { + return tflite::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx); + } + +private: + flatbuffers::FlatBufferBuilder _fbb; + std::vector<flatbuffers::Offset<tflite::OperatorCode>> _opcodes_vec; +}; + +/** + * Extended 'builtin_code' is not in TFLite schema v3. 
+ * + * Thus it is filled with 0(BuiltinOperator_ADD) in schame v3. Please refer to + * https://github.com/tensorflow/tensorflow/blob/1ab788fa8d08430be239ab970980b891ad7af494/tensorflow/lite/schema/schema_utils.cc#L28-L31 + */ +TEST_F(mio_tflite280_helper_test, v3) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CONV_2D = 3 + add_operator_code(3, "", tflite::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)), + tflite::BuiltinOperator_CONV_2D); + ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0))); +} + +TEST_F(mio_tflite280_helper_test, v3_custom) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CUSTOM = 32 + add_operator_code(32, "custom", tflite::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)), + tflite::BuiltinOperator_CUSTOM); + ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0))); +} + +TEST_F(mio_tflite280_helper_test, v3_NEG) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", tflite::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0))); +} + +TEST_F(mio_tflite280_helper_test, v3a_under127) +{ + // BuiltinOperator_CONV_2D = 3 + add_operator_code(3, "", tflite::BuiltinOperator_CONV_2D); + initialization_finish(); + + ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)), + tflite::BuiltinOperator_CONV_2D); + ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0))); +} + +TEST_F(mio_tflite280_helper_test, v3a_under127_NEG) +{ + // BuiltinOperator_CONV_2D = 3 + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + 
add_operator_code(128, "", tflite::BuiltinOperator_CONV_2D); + initialization_finish(); + + ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0))); +} + +TEST_F(mio_tflite280_helper_test, v3a_custom) +{ + // BuiltinOperator_CUSTOM = 32 + add_operator_code(32, "custom", tflite::BuiltinOperator_CUSTOM); + initialization_finish(); + + ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)), + tflite::BuiltinOperator_CUSTOM); + ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0))); +} + +TEST_F(mio_tflite280_helper_test, v3a_custom_NEG) +{ + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "custom", tflite::BuiltinOperator_CUSTOM); + initialization_finish(); + + ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0))); +} + +TEST_F(mio_tflite280_helper_test, v3a_over127) +{ + // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127 + // BuiltinOperator_CUMSUM = 128 + add_operator_code(127, "", tflite::BuiltinOperator_CUMSUM); + initialization_finish(); + + ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)), + tflite::BuiltinOperator_CUMSUM); + ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0))); +} + +TEST_F(mio_tflite280_helper_test, v3a_over127_NEG) +{ + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", tflite::BuiltinOperator_CUMSUM); + initialization_finish(); + + ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0))); +} diff --git a/compiler/mir/src/mir_onnx_importer/CMakeLists.txt b/compiler/mir/src/mir_onnx_importer/CMakeLists.txt index e6eb13b93..04c22055e 100644 --- a/compiler/mir/src/mir_onnx_importer/CMakeLists.txt +++ b/compiler/mir/src/mir_onnx_importer/CMakeLists.txt @@ -112,6 +112,10 @@ target_include_directories(mir_onnx_importer PUBLIC ../../include/mir_onnx_impor 
target_include_directories(mir_onnx_importer PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(mir_onnx_importer PUBLIC mir mir_onnx_proto PRIVATE mir_interpreter nncc_common) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nnas_find_package(GTest REQUIRED) file(GLOB_RECURSE TEST_SOURCES "*.test.cpp") diff --git a/compiler/mir/src/mir_tflite_importer/CMakeLists.txt b/compiler/mir/src/mir_tflite_importer/CMakeLists.txt index 42eb4f8a5..6c6c28a32 100644 --- a/compiler/mir/src/mir_tflite_importer/CMakeLists.txt +++ b/compiler/mir/src/mir_tflite_importer/CMakeLists.txt @@ -1,4 +1,4 @@ -nnas_find_package(FlatBuffers EXACT 1.10 REQUIRED) +nnas_find_package(FlatBuffers EXACT 2.0 REQUIRED) if (NOT FlatBuffers_FOUND) return() diff --git a/compiler/mir2loco/CMakeLists.txt b/compiler/mir2loco/CMakeLists.txt index a8a096ef4..217f1bd15 100644 --- a/compiler/mir2loco/CMakeLists.txt +++ b/compiler/mir2loco/CMakeLists.txt @@ -8,11 +8,11 @@ target_include_directories(mir2loco PUBLIC include) target_link_libraries(mir2loco PUBLIC mir) target_link_libraries(mir2loco PUBLIC loco) -nnas_find_package(GTest QUIET) - -if(NOT GTest_FOUND) +if(NOT ENABLE_TEST) return() -endif(NOT GTest_FOUND) +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest QUIET) GTest_AddTest(mir2loco_test ${TESTS}) target_link_libraries(mir2loco_test mir2loco) diff --git a/compiler/moco-tf/CMakeLists.txt b/compiler/moco-tf/CMakeLists.txt index 7c42761ba..95669264f 100644 --- a/compiler/moco-tf/CMakeLists.txt +++ b/compiler/moco-tf/CMakeLists.txt @@ -26,6 +26,7 @@ target_link_libraries(moco_tf_frontend PRIVATE locomotiv) target_link_libraries(moco_tf_frontend PRIVATE plier_tf) target_link_libraries(moco_tf_frontend PRIVATE locoex_customop) target_link_libraries(moco_tf_frontend PRIVATE logo) +target_link_libraries(moco_tf_frontend PRIVATE logo_ex) target_link_libraries(moco_tf_frontend PRIVATE oops) install(TARGETS moco_tf_frontend DESTINATION lib) @@ -46,4 +47,5 @@ 
target_link_libraries(moco_tf_frontend_test moco_tf_frontend) target_link_libraries(moco_tf_frontend_test plier_tf) target_link_libraries(moco_tf_frontend_test locoex_customop) target_link_libraries(moco_tf_frontend_test logo) +target_link_libraries(moco_tf_frontend_test logo_ex) add_test(moco_tf_frontend_test moco_tf_frontend_test) diff --git a/compiler/moco-tf/requires.cmake b/compiler/moco-tf/requires.cmake index 90590e374..71755556c 100644 --- a/compiler/moco-tf/requires.cmake +++ b/compiler/moco-tf/requires.cmake @@ -9,5 +9,6 @@ require("mio-tf") require("plier-tf") require("locoex-customop") require("logo") +require("logo-ex") require("oops") require("bino") diff --git a/compiler/moco-tf/src/Transforms.h b/compiler/moco-tf/src/Transforms.h index f14b81675..a197a796e 100644 --- a/compiler/moco-tf/src/Transforms.h +++ b/compiler/moco-tf/src/Transforms.h @@ -21,6 +21,7 @@ #include "Transforms/TypeInferencePass.h" #include <logo/Passes.h> +#include <logo/PassesEx.h> #include <moco/Pass/Passes.h> #endif // __MOCO_TF_TRANSFORMS_H__ diff --git a/compiler/morph/CMakeLists.txt b/compiler/morph/CMakeLists.txt index ec7da8d30..5a5ae2623 100644 --- a/compiler/morph/CMakeLists.txt +++ b/compiler/morph/CMakeLists.txt @@ -8,11 +8,11 @@ target_include_directories(morph PUBLIC include) target_link_libraries(morph PRIVATE nncc_common) target_link_libraries(morph PUBLIC angkor) -nnas_find_package(GTest QUIET) - -if(NOT GTest_FOUND) +if(NOT ENABLE_TEST) return() -endif(NOT GTest_FOUND) +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest QUIET) add_executable(morph_test ${TESTS}) target_link_libraries(morph_test morph) diff --git a/compiler/nest/core/CMakeLists.txt b/compiler/nest/core/CMakeLists.txt index b603f9ae9..4f17db3b4 100644 --- a/compiler/nest/core/CMakeLists.txt +++ b/compiler/nest/core/CMakeLists.txt @@ -15,11 +15,11 @@ foreach(EXAMPLE_FILE IN ITEMS ${EXAMPLE_FILES}) target_link_libraries(${TARGET_NAME} nest_core) endforeach(EXAMPLE_FILE) -nnas_find_package(GTest 
QUIET) - -if(NOT GTest_FOUND) +if(NOT ENABLE_TEST) return() -endif(NOT GTest_FOUND) +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest QUIET) add_executable(nest_core_test ${TESTS}) target_link_libraries(nest_core_test gtest_main) diff --git a/compiler/nike/CMakeLists.txt b/compiler/nike/CMakeLists.txt index 737c73b8f..6bd3199e3 100644 --- a/compiler/nike/CMakeLists.txt +++ b/compiler/nike/CMakeLists.txt @@ -5,11 +5,11 @@ list(REMOVE_ITEM SOURCES ${TESTS}) add_library(nike STATIC ${SOURCES}) target_include_directories(nike PUBLIC include) -nnas_find_package(GTest QUIET) - -if(NOT GTest_FOUND) +if(NOT ENABLE_TEST) return() -endif(NOT GTest_FOUND) +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest QUIET) GTest_AddTest(nike_test ${TESTS}) target_link_libraries(nike_test nike) diff --git a/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp b/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp index d38385e91..c2135c4be 100644 --- a/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp +++ b/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp @@ -22,6 +22,8 @@ #include <gtest/gtest.h> +#include <algorithm> + using namespace std; using namespace nnc; using namespace mir; diff --git a/compiler/nnop/CMakeLists.txt b/compiler/nnop/CMakeLists.txt index 82c0e3a86..d2c8af26d 100644 --- a/compiler/nnop/CMakeLists.txt +++ b/compiler/nnop/CMakeLists.txt @@ -2,11 +2,11 @@ add_library(nnop INTERFACE) target_include_directories(nnop INTERFACE include) target_link_libraries(nnop INTERFACE angkor) -nnas_find_package(GTest QUIET) - -if(NOT GTest_FOUND) +if(NOT ENABLE_TEST) return() -endif(NOT GTest_FOUND) +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest QUIET) file(GLOB_RECURSE TESTS "src/*.test.cpp") diff --git a/compiler/one-cmds/CMakeLists.txt b/compiler/one-cmds/CMakeLists.txt index 729bfa80a..8732340ae 100644 --- a/compiler/one-cmds/CMakeLists.txt +++ b/compiler/one-cmds/CMakeLists.txt @@ -14,6 +14,11 @@ set(ONE_COMMAND_FILES onecc ) +# pytorch importer is an experimental 
feature, it is not used in default configuration +if(ENABLE_ONE_IMPORT_PYTORCH) + list(APPEND ONE_COMMAND_FILES one-import-pytorch) +endif(ENABLE_ONE_IMPORT_PYTORCH) + foreach(ONE_COMMAND IN ITEMS ${ONE_COMMAND_FILES}) set(ONE_COMMAND_FILE ${ONE_COMMAND}) @@ -41,6 +46,7 @@ set(ONE_UTILITY_FILES one-build.template.cfg onecc.template.cfg utils.py + onnx_legalizer.py ) foreach(ONE_UTILITY IN ITEMS ${ONE_UTILITY_FILES}) @@ -66,6 +72,39 @@ foreach(ONE_UTILITY IN ITEMS ${ONE_UTILITY_FILES}) endforeach(ONE_UTILITY) +# make python directory +set(ONE_PYTHON_FILES constant.py + make_cmd.py) + +foreach(ONE_PYTHON_FILE IN ITEMS ${ONE_PYTHON_FILES}) + + set(ONE_PYTHON_DIR "onelib") + set(ONE_PYTHON_DIR_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_PYTHON_DIR}") + set(ONE_PYTHON_FILE_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${ONE_PYTHON_DIR}/${ONE_PYTHON_FILE}") + set(ONE_PYTHON_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_PYTHON_DIR}/${ONE_PYTHON_FILE}") + set(ONE_PYTHON_TARGET "${ONE_PYTHON_FILE}_target") + + add_custom_command(OUTPUT ${ONE_PYTHON_DIR_BIN} + COMMAND ${CMAKE_COMMAND} -E make_directory "${ONE_PYTHON_DIR_BIN}" + COMMENT "Generate ${ONE_PYTHON_DIR_BIN}" + ) + + add_custom_command(OUTPUT ${ONE_PYTHON_FILE_BIN} + COMMAND ${CMAKE_COMMAND} -E copy "${ONE_PYTHON_FILE_SRC}" "${ONE_PYTHON_FILE_BIN}" + DEPENDS ${ONE_PYTHON_SRC} + COMMENT "Generate ${ONE_PYTHON_FILE_BIN}" + ) + + add_custom_target(${ONE_PYTHON_TARGET} ALL DEPENDS ${ONE_PYTHON_DIR_BIN} ${ONE_PYTHON_FILE_BIN}) + + install(DIRECTORY ${ONE_PYTHON_DIR} + FILE_PERMISSIONS OWNER_WRITE OWNER_READ + GROUP_READ + WORLD_READ + DESTINATION bin) + +endforeach(ONE_PYTHON_FILE) + set(ONE_DOCUMENT_FILES how-to-use-one-commands.txt how-to-prepare-virtualenv.txt diff --git a/compiler/one-cmds/how-to-prepare-virtualenv.txt b/compiler/one-cmds/how-to-prepare-virtualenv.txt index 6d846c081..8d6007f38 100644 --- a/compiler/one-cmds/how-to-prepare-virtualenv.txt +++ b/compiler/one-cmds/how-to-prepare-virtualenv.txt @@ -5,7 +5,7 @@ Last update: 
2020-09-15 This document explains about 'one-prepare-venv' command. -'one-prepare-venv' will prepare python3 virtual environment with tensorflow-cpu +'one-prepare-venv' will prepare python3.8 virtual environment with tensorflow-cpu version 2.3.0, recommanded 2.x version as of now, so that 'one-import-tf' command can execute properly. @@ -20,7 +20,7 @@ Please install these required packages before venv preparation. $ sudo apt-get update $ sudo apt-get upgrade -$ sudo apt-get install python3-pip python3-venv +$ sudo apt-get install python3.8 python3-pip python3.8-venv How to run for Ubuntu @@ -36,18 +36,9 @@ There will be venv folder as of result. How to run for Windows ---------------------- -1. First, please prepare Python 3.5-3.7 -2. Open the Command Prompt as an administrator -3. cd(change directory) to the directory where one-compiler is installed -4. run below command -``` -$ ONE\install\bin> python -m venv venv -$ ONE\install\bin> cd venv/Scripts -$ ONE\install\bin\venv/Scripts> pip.exe install -U pip -$ ONE\install\bin\venv/Scripts> pip.exe install -U tensorflow-cpu==2.3.0 -``` - -After running the above command, go back to MinGW and run one-compiler. +Support for Windows is not maintained for now. +If you have any needs for running in Windows, please fire an issue. +Or you can use Docker for Windows. 
Trouble shooting diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt index 0a0c4b14c..ebc165167 100644 --- a/compiler/one-cmds/how-to-use-one-commands.txt +++ b/compiler/one-cmds/how-to-use-one-commands.txt @@ -155,6 +155,7 @@ Current transformation options are - fold_cast : This removes Cast operation which can be folded - fold_dequantize : This removes Dequantize operation which can be folded - fold_dwconv : This folds Depthwise Convolution operation which can be folded +- fold_gather : This removes Gather operation which can be folded - fold_sparse_to_dense : This removes SparseToDense operation which can be folded - forward_reshape_to_unaryop: This will move Reshape after UnaryOp for centain condition - fuse_add_with_fully_connected: This fuses Add operator with the preceding FullyConnected operator if possible @@ -178,6 +179,7 @@ Current transformation options are - generate_profile_data : This will turn on profiling data generation. - remove_fakequant : This will remove all fakequant operators. - remove_quantdequant : This will remove all Quantize-Dequantize sequence. +- remove_redundant_quantize : This removes redundant quantize operators. - remove_redundant_reshape : This fuses or removes redundant reshape operators. - remove_redundant_transpose : This fuses or removes redundant transpose operators. - remove_unnecessary_reshape : This removes unnecessary reshape operators. 
diff --git a/compiler/one-cmds/one-build b/compiler/one-cmds/one-build index 90dfa77b8..5c313b44b 100644 --- a/compiler/one-cmds/one-build +++ b/compiler/one-cmds/one-build @@ -154,25 +154,31 @@ def main(): config = _parse_cfg(args) # verify configuration file - drivers = [ - 'one-import-tf', 'one-import-tflite', 'one-import-bcq', 'one-import-onnx', - 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen' + bin_dir = os.path.dirname(os.path.realpath(__file__)) + import_drivers_dict = _utils._detect_one_import_drivers(bin_dir) + transform_drivers = [ + 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile' ] - _verify_cfg(drivers, config) + _verify_cfg(import_drivers_dict, config) # verify optimization option file _verify_opt(args) # get sections to run section_to_run = [] - for d in drivers: + for d in list(import_drivers_dict) + transform_drivers: if _is_available_driver(config, d): section_to_run.append(d) # run dir_path = os.path.dirname(os.path.realpath(__file__)) for section in section_to_run: - driver_path = os.path.join(dir_path, _get_driver_name(section)) + if section in import_drivers_dict: + # we already has driver name in dict + driver_name = import_drivers_dict[section] + else: + driver_name = _get_driver_name(section) + driver_path = os.path.join(dir_path, driver_name) cmd = [driver_path, '--config', getattr(args, 'config'), '--section', section] if section == 'one-optimize' and _utils._is_valid_attr(args, 'O'): cmd += ['-O', getattr(args, 'O')] diff --git a/compiler/one-cmds/one-import-bcq b/compiler/one-cmds/one-import-bcq index 9aef6270e..ef89a9297 100644 --- a/compiler/one-cmds/one-import-bcq +++ b/compiler/one-cmds/one-import-bcq @@ -25,6 +25,7 @@ import subprocess import sys import tempfile +import onelib.make_cmd as _make_cmd import utils as _utils import generate_bcq_output_arrays as _bcq_info_gen @@ -32,6 +33,10 @@ import generate_bcq_output_arrays as _bcq_info_gen sys.tracebacklimit = 0 +def get_driver_cfg_section(): + 
return "one-import-bcq" + + def _get_parser(): parser = argparse.ArgumentParser( description='command line tool to convert TensorFlow with BCQ to circle') @@ -155,7 +160,7 @@ def _convert(args): tmpdir, os.path.splitext( os.path.basename(generate_bcq_metadata_output_path))[0]) + '.tflite' - tf2tfliteV2_cmd = _utils._make_tf2tfliteV2_cmd(args, tf2tfliteV2_path, + tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path, generate_bcq_metadata_output_path, tf2tfliteV2_output_path) try: @@ -171,7 +176,7 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') - tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path, + tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, tf2tfliteV2_output_path, getattr(args, 'output_path')) diff --git a/compiler/one-cmds/one-import-onnx b/compiler/one-cmds/one-import-onnx index 1c0c5498e..eaa136197 100644 --- a/compiler/one-cmds/one-import-onnx +++ b/compiler/one-cmds/one-import-onnx @@ -27,12 +27,25 @@ import tempfile import onnx import onnx_tf +# ONNX legalizer is an optional feature +# It enables conversion of some operations, but in experimental phase for now +try: + import onnx_legalizer + _onnx_legalizer_enabled = True +except ImportError: + _onnx_legalizer_enabled = False + +import onelib.make_cmd as _make_cmd import utils as _utils # TODO Find better way to suppress trackback on error sys.tracebacklimit = 0 +def get_driver_cfg_section(): + return "one-import-onnx" + + def _get_parser(): parser = argparse.ArgumentParser( description='command line tool to convert ONNX to circle') @@ -64,6 +77,10 @@ def _get_parser(): tf2tfliteV2_group.add_argument('--model_format', default='saved_model') tf2tfliteV2_group.add_argument('--converter_version', default='v2') + parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators') + parser.add_argument( + '--unroll_lstm', action='store_true', 
help='Unroll LSTM operators') + # save intermediate file(s) parser.add_argument( '--save_intermediate', @@ -120,6 +137,11 @@ def _convert(args): tmpdir = os.path.dirname(logfile_path) # convert onnx to tf saved model onnx_model = onnx.load(getattr(args, 'input_path')) + if _onnx_legalizer_enabled: + options = onnx_legalizer.LegalizeOptions + options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn') + options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm') + onnx_legalizer.legalize(onnx_model, options) tf_savedmodel = onnx_tf.backend.prepare(onnx_model) savedmodel_name = os.path.splitext(os.path.basename( @@ -133,7 +155,7 @@ def _convert(args): args.output_path))[0] + '.tflite' tf2tfliteV2_output_path = os.path.join(tmpdir, tf2tfliteV2_output_name) - tf2tfliteV2_cmd = _utils._make_tf2tfliteV2_cmd( + tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd( args, tf2tfliteV2_path, savedmodel_output_path, tf2tfliteV2_output_path) f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode()) @@ -143,7 +165,7 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') - tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path, + tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, tf2tfliteV2_output_path, getattr(args, 'output_path')) diff --git a/compiler/one-cmds/one-import-pytorch b/compiler/one-cmds/one-import-pytorch new file mode 100644 index 000000000..dbf1ba6d7 --- /dev/null +++ b/compiler/one-cmds/one-import-pytorch @@ -0,0 +1,366 @@ +#!/usr/bin/env bash +''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # ''' +''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # ''' +''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # ''' +''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # ''' +''''exit 255 # ''' + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import importlib +import inspect +import os +import sys +import tempfile +import torch +import onnx +import onnx_tf +import json +import zipfile + +import onnx_legalizer +import onelib.make_cmd as _make_cmd +import utils as _utils + +# TODO Find better way to suppress trackback on error +sys.tracebacklimit = 0 + + +def get_driver_spec(): + return ("one-import-pytorch", _utils.DriverType.IMPORTER) + + +def _get_parser(): + parser = argparse.ArgumentParser( + description='command line tool to convert PyTorch to Circle') + + _utils._add_default_arg(parser) + + ## converter arguments + converter_group = parser.add_argument_group('converter arguments') + + # input and output path. + converter_group.add_argument( + '-i', '--input_path', type=str, help='full filepath of the input file') + converter_group.add_argument( + '-p', '--python_path', type=str, help='full filepath of the python model file') + converter_group.add_argument( + '-o', '--output_path', type=str, help='full filepath of the output file') + + # input arrays. 
+ converter_group.add_argument( + '-s', + '--input_shapes', + type=str, + help= + 'Shapes corresponding to --input_arrays, colon-separated.(ex:\"1,4,4,3:1,20,20,3\")' + ) + converter_group.add_argument( + '-t', + '--input_types', + type=str, + help='data types of input tensors, colon-separated (ex: float32, uint8, int32)') + + # fixed options + tf2tflite_group = parser.add_argument_group('tf2tfliteV2 arguments') + tf2tflite_group.add_argument('--model_format', default='saved_model') + tf2tflite_group.add_argument('--converter_version', default='v2') + + parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators') + parser.add_argument('--unroll_lstm', action='store_true', help='Unroll LSTM operators') + + # save intermediate file(s) + parser.add_argument( + '--save_intermediate', + action='store_true', + help='Save intermediate files to output folder') + + return parser + + +def _verify_arg(parser, args): + """verify given arguments""" + # check if required arguments is given + missing = [] + if not _utils._is_valid_attr(args, 'input_path'): + missing.append('-i/--input_path') + if not _utils._is_valid_attr(args, 'output_path'): + missing.append('-o/--output_path') + if not _utils._is_valid_attr(args, 'input_shapes'): + missing.append('-s/--input_shapes') + if not _utils._is_valid_attr(args, 'input_types'): + missing.append('-t/--input_types') + + if len(missing): + parser.error('the following arguments are required: ' + ' '.join(missing)) + + +def _parse_arg(parser): + args = parser.parse_args() + # print version + if args.version: + _utils._print_version_and_exit(__file__) + + return args + + +def _apply_verbosity(verbosity): + # NOTE + # TF_CPP_MIN_LOG_LEVEL + # 0 : INFO + WARNING + ERROR + FATAL + # 1 : WARNING + ERROR + FATAL + # 2 : ERROR + FATAL + # 3 : FATAL + if verbosity: + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0' + else: + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' + + +def _parse_shapes(shapes_str): + shapes = [] + for shape_str 
in shapes_str.split(":"): + if shape_str != "": + shapes += [list(map(int, shape_str.split(",")))] + else: + shapes += [[]] + return shapes + + +def _parse_types(types_str): + # There are no convenient way to create torch from string ot numpy dtype, so using this workaround + dtype_dict = { + "bool": torch.bool, + "uint8": torch.uint8, + "int8": torch.int8, + "int16": torch.int16, + "int32": torch.int32, + "int64": torch.int64, + "float16": torch.float16, + "float32": torch.float32, + "float64": torch.float64, + "complex64": torch.complex64, + "complex128": torch.complex128 + } + array = types_str.split(",") + types = [dtype_dict[type_str.strip()] for type_str in array] + return types + + +# merge contents of module into global namespace +def _merge_module(module): + # is there an __all__? if so respect it + if "__all__" in module.__dict__: + names = module.__dict__["__all__"] + else: + # otherwise we import all names that don't begin with _ + names = [x for x in module.__dict__ if not x.startswith("_")] + globals().update({k: getattr(module, k) for k in names}) + + +def _list_classes_from_module(module): + # Parsing the module to get all defined classes + is_member = lambda member: inspect.isclass(member) and member.__module__ == module.__name__ + classes = [cls[1] for cls in inspect.getmembers(module, is_member)] + return classes + + +def _extract_pytorch_model(log_file, parameters_path, python_path): + log_file.write(('Trying to load saved model\n').encode()) + python_model_path = os.path.abspath(python_path) + module_name = os.path.basename(python_model_path) + module_dir = os.path.dirname(python_model_path) + sys.path.append(module_dir) + log_file.write(('Trying to load given python module\n').encode()) + module_loader = importlib.machinery.SourceFileLoader(module_name, python_model_path) + module_spec = importlib.util.spec_from_loader(module_name, module_loader) + python_model_module = importlib.util.module_from_spec(module_spec) + + try: + 
module_loader.exec_module(python_model_module) + except: + raise ValueError('Failed to execute given python model file') + + log_file.write(('Model python module is loaded\n').encode()) + try: + # this branch assumes this parameters_path contains state_dict + state_dict = torch.load(parameters_path) + log_file.write(('Trying to find model class and fill it`s state dict\n').encode()) + model_class_definitions = _list_classes_from_module(python_model_module) + if len(model_class_definitions) != 1: + raise ValueError("Expected only one class as model definition. {}".format( + model_class_definitions)) + pytorch_model_class = model_class_definitions[0] + model = pytorch_model_class() + model.load_state_dict(state_dict) + return model + except: + # this branch assumes this parameters_path contains "entire" model + _merge_module(python_model_module) + log_file.write(('Model python module is merged into main environment\n').encode()) + model = torch.load(parameters_path) + log_file.write(('Pytorch model loaded\n').encode()) + return model + + +def _extract_torchscript_model(log_file, input_path): + # assuming this is a pytorch script + log_file.write(('Trying to load TorchScript model\n').encode()) + try: + pytorch_model = torch.jit.load(input_path) + return pytorch_model + except RuntimeError as e: + log_file.write((str(e) + '\n').encode()) + log_file.write( + 'Failed to import input file. Maybe this it contains only weights? Try pass "python_path" argument\n'. 
+ encode()) + raise + log_file.write(('TorchScript model is loaded\n').encode()) + + +def _extract_mar_model(log_file, tmpdir, input_path): + mar_dir_path = os.path.join(tmpdir, 'mar') + with zipfile.ZipFile(input_path) as zip_input: + zip_input.extractall(path=mar_dir_path) + manifest_path = os.path.join(mar_dir_path, 'MAR-INF/MANIFEST.json') + with open(manifest_path) as manifest_file: + manifest = json.load(manifest_file) + serialized_file = os.path.join(mar_dir_path, manifest['model']['serializedFile']) + if 'modelFile' in manifest['model']: + model_file = os.path.join(mar_dir_path, manifest['model']['modelFile']) + return _extract_pytorch_model(log_file, serialized_file, model_file) + else: + return _extract_torchscript_model(log_file, serialized_file) + + +def _convert(args): + _apply_verbosity(args.verbose) + + # get file path to log + dir_path = os.path.dirname(os.path.realpath(__file__)) + logfile_path = os.path.realpath(args.output_path) + '.log' + with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir: + # save intermediate + if _utils._is_valid_attr(args, 'save_intermediate'): + tmpdir = os.path.dirname(logfile_path) + # convert pytorch to onnx model + input_path = getattr(args, 'input_path') + model_file = getattr(args, 'python_path') + + if input_path[-4:] == '.mar': + pytorch_model = _extract_mar_model(f, tmpdir, input_path) + elif model_file is None: + pytorch_model = _extract_torchscript_model(f, input_path) + else: + pytorch_model = _extract_pytorch_model(f, input_path, model_file) + + input_shapes = _parse_shapes(getattr(args, 'input_shapes')) + input_types = _parse_types(getattr(args, 'input_types')) + + if len(input_shapes) != len(input_types): + raise ValueError('number of input shapes and input types must be equal') + + sample_inputs = [] + for input_spec in zip(input_shapes, input_types): + sample_inputs += [torch.ones(input_spec[0], dtype=input_spec[1])] + + f.write(('Trying to inference loaded model').encode()) + 
sample_outputs = pytorch_model(*sample_inputs) + f.write(('Acquired sample outputs\n').encode()) + + onnx_output_name = os.path.splitext(os.path.basename( + args.output_path))[0] + '.onnx' + onnx_output_path = os.path.join(tmpdir, onnx_output_name) + + onnx_saved = False + # some operations are not supported in early opset versions, try several + for onnx_opset_version in range(9, 15): + f.write(('Trying to save onnx model using opset version ' + + str(onnx_opset_version) + '\n').encode()) + try: + torch.onnx.export( + pytorch_model, + tuple(sample_inputs), + onnx_output_path, + example_outputs=sample_outputs, + opset_version=onnx_opset_version) + onnx_saved = True + break + except: + f.write(('attempt failed\n').encode()) + + if not onnx_saved: + raise ValueError('Failed to save temporary onnx model') + + # convert onnx to tf saved mode + onnx_model = onnx.load(onnx_output_path) + + options = onnx_legalizer.LegalizeOptions() + options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn') + options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm') + onnx_legalizer.legalize(onnx_model, options) + + tf_savedmodel = onnx_tf.backend.prepare(onnx_model) + + savedmodel_name = os.path.splitext(os.path.basename( + args.output_path))[0] + '.savedmodel' + savedmodel_output_path = os.path.join(tmpdir, savedmodel_name) + tf_savedmodel.export_graph(savedmodel_output_path) + + # make a command to convert from tf to tflite + tf2tfliteV2_path = os.path.join(dir_path, 'tf2tfliteV2.py') + tf2tfliteV2_output_name = os.path.splitext(os.path.basename( + args.output_path))[0] + '.tflite' + tf2tfliteV2_output_path = os.path.join(tmpdir, tf2tfliteV2_output_name) + + del args.input_shapes + tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd( + args, tf2tfliteV2_path, savedmodel_output_path, tf2tfliteV2_output_path) + + f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode()) + + # convert tf to tflite + _utils._run(tf2tfliteV2_cmd, logfile=f) + + # make a command to convert from tflite 
to circle + tflite2circle_path = os.path.join(dir_path, 'tflite2circle') + tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, + tf2tfliteV2_output_path, + getattr(args, 'output_path')) + + f.write((' '.join(tflite2circle_cmd) + '\n').encode()) + + # convert tflite to circle + _utils._run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f) + + +def main(): + # parse arguments + parser = _get_parser() + args = _parse_arg(parser) + + # parse configuration file + _utils._parse_cfg(args, 'one-import-pytorch') + + # verify arguments + _verify_arg(parser, args) + + # convert + _convert(args) + + +if __name__ == '__main__': + _utils._safemain(main, __file__) diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf index e2294caa6..999255a34 100644 --- a/compiler/one-cmds/one-import-tf +++ b/compiler/one-cmds/one-import-tf @@ -25,9 +25,14 @@ import subprocess import sys import tempfile +import onelib.make_cmd as _make_cmd import utils as _utils +def get_driver_cfg_section(): + return "one-import-tf" + + def _get_parser(): parser = argparse.ArgumentParser( description='command line tool to convert TensorFlow to circle') @@ -146,7 +151,7 @@ def _convert(args): tf2tfliteV2_output_path = os.path.join( tmpdir, os.path.splitext(os.path.basename(args.output_path))[0]) + '.tflite' - tf2tfliteV2_cmd = _utils._make_tf2tfliteV2_cmd(args, tf2tfliteV2_path, + tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path, getattr(args, 'input_path'), tf2tfliteV2_output_path) @@ -157,7 +162,7 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') - tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path, + tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, tf2tfliteV2_output_path, getattr(args, 'output_path')) diff --git a/compiler/one-cmds/one-import-tflite b/compiler/one-cmds/one-import-tflite index 
7eee0484a..2d756bff6 100644 --- a/compiler/one-cmds/one-import-tflite +++ b/compiler/one-cmds/one-import-tflite @@ -24,12 +24,17 @@ import os import subprocess import sys +import onelib.make_cmd as _make_cmd import utils as _utils # TODO Find better way to suppress trackback on error sys.tracebacklimit = 0 +def get_driver_cfg_section(): + return "one-import-tflite" + + def _get_parser(): parser = argparse.ArgumentParser( description='command line tool to convert TensorFlow lite to circle') @@ -77,7 +82,7 @@ def _convert(args): with open(logfile_path, 'wb') as f: # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') - tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path, + tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, getattr(args, 'input_path'), getattr(args, 'output_path')) diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize index a64abff19..8b1f3f7be 100644 --- a/compiler/one-cmds/one-optimize +++ b/compiler/one-cmds/one-optimize @@ -24,6 +24,8 @@ import os import subprocess import sys +import onelib.constant as _constant +import onelib.make_cmd as _make_cmd import utils as _utils # TODO Find better way to suppress trackback on error @@ -60,7 +62,7 @@ def _get_parser(): '-o', '--output_path', type=str, help='full filepath of the output file') # optimization pass - for opt in _utils._CONSTANT.OPTIMIZATION_OPTS: + for opt in _constant.CONSTANT.OPTIMIZATION_OPTS: # opt = (option_name, help_message) circle2circle_group.add_argument('--' + opt[0], action='store_true', help=opt[1]) @@ -99,7 +101,7 @@ def _optimize(args): with open(logfile_path, 'wb') as f: # make a command to optimize circle model circle2circle_path = os.path.join(dir_path, 'circle2circle') - circle2circle_cmd = _utils._make_circle2circle_cmd(args, circle2circle_path, + circle2circle_cmd = _make_cmd.make_circle2circle_cmd(args, circle2circle_path, getattr(args, 'input_path'), 
getattr(args, 'output_path')) diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv index 285191761..0f75166a7 100644 --- a/compiler/one-cmds/one-prepare-venv +++ b/compiler/one-cmds/one-prepare-venv @@ -26,16 +26,17 @@ VENV_PYTHON=${DRIVER_PATH}/venv/bin/python if [ ! -f ${VENV_ACTIVATE} ]; then # Create python virtual enviornment - python3 -m venv "${DRIVER_PATH}/venv" + python3.8 -m venv "${DRIVER_PATH}/venv" fi # NOTE version # - https://github.com/onnx/onnx/blob/master/docs/Versioning.md # - https://github.com/onnx/onnx-tensorflow/blob/master/Versioning.md -VER_TENSORFLOW=2.3.0 -VER_ONNX=1.10.1 -VER_ONNX_TF=1.9.0 +VER_TENSORFLOW=2.8.0 +VER_ONNX=1.11.0 +VER_ONNXRUNTIME=1.11.0 +VER_ONNX_TF=1.10.0 # Install tensorflow @@ -54,18 +55,32 @@ if [[ ! -z "$ONE_PREPVENV_PIP_OPTION" ]]; then PIP_OPTIONS+=" ${ONE_PREPVENV_PIP_OPTION} " fi -# TODO remove version number of 'pip==20.2.1 setuptools==49.3.0' -# NOTE adding version is for temporary hotfix of setuptools 50.x.y version -${VENV_PYTHON} -m pip ${PIP_OPTIONS} install -U pip==20.2.1 setuptools==49.3.0 -${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW} -${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow==6.2.2 +${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade pip setuptools +if [ -n "${EXT_TENSORFLOW_WHL}" ]; then + ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_TENSORFLOW_WHL} +else + ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW} +fi +${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow +${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability # Install PyTorch and ONNX related -${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.8.1+cpu -f https://download.pytorch.org/whl/torch_stable.html +# NOTE set ONE_PREPVENV_TORCH_STABLE to override 'torch_stable.html' URL. +# torch_stable.html points to download URL of torch wheel file(s) +# but sometimes the server gets unstable, especially from in-house CI. 
+TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html" +if [[ ! -z "$ONE_PREPVENV_TORCH_STABLE" ]]; then + TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}" +fi +${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.11.0+cpu -f ${TORCH_STABLE_URL} + +${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} + +${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnxruntime==${VER_ONNXRUNTIME} # Provide install of custom onnx-tf if [ -n "${EXT_ONNX_TF_WHL}" ]; then - ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} ${EXT_ONNX_TF_WHL} + ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_ONNX_TF_WHL} else - ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} onnx-tf==${VER_ONNX_TF} + ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF} fi diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize index 22d4ddb0e..f2eff24bd 100644 --- a/compiler/one-cmds/one-quantize +++ b/compiler/one-cmds/one-quantize @@ -119,6 +119,18 @@ def _get_parser(): help= "calibration algorithm for post-training quantization (supported: percentile/moving_average, default=percentile). 'percentile' mode uses the n-th percentiles as min/max values. 'moving_average' mode records the moving average of min/max." ) + quantization_group.add_argument( + '--TF-style_maxpool', + action='store_true', + help= + "Force MaxPool Op to have the same input/output quantparams. NOTE: This option can degrade accuracy of some models.)" + ) + quantization_group.add_argument( + '--quant_config', + type=str, + help= + "Path to the quantization configuration file." 
+ ) # arguments for force_quantparam option force_quantparam_group = parser.add_argument_group( @@ -137,6 +149,19 @@ def _get_parser(): force_quantparam_group.add_argument( '--zero_point', type=int, action='append', help='zero point (int)') + # arguments for copy_quantparam option + copy_quantparam_group = parser.add_argument_group( + 'arguments for copy_quantparam option') + + copy_quantparam_group.add_argument( + '--copy_quantparam', + action='store_true', + help='copy quantparam (scale, zero_point) of a tensor to another tensor.') + copy_quantparam_group.add_argument( + '--src_tensor_name', type=str, action='append', help='tensor name (string)') + copy_quantparam_group.add_argument( + '--dst_tensor_name', type=str, action='append', help='tensor name (string)') + return parser @@ -171,6 +196,11 @@ def _verify_arg(parser, args): missing.append('--scale') if not _utils._is_valid_attr(args, 'zero_point'): missing.append('--zero_point') + if _utils._is_valid_attr(args, 'copy_quantparam'): + if not _utils._is_valid_attr(args, 'src_tensor_name'): + missing.append('--src_tensor_name') + if not _utils._is_valid_attr(args, 'dst_tensor_name'): + missing.append('--dst_tensor_name') if len(missing): parser.error('the following arguments are required: ' + ' '.join(missing)) if _utils._is_valid_attr(args, 'force_quantparam'): @@ -180,6 +210,12 @@ def _verify_arg(parser, args): if len(tensors) != len(scales) or len(tensors) != len(zerops): parser.error( 'The same number of tensor_name, scale, and zero_point should be given.') + if _utils._is_valid_attr(args, 'copy_quantparam'): + src_tensors = getattr(args, 'src_tensor_name') + dst_tensors = getattr(args, 'dst_tensor_name') + if len(src_tensors) != len(dst_tensors): + parser.error( + 'The same number of src_tensor_name and dst_tensor_name should be given.') def _parse_arg(parser): @@ -197,6 +233,11 @@ def _quantize(args): _write_qparam(args) return + if _utils._is_valid_attr(args, 'copy_quantparam'): + # copy quantization 
parameters + _copy_qparam(args) + return + # get file path to log dir_path = os.path.dirname(os.path.realpath(__file__)) logfile_path = os.path.realpath(args.output_path) + '.log' @@ -294,12 +335,19 @@ def _quantize(args): circle_quantizer_cmd.append(getattr(args, 'quantized_dtype')) if _utils._is_valid_attr(args, 'granularity'): circle_quantizer_cmd.append(getattr(args, 'granularity')) + if _utils._is_valid_attr(args, 'TF-style_maxpool'): + circle_quantizer_cmd.append('--TF-style_maxpool') if _utils._is_valid_attr(args, 'input_type'): circle_quantizer_cmd.append('--input_type') circle_quantizer_cmd.append(getattr(args, 'input_type')) if _utils._is_valid_attr(args, 'output_type'): circle_quantizer_cmd.append('--output_type') circle_quantizer_cmd.append(getattr(args, 'output_type')) + if _utils._is_valid_attr(args, 'quant_config'): + # NOTE --config conflicts with --config option in onecc, so + # we use quant_config for one-quantize + circle_quantizer_cmd.append('--config') + circle_quantizer_cmd.append(getattr(args, 'quant_config')) # input and output path circle_quantizer_cmd.append(tmp_output_path_2) if _utils._is_valid_attr(args, 'output_path'): @@ -351,6 +399,40 @@ def _write_qparam(args): _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f) +def _copy_qparam(args): + # get file path to log + dir_path = os.path.dirname(os.path.realpath(__file__)) + logfile_path = os.path.realpath(args.output_path) + '.log' + + with open(logfile_path, 'wb') as f: + # get driver path + circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer') + + # make a command to write qparams to the tensors + circle_quantizer_cmd = [circle_quantizer_path] + # verbose + if _utils._is_valid_attr(args, 'verbose'): + circle_quantizer_cmd.append('--verbose') + if _utils._is_valid_attr(args, 'src_tensor_name'): + src_tensor_name = getattr(args, 'src_tensor_name') + if _utils._is_valid_attr(args, 'dst_tensor_name'): + dst_tensor_name = getattr(args, 'dst_tensor_name') 
+ for (src, dst) in zip(src_tensor_name, dst_tensor_name): + circle_quantizer_cmd.append('--copy_quantparam') + circle_quantizer_cmd.append(src) + circle_quantizer_cmd.append(dst) + # input and output path + if _utils._is_valid_attr(args, 'input_path'): + circle_quantizer_cmd.append(getattr(args, 'input_path')) + if _utils._is_valid_attr(args, 'output_path'): + circle_quantizer_cmd.append(getattr(args, 'output_path')) + + f.write((' '.join(circle_quantizer_cmd) + '\n').encode()) + + # run circle-quantizer + _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f) + + def main(): # parse arguments parser = _get_parser() diff --git a/compiler/one-cmds/onecc b/compiler/one-cmds/onecc index ca440d852..25682ff4b 100644 --- a/compiler/one-cmds/onecc +++ b/compiler/one-cmds/onecc @@ -104,10 +104,6 @@ def _verify_arg(parser, args): def _get_driver_name(driver_name): return { - 'one-import-bcq': 'one-import-bcq', - 'one-import-tf': 'one-import-tf', - 'one-import-tflite': 'one-import-tflite', - 'one-import-onnx': 'one-import-onnx', 'one-optimize': 'one-optimize', 'one-quantize': 'one-quantize', 'one-pack': 'one-pack', @@ -130,19 +126,15 @@ def _is_available_driver(config, driver_name): 'onecc', driver_name) -def _verify_cfg(driver_list, config): +def _verify_cfg(import_driver_list, config): if not config.has_section('onecc'): raise ImportError('[onecc] section is required in configuration file') import_driver_cnt = 0 - if _is_available_driver(config, 'one-import-tf'): - import_driver_cnt += 1 - if _is_available_driver(config, 'one-import-tflite'): - import_driver_cnt += 1 - if _is_available_driver(config, 'one-import-bcq'): - import_driver_cnt += 1 - if _is_available_driver(config, 'one-import-onnx'): - import_driver_cnt += 1 + for d in import_driver_list: + if _is_available_driver(config, d): + import_driver_cnt += 1 + if import_driver_cnt > 1: raise AssertionError('Only one import-* driver can be executed') @@ -170,22 +162,27 @@ def main(): config = 
_parse_cfg(args) # verify configuration file - drivers = [ - 'one-import-tf', 'one-import-tflite', 'one-import-bcq', 'one-import-onnx', + bin_dir = os.path.dirname(os.path.realpath(__file__)) + import_drivers_dict = _utils._detect_one_import_drivers(bin_dir) + transform_drivers = [ 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile' ] - _verify_cfg(drivers, config) + _verify_cfg(import_drivers_dict, config) # get sections to run section_to_run = [] - for d in drivers: + for d in list(import_drivers_dict) + transform_drivers: if _is_available_driver(config, d): section_to_run.append(d) # run dir_path = os.path.dirname(os.path.realpath(__file__)) for section in section_to_run: - driver_name = _get_driver_name(section) + if section in import_drivers_dict: + # we already has driver name in dict + driver_name = import_drivers_dict[section] + else: + driver_name = _get_driver_name(section) options = ['--config', getattr(args, 'config'), '--section', section] if _utils._is_valid_attr(args, 'verbose'): options.append('--verbose') diff --git a/compiler/one-cmds/onelib/constant.py b/compiler/one-cmds/onelib/constant.py new file mode 100644 index 000000000..7ddd7382d --- /dev/null +++ b/compiler/one-cmds/onelib/constant.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +class CONSTANT: + __slots__ = () # This prevents access via __dict__. 
+ OPTIMIZATION_OPTS = ( + # (OPTION_NAME, HELP_MESSAGE) + ('O1', 'enable O1 optimization pass'), + ('convert_nchw_to_nhwc', + 'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.' + ), + ('expand_broadcast_const', 'expand broadcastable constant node inputs'), + ('nchw_to_nhwc_input_shape', + 'convert the input shape of the model (argument for convert_nchw_to_nhwc)'), + ('nchw_to_nhwc_output_shape', + 'convert the output shape of the model (argument for convert_nchw_to_nhwc)'), + ('fold_add_v2', 'fold AddV2 op with constant inputs'), + ('fold_cast', 'fold Cast op with constant input'), + ('fold_dequantize', 'fold Dequantize op'), + ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'), + ('fold_gather', 'fold Gather op'), + ('fold_sparse_to_dense', 'fold SparseToDense op'), + ('forward_reshape_to_unaryop', 'Forward Reshape op'), + ('fuse_add_with_tconv', 'fuse Add op to Transposed'), + ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'), + ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'), + ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'), + ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'), + ('fuse_bcq', 'apply Binary Coded Quantization'), + ('fuse_preactivation_batchnorm', + 'fuse BatchNorm operators of pre-activations to Convolution op'), + ('fuse_mean_with_mean', 'fuse two consecutive Mean ops'), + ('fuse_transpose_with_mean', + 'fuse Mean with a preceding Transpose under certain conditions'), + ('make_batchnorm_gamma_positive', + 'make negative gamma of BatchNorm to a small positive value (1e-10).' + ' Note that this pass can change the execution result of the model.' 
+ ' So, use it only when the impact is known to be acceptable.'), + ('fuse_activation_function', 'fuse Activation function to a preceding operator'), + ('fuse_instnorm', 'fuse ops to InstanceNorm operator'), + ('replace_cw_mul_add_with_depthwise_conv', + 'replace channel-wise Mul/Add with DepthwiseConv2D'), + ('remove_fakequant', 'remove FakeQuant ops'), + ('remove_quantdequant', 'remove Quantize-Dequantize sequence'), + ('remove_redundant_quantize', 'remove redundant Quantize ops'), + ('remove_redundant_reshape', 'fuse or remove subsequent Reshape ops'), + ('remove_redundant_transpose', 'fuse or remove subsequent Transpose ops'), + ('remove_unnecessary_reshape', 'remove unnecessary reshape ops'), + ('remove_unnecessary_slice', 'remove unnecessary slice ops'), + ('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'), + ('remove_unnecessary_split', 'remove unnecessary split ops'), + ('resolve_customop_add', 'convert Custom(Add) op to Add op'), + ('resolve_customop_batchmatmul', + 'convert Custom(BatchMatmul) op to BatchMatmul op'), + ('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'), + ('resolve_customop_max_pool_with_argmax', + 'convert Custom(MaxPoolWithArgmax) to net of builtin operators'), + ('shuffle_weight_to_16x1float32', + 'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.' 
+ ' Note that it only converts weights whose row is a multiple of 16'), + ('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'), + ('substitute_padv2_to_pad', 'convert certain condition PadV2 to Pad'), + ('substitute_splitv_to_split', 'convert certain condition SplitV to Split'), + ('substitute_squeeze_to_reshape', 'convert certain condition Squeeze to Reshape'), + ('substitute_strided_slice_to_reshape', + 'convert certain condition StridedSlice to Reshape'), + ('substitute_transpose_to_reshape', + 'convert certain condition Transpose to Reshape'), + ('transform_min_max_to_relu6', 'transform Minimum-Maximum pattern to Relu6 op'), + ('transform_min_relu_to_relu6', 'transform Minimum(6)-Relu pattern to Relu6 op')) + + +CONSTANT = CONSTANT() diff --git a/compiler/one-cmds/onelib/make_cmd.py b/compiler/one-cmds/onelib/make_cmd.py new file mode 100644 index 000000000..d8380f28d --- /dev/null +++ b/compiler/one-cmds/onelib/make_cmd.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os
import sys


def _is_valid_attr(args, attr):
    """Return True if `args` has attribute `attr` and its value is truthy."""
    return hasattr(args, attr) and getattr(args, attr)


def make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path):
    """make a command for running tf2tfliteV2.py

    Args:
        args: parsed arguments (argparse.Namespace-like). Optional attributes
            (verbose, model_format[_cmd], converter_version[_cmd], input_path,
            output_path, input_arrays, input_shapes, output_arrays) are
            forwarded only when present and truthy.
        driver_path (str): path to the tf2tfliteV2.py driver script
        input_path (str): model input path (used when args.input_path is set)
        output_path (str): model output path (used when args.output_path is set)

    Returns:
        list of str: argv-style command line
    """
    cmd = [sys.executable, os.path.expanduser(driver_path)]
    # verbose
    if _is_valid_attr(args, 'verbose'):
        cmd.append('--verbose')
    # model_format: a pre-built flag (model_format_cmd) wins over the bare name
    if _is_valid_attr(args, 'model_format_cmd'):
        cmd.append(getattr(args, 'model_format_cmd'))
    elif _is_valid_attr(args, 'model_format'):
        cmd.append('--' + getattr(args, 'model_format'))
    else:
        cmd.append('--graph_def')  # default value
    # converter version, same precedence scheme as model_format
    if _is_valid_attr(args, 'converter_version_cmd'):
        cmd.append(getattr(args, 'converter_version_cmd'))
    elif _is_valid_attr(args, 'converter_version'):
        cmd.append('--' + getattr(args, 'converter_version'))
    else:
        cmd.append('--v1')  # default value
    # input_path
    if _is_valid_attr(args, 'input_path'):
        cmd.append('--input_path')
        cmd.append(os.path.expanduser(input_path))
    # output_path
    if _is_valid_attr(args, 'output_path'):
        cmd.append('--output_path')
        cmd.append(os.path.expanduser(output_path))
    # input_arrays
    if _is_valid_attr(args, 'input_arrays'):
        cmd.append('--input_arrays')
        cmd.append(getattr(args, 'input_arrays'))
    # input_shapes
    if _is_valid_attr(args, 'input_shapes'):
        cmd.append('--input_shapes')
        cmd.append(getattr(args, 'input_shapes'))
    # output_arrays
    if _is_valid_attr(args, 'output_arrays'):
        cmd.append('--output_arrays')
        cmd.append(getattr(args, 'output_arrays'))

    return cmd


def make_tflite2circle_cmd(driver_path, input_path, output_path):
    """make a command for running tflite2circle

    Returns:
        list of str: [driver, input, output] with '~' expanded in each
    """
    cmd = [driver_path, input_path, output_path]
    return [os.path.expanduser(c) for c in cmd]


def make_circle2circle_cmd(args, driver_path, input_path, output_path):
    """make a command for running circle2circle

    Args:
        args: parsed arguments; boolean/string optimization options listed in
            onelib.constant.CONSTANT.OPTIMIZATION_OPTS are forwarded as flags
        driver_path (str): path to the circle2circle binary
        input_path (str): input circle model path
        output_path (str): output circle model path

    Returns:
        list of str: argv-style command line
    """
    # Imported lazily so this module stays importable (and the helpers above
    # stay usable) even when the onelib package is not on sys.path.
    import onelib.constant as _constant

    cmd = [os.path.expanduser(c) for c in [driver_path, input_path, output_path]]
    # profiling
    if _is_valid_attr(args, 'generate_profile_data'):
        cmd.append('--generate_profile_data')
    # optimization pass(only true/false options)
    # TODO support options whose number of arguments is more than zero
    for opt in _constant.CONSTANT.OPTIMIZATION_OPTS:
        if _is_valid_attr(args, opt[0]):
            # ./driver --opt[0]
            # boolean value (set programmatically): truthy check above already
            # filtered out False, so just pass the flag through
            if isinstance(getattr(args, opt[0]), bool):
                cmd.append('--' + opt[0])
            # String values come from the config file interface: usually
            # "SomeOption=True", but during development a user may write
            # "SomeOption=False" instead of removing the line, so falsy
            # spellings are skipped rather than forwarded.
            if isinstance(getattr(args, opt[0]), str) and \
                    getattr(args, opt[0]).lower() not in ['false', '0', 'n']:
                cmd.append('--' + opt[0])

    return cmd
See proto buffers format in +# https://github.com/onnx/onnx/blob/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/onnx.proto3 +# +# More examples of handling onnx models could be found here: +# https://github.com/onnx/onnx/tree/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/examples +# +# List of transformations: +# - Replace RNN operation with unrolled subgraph +# - Replace LSTM operation with unrolled subgraph + + +class LegalizeOptions: + """Controls transformations that legalizer apply + + Attributes: + unroll_rnn (bool): default is False. If True - unrolls RNN operations + unroll_lstm (bool): default is False. If True - unrolls LSTM operations + """ + + unroll_rnn = False + unroll_lstm = False + + +def _reverse_str(s): + return ''.join(reversed(s)) + + +def _parse_tensor_name(name): + """Splits tensor name to base part and serial number + + Most of tensor names have following format: "tensor_123". + This function breaks name into two values: "tensor_" and 123. + Tensor names like this: "321" are broken into "" and 321. + + Serial number is used to create unique tensor names using given base name. + + Args: + name (str): tensor name + + Returns: + tuple of str, int: base name and serial number of tensor + """ + rev = _reverse_str(name) + m = re.match('(\d*)(.*)', rev) + if m.groups()[0] != '': + return (_reverse_str(m.groups()[1]), int(_reverse_str(m.groups()[0]))) + else: + return (_reverse_str(m.groups()[1]), 0) + + +class _ModelTransformerHelper: + """Helper for onnx model transformation + + This helper is used for convenient operation replacement in onnx model + + Attributes: + _model (onnx.onnx_ml_pb2.ModelProto): target model that should be changed + _nodes_to_delete (list of onnx.onnx_ml_pb2.NodeProto): list of replaced operations + _insert_id (int): position to insert created operations (should be in topologically sorted) + _base_name_idx (dict from str to int): maps tensor "base" name to + largest existing serial num. 
For example model has tensors "t_1", "t_2", "t_4", + in that case _base_name_idx["t_"] == 4. + This attribute is used for unique tensor name generation. + """ + + def __init__(self, model): + self._model = model + self._nodes_to_delete = [] + self._insert_id = 0 + # each tensor has name containing base name and unique number. for example: + # "abc_123": "abs_" - base name, "123" - unique number + # if no number in name, consider it is equal to "0" + + # mapping from base names to largest given number + self._base_name_idx = {} + # gather name information for existing tensors + for node in model.graph.node: + for t in list(node.input) + list(node.output): + base_name, number = _parse_tensor_name(t) + if base_name in self._base_name_idx: + self._base_name_idx[base_name] = max(self._base_name_idx[base_name], + number) + else: + self._base_name_idx[base_name] = number + + def make_tensor_with_base_name(self, base_name): + """ Create unique name for given base_name + + Args: + base_name (str): base tensor name + + Returns: + str : unique tensor name that starts with base_name + """ + if base_name in self._base_name_idx: + self._base_name_idx[base_name] += 1 + return base_name + str(self._base_name_idx[base_name]) + else: + self._base_name_idx[base_name] = 0 + return base_name + '0' + + def make_node(self, opcode, inputs, outputs, *p_args, **k_args): + """Create arbitrary node and insert it in graph. 
+ + Args: + opcode (str): opcode name of desired operation + inputs (list of str): names of input tensors + outputs (list of str or int): names of existing tensors to use as output tensors for operation or + number of tensors that should be created + p_args: additional arguments for onnx make_node helper + k_args: attributes for onnx node + + Returns: + list of str: list of output tensor names + """ + if type(outputs) == int: + outputs = [self.make_tensor_with_base_name('') for i in range(outputs)] + assert (type(outputs) == list) + node = onnx.helper.make_node(opcode, inputs, outputs, *p_args, **k_args) + self._model.graph.node.insert(self._insert_id, node) + self._insert_id += 1 + return outputs + + def make_split(self, input, split_sizes, axis): + """Create Split operation and insert it in graph. + + Args: + input (str): name of input tensor + split_sizes (list of int): list of split sizes + axis (int): number of axis to split + + Returns: + list: list of output tensor names + """ + return self.make_node( + 'Split', [input], len(split_sizes), axis=axis, split=split_sizes) + + def make_concat(self, inputs, axis): + """Create Concat operation and insert it in graph. + + Args: + inputs (list of str): list of tensors names to concat + axis (int): axis number to concat + + Returns: + str: output tensor name + """ + return self.make_node('Concat', inputs, 1, axis=axis)[0] + + def make_squeeze(self, input, axes): + """Create Squeeze operation and insert it in graph. + + Args: + input (str): name of input tensor + axes (list of int): list of dimension containing ones to remove + + Returns: + str: output tensor name + """ + return self.make_node('Squeeze', [input], 1, axes=axes)[0] + + def make_unsqueeze(self, input, axes): + """Create Unsqueeze operation and insert it in graph. 
+ + Args: + input (str): name of input tensor + axes (list of int): list of dimension to insert ones + + Returns: + str: output tensor name + """ + return self.make_node('Unsqueeze', [input], 1, axes=axes)[0] + + def make_gemm(self, A, B, C, trans_a=False, trans_b=False): + """Create Gemm operation and insert it in graph. + + Result tensor contains A*B + C + + Args: + A (str): name of tensor A + B (str): name of tensor B + C (str): name of tensor C + transA (bool): if True, transpose tensor A before multiplication + transB (bool): if True, transpose tensor B before multiplication + + Returns: + str: output tensor name + """ + return self.make_node( + 'Gemm', [A, B, C], 1, transA=bool(trans_a), transB=bool(trans_b))[0] + + def make_add(self, a, b): + """Creates Add operation and insert it in graph. + + Args: + a (str): name of left operand tensor + b (str): name of right operand tensor + + Returns: + str: output tensor name + """ + return self.make_node('Add', [a, b], 1)[0] + + def make_mul(self, a, b): + """Creates Mul operation and insert it in graph. + + Args: + a (str): name of left operand tensor + b (str): name of right operand tensor + + Returns: + str: output tensor name + """ + return self.make_node('Mul', [a, b], 1)[0] + + def make_clip(self, input, min, max): + """Create Clip operation and insert it in graph. + + Args: + input (str): input tensor name + min (int/float): lower clip bound + max (int/float ): upper clip bound + + Returns: + str: output tensor name + """ + return self.make_node('Clip', [input], 1, min=min, max=max)[0] + + def make_act(self, input, act_name): + """Create activation function operation and insert it in graph. 
+ + Args: + input (str): input tensor name + act_name (str): name of activation function, one of ['Relu', 'Tanh', 'Sigmoid'] + + Returns: + str: output tensor name + """ + assert (act_name in ['Relu', 'Tanh', 'Sigmoid']) + return self.make_node(act_name, [input], 1)[0] + + def make_constant_tensor(self, tensor_data, base_name): + """Creates onnx constant tensor + + Args: + tensor_data (numpy.ndarray): tensor data + base_name (str): prefix of constant tensor name + + Returns: + str: name of created constant tensor + """ + tensor = onnx.numpy_helper.from_array(tensor_data) + tensor.name = self.make_tensor_with_base_name(base_name) + self._model.graph.initializer.append(tensor) + return tensor.name + + def mark_for_deletion(self, node): + self._nodes_to_delete += [node] + + def get_insert_id(self): + return self._insert_id + + def set_insert_id(self, insert_id): + self._insert_id = insert_id + + def delete_marked_nodes(self): + for node in self._nodes_to_delete: + self._model.graph.node.remove(node) + + +class _TensorInfo: + def __init__(self, dtype, shape): + self.dtype = dtype + self.shape = shape + + +def _get_tensor_infos(model): + """Infer tensor shapes and dtypes + Args: + model (onnx.onnx_ml_pb2.ModelProto): model to process + + Returns: + dict from str to _TensorInfo: maps tensor name to shape and dtype information + """ + + inferred_shape_model = onnx.shape_inference.infer_shapes(model) + + infos = {} + for tensor in list(inferred_shape_model.graph.value_info) + list( + inferred_shape_model.graph.input): + info = _TensorInfo(tensor.type.tensor_type.elem_type, []) + for dim in tensor.type.tensor_type.shape.dim: + info.shape += [dim.dim_value] + infos[tensor.name] = info + + for tensor in list(model.graph.initializer): + infos[tensor.name] = _TensorInfo(tensor.data_type, tensor.dims) + return infos + + +def _dtype_to_np(dtype): + """Convert onnx dtype value to numpy dtype class + + For more types see: + 
https://github.com/onnx/onnx/blob/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/onnx.proto3#L484 + + Args: + dtype (int): onnx dtype + + Returns: + numpy data type: numpy dtype, like np.float32 + """ + + if dtype == 1: + return np.float32 + else: + raise NotImplementedError('unsupported data type') + + +def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, activation_name): + """Generate subgraph of one direction of unrolled RNN layer + + Args: + transformer (_ModelTransformerHelper): helper for model generation + X (list of str): names of input tensors in sequence. Tensor shapes: [batch_size, input_size]. + W (str): name of weight tensor + R (str): name of recurrence weight tensor + B (str): name of bias tensor + initial_h (str or None): name of tensor containing initial hidden state. Shape [batch_size, hidden_size] + clip (float or None): range which clips input of activations + act (str): activation function + """ + # one direction RNN: + # + # For details see: + # https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Changelog.md#RNN-7 + # + # H = f(X*(W^T) + h*(R^T) + B) + # + # H - new hidden state + # h - previous hidden state + # X - current input + # W - input weights matrix + # R - reccurent weights matrix + # Wb - input weights matmul bias + # Rb - reccurent weights matmul bias + # f - activation function + + seq_length = len(X) + first_iter = 0 + state_tensors = [] + if initial_h is not None: + previous_state_tensor = initial_h + else: + first_iter = 1 + state_tensor = transformer.make_gemm(X[0], W, B, trans_b=True) + if clip != None: + state_tensor = transformer.make_clip(state_tensor, min=-clip, max=clip) + previous_state_tensor = transformer.make_act(state_tensor, activation_name) + state_tensors += [previous_state_tensor] + + for i in range(first_iter, seq_length): + state_tensor = transformer.make_gemm(X[i], W, B, trans_b=True) + state_tensor = transformer.make_gemm( + previous_state_tensor, R, 
def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, activation,
                                  clip, direction, hidden_size, layout):
    """Generate Simple (forward or reverse) unrolled RNN

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        original_node (onnx.onnx_ml_pb2.NodeProto): unidirectional RNN operation to unroll
        x (list of str): list of input tensors (input tensor split along "time" dimension)
        tensor_infos (dict from str to _TensorInfo): dict maps tensor name to it's shape and dtype info
        activation (str): name of activation function
        clip (float or None): range which clips input of activations
        direction (str): "forward" or "reverse"
        hidden_size (int): size of hidden state
        layout (int): See attribute description:
            https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-56
    """

    inputs = original_node.input
    outputs = original_node.output
    if direction == 'reverse':
        x.reverse()
    w = transformer.make_squeeze(inputs[1], axes=[0])
    r = transformer.make_squeeze(inputs[2], axes=[0])
    if len(inputs) > 3 and inputs[3] != '':
        # bias input concatenates Wb and Rb: fold the two halves into one tensor
        raw_bias_tensor = transformer.make_squeeze(inputs[3], axes=[0])
        splitted_bias_tensors = transformer.make_split(
            raw_bias_tensor, split_sizes=[hidden_size] * 2, axis=0)
        b = transformer.make_add(splitted_bias_tensors[0], splitted_bias_tensors[1])
    else:
        # no bias given: synthesize a zero bias of the input's dtype
        data_type = _dtype_to_np(tensor_infos[inputs[2]].dtype)
        b = transformer.make_constant_tensor(
            np.zeros(hidden_size, dtype=data_type), "zero_bias")
    if len(inputs) > 5 and inputs[5] != '':
        direction_dim = layout
        initial_h = transformer.make_squeeze(inputs[5], axes=[direction_dim])
    else:
        initial_h = None

    state_tensors = _generate_one_direction_RNN(transformer, x, w, r, b, initial_h, clip,
                                                activation)
    y_direction_dim = layout + 1
    y_h_direction_dim = layout
    state_layout_tensors = []
    seq_length_dim = layout
    for state in state_tensors:
        state_layout_tensors += [
            transformer.make_unsqueeze(state, axes=[seq_length_dim, y_direction_dim])
        ]

    # use low-level interface to attach to existing tensors
    Y_h = outputs[1]
    transformer.make_node(
        'Unsqueeze', [state_tensors[-1]], [Y_h], axes=[y_h_direction_dim])
    # BUGFIX: for the 'reverse' direction the states were produced on the
    # time-reversed input, so they must be re-reversed to lay Y out in the
    # original time order. The unidirectional LSTM unroller in this file
    # already performs this step; it was missing here.
    if direction == 'reverse':
        state_layout_tensors.reverse()
    Y = outputs[0]
    transformer.make_node(
        'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
range(2): + raw_bias_tensors_squeezed = transformer.make_squeeze( + raw_bias_tensors[d], axes=[0]) + splitted_bias_tensors = transformer.make_split( + raw_bias_tensors_squeezed, split_sizes=[hidden_size] * 2, axis=0) + b += [ + transformer.make_add(splitted_bias_tensors[0], splitted_bias_tensors[1]) + ] + else: + data_type = _dtype_to_np(tensor_infos[inputs[2]].dtype) + b = [ + transformer.make_constant_tensor( + np.zeros(hidden_size, dtype=data_type), "zero_bias") + ] * 2 + initial_h = [None, None] + if len(inputs) > 5 and inputs[5] != '': + direction_dim = layout + initial_h = transformer.make_split( + inputs[5], split_sizes=[1, 1], axis=direction_dim) + for d in range(2): + initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim]) + + state_f_tensors = _generate_one_direction_RNN(transformer, x, w[0], r[0], b[0], + initial_h[0], clip, activations[0]) + x.reverse() + state_b_tensors = _generate_one_direction_RNN(transformer, x, w[1], r[1], b[1], + initial_h[1], clip, activations[1]) + state_b_tensors.reverse() + + y_direction_dim = layout + 1 + y_h_direction_dim = layout + state_layout_tensors = [] + seq_length_dim = layout + seq_length = len(x) + for t in range(seq_length): + state_f = state_f_tensors[t] + state_b = state_b_tensors[t] + state_layout_tensors_f = transformer.make_unsqueeze( + state_f, axes=[seq_length_dim, y_direction_dim]) + state_layout_tensors_b = transformer.make_unsqueeze( + state_b, axes=[seq_length_dim, y_direction_dim]) + state_layout_tensors += [ + transformer.make_concat( + [state_layout_tensors_f, state_layout_tensors_b], axis=y_direction_dim) + ] + + last_f_state_layout_tensor = transformer.make_unsqueeze( + state_f_tensors[-1], axes=[y_h_direction_dim]) + last_b_state_layout_tensor = transformer.make_unsqueeze( + state_b_tensors[0], axes=[y_h_direction_dim]) + + # use low-level interface to attach to existing tensors + Y_h = outputs[1] + transformer.make_node( + 'Concat', [last_f_state_layout_tensor, 
def _legalize_RNN(transformer, tensor_infos, node):
    """Unroll RNN operation

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        tensor_infos (dict from str to _TensorInfo): dict maps tensor name to it's shape and dtype info
        node (onnx.onnx_ml_pb2.NodeProto): RNN operation to unroll

    Raises:
        NotImplementedError: for RNN features this unroller does not cover
        RuntimeError: when the `direction` attribute has an unknown value
    """
    inputs = node.input
    # inputs[4] is sequence_lens: per-batch variable sequence lengths
    if len(inputs) > 4 and inputs[4] != '':
        raise NotImplementedError('Variadic length of output is not supported')
    # attribute defaults per the ONNX RNN specification
    activation_alpha = []
    activation_beta = []
    activations = ['Tanh', 'Tanh']
    clip = None
    direction = 'forward'
    hidden_size = 0
    layout = 0

    for attr in node.attribute:
        if attr.name == 'activation_alpha':
            activation_alpha = attr.floats
        elif attr.name == 'activation_beta':
            activation_beta = attr.floats
        elif attr.name == 'activations':
            activations = [item.decode('UTF-8') for item in attr.strings]
        elif attr.name == 'clip':
            clip = attr.f
        elif attr.name == 'direction':
            direction = attr.s.decode('UTF-8')
        elif attr.name == 'hidden_size':
            hidden_size = attr.i
        elif attr.name == 'layout':
            layout = attr.i

    if len(activation_alpha) > 0 or len(activation_beta) > 0:
        # BUGFIX: message previously said "LSTM" in this RNN legalizer
        raise NotImplementedError('Unsupported parameters for RNN activations')

    for act in activations:
        if act not in ['Relu', 'Tanh', 'Sigmoid']:
            raise NotImplementedError('Unsupported activation function')

    seq_length_dim = layout
    seq_length = tensor_infos[inputs[0]].shape[seq_length_dim]
    if hidden_size == 0:
        # fall back to the recurrence weight shape: R is [num_dirs, hidden, hidden]
        hidden_size = tensor_infos[inputs[2]].shape[2]

    # split the input along the time axis into per-frame tensors
    input_split_tensor = transformer.make_split(
        inputs[0], split_sizes=[1] * seq_length, axis=seq_length_dim)
    x = []
    for frame in input_split_tensor:
        # NOTE(review): the squeeze axis is hard-coded to 0; for layout == 1
        # the time axis is 1, so presumably only layout 0 is exercised here —
        # confirm against the callers before relying on layout == 1.
        x += [transformer.make_squeeze(frame, axes=[0])]

    if direction in ['forward', 'reverse']:
        _transform_unidirectional_RNN(transformer, node, x, tensor_infos, activations[0],
                                      clip, direction, hidden_size, layout)
    elif direction == 'bidirectional':
        _transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations,
                                     clip, hidden_size, layout)
    else:
        raise RuntimeError('Unknown RNN type')

    # the original fused RNN node is replaced by the unrolled subgraph
    transformer.mark_for_deletion(node)
Ct-1 + Wbf + Rbf) + # ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc) + # Ct = ft (.) Ct-1 + it (.) ct + # ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo) + # Ht = ot (.) h(Ct) + # + # X - input tensor + # i - input gate + # o - output gate + # f - forget gate + # c - cell gate + # t - time step (t-1 means previous time step) + # W[iofc] - W parameter weight matrix for input, output, forget, and cell gates + # R[iofc] - R recurrence weight matrix for input, output, forget, and cell gates + # Wb[iofc] - W bias vectors for input, output, forget, and cell gates + # Rb[iofc] - R bias vectors for input, output, forget, and cell gates + # P[iof] - P peephole weight vector for input, output, and forget gates + # WB[iofc] - W parameter weight matrix for backward input, output, forget, and cell gates + # RB[iofc] - R recurrence weight matrix for backward input, output, forget, and cell gates + # WBb[iofc] - W bias vectors for backward input, output, forget, and cell gates + # RBb[iofc] - R bias vectors for backward input, output, forget, and cell gates + # PB[iof] - P peephole weight vector for backward input, output, and forget gates + # H - Hidden state + + seq_length = len(X) + state_h_tensors = [] + + w_tensors = transformer.make_split(W, split_sizes=[hidden_size] * 4, axis=0) + W = {'i': w_tensors[0], 'o': w_tensors[1], 'f': w_tensors[2], 'c': w_tensors[3]} + + r_tensors = transformer.make_split(R, split_sizes=[hidden_size] * 4, axis=0) + R = {'i': r_tensors[0], 'o': r_tensors[1], 'f': r_tensors[2], 'c': r_tensors[3]} + + if B is not None: + separate_b_tensors = transformer.make_split( + B, split_sizes=[hidden_size] * 8, axis=0) + b_tensors = [] + for i in range(4): + b_tensors += [ + transformer.make_add(separate_b_tensors[i], separate_b_tensors[i + 4]) + ] + else: + b_tensors = [ + transformer.make_constant_tensor( + np.zeros((hidden_size), dtype=dtype), 'zero_b') + ] * 4 + B = {'i': b_tensors[0], 'o': b_tensors[1], 'f': b_tensors[2], 'c': b_tensors[3]} + + if 
initial_h is not None: + previous_h_state_tensor = initial_h + else: + previous_h_state_tensor = transformer.make_constant_tensor( + np.zeros((batch_size, hidden_size), dtype=dtype), 'initial_h') + + if initial_c is not None: + previous_c_state_tensor = initial_c + else: + previous_c_state_tensor = transformer.make_constant_tensor( + np.zeros((batch_size, hidden_size), dtype=dtype), 'initial_c') + + if P is not None: + p_tensors = transformer.make_split(P, split_sizes=[hidden_size] * 3, axis=0) + P = {'i': p_tensors[0], 'o': p_tensors[1], 'f': p_tensors[2]} + else: + zero = transformer.make_constant_tensor( + np.zeros((hidden_size), dtype=dtype), 'zero_peephole') + P = {'i': zero, 'o': zero, 'f': zero} + + for i in range(seq_length): + # it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi) + it = transformer.make_gemm(X[i], W['i'], B['i'], trans_b=True) + it = transformer.make_gemm(previous_h_state_tensor, R['i'], it, trans_b=True) + peephole_it = transformer.make_mul(P['i'], previous_c_state_tensor) + it = transformer.make_add(it, peephole_it) + if clip is not None: + it = transformer.make_clip(it, min=-clip, max=clip) + it = transformer.make_act(it, act['f']) + + # ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf) + ft = transformer.make_gemm(X[i], W['f'], B['f'], trans_b=True) + ft = transformer.make_gemm(previous_h_state_tensor, R['f'], ft, trans_b=True) + peephole_ft = transformer.make_mul(P['f'], previous_c_state_tensor) + ft = transformer.make_add(ft, peephole_ft) + if clip is not None: + ft = transformer.make_clip(ft, min=-clip, max=clip) + ft = transformer.make_act(ft, act['f']) + + # ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc) + ct = transformer.make_gemm(X[i], W['c'], B['c'], trans_b=True) + ct = transformer.make_gemm(previous_h_state_tensor, R['c'], ct, trans_b=True) + if clip is not None: + ct = transformer.make_clip(ct, min=-clip, max=clip) + ct = transformer.make_act(ct, act['g']) + + # Ct = ft (.) Ct-1 + it (.) 
ct + ft_Ct = transformer.make_mul(ft, previous_c_state_tensor) + it_ct = transformer.make_mul(it, ct) + Ct = transformer.make_add(ft_Ct, it_ct) + previous_c_state_tensor = Ct + + # ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo) + ot = transformer.make_gemm(X[i], W['o'], B['o'], trans_b=True) + ot = transformer.make_gemm(previous_h_state_tensor, R['o'], ot, trans_b=True) + peephole_ot = transformer.make_mul(P['o'], Ct) + ot = transformer.make_add(ot, peephole_ot) + if clip is not None: + ot = transformer.make_clip(ot, min=-clip, max=clip) + ot = transformer.make_act(ot, act['f']) + + # Ht = ot (.) h(Ct) + Ht = transformer.make_act(Ct, act['h']) + Ht = transformer.make_mul(ot, Ht) + previous_h_state_tensor = Ht + state_h_tensors += [Ht] + + return (state_h_tensors, previous_c_state_tensor) + + +def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos, + activations, clip, direction, hidden_size, layout): + """Generate Simple (forward or reverse) unrolled LSTM + + Args: + transformer (_ModelTransformerHelper): transformation helper + original_node (onnx.onnx_ml_pb2.NodeProto): unidirectional LSTM operation to unroll + x (list of str): list of input tensors (input tensor split along "time" dimension) + tensor_infos (dict from str to _TensorInfo): dict maps tensor name to it's shape and dtype info + activations (list of str): list of length 3 containing names of activation functions + clip (float or None): range which clips input of activations + direction (str): "forward" or "reverse" + hidden_size (int): size of hidden state + layout (int): See attribute description: + https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-37 + """ + + inputs = original_node.input + outputs = original_node.output + if direction == 'reverse': + x.reverse() + w = transformer.make_squeeze(inputs[1], axes=[0]) + r = transformer.make_squeeze(inputs[2], axes=[0]) + + b = None + if len(inputs) > 3 and inputs[3] 
!= '': + b = transformer.make_squeeze(inputs[3], axes=[0]) + + initial_h = None + if len(inputs) > 5 and inputs[5] != '': + direction_dim = layout + initial_h = transformer.make_squeeze(inputs[5], axes=[direction_dim]) + + initial_c = None + if len(inputs) > 6 and inputs[6] != '': + direction_dim = layout + initial_c = transformer.make_squeeze(inputs[6], axes=[direction_dim]) + + p = None + if len(inputs) > 7 and inputs[7] != '': + p = transformer.make_squeeze(inputs[7], axes=[0]) + + dtype = _dtype_to_np(tensor_infos[inputs[0]].dtype) + batch_size = tensor_infos[inputs[0]].shape[1 - layout] + + act = {'f': activations[0], 'g': activations[1], 'h': activations[2]} + + state_h_tensors, state_c_tensor = _generate_one_direction_LSTM( + transformer, x, w, r, b, initial_h, initial_c, p, clip, act, dtype, hidden_size, + batch_size) + + y_direction_dim = layout + 1 + y_h_direction_dim = layout + state_layout_tensors = [] + seq_length_dim = layout + for h_state in state_h_tensors: + state_layout_tensors += [ + transformer.make_unsqueeze(h_state, axes=[seq_length_dim, y_direction_dim]) + ] + + # use low-level interface to attach to existing tensors + Y_h = outputs[1] + transformer.make_node( + 'Unsqueeze', [state_h_tensors[-1]], [Y_h], axes=[y_h_direction_dim]) + Y_c = outputs[2] + transformer.make_node( + 'Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim]) + if direction == 'reverse': + state_layout_tensors.reverse() + Y = outputs[0] + transformer.make_node( + 'Concat', state_layout_tensors, [Y], axis=seq_length_dim) + + +def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, activations, + clip, hidden_size, layout): + """Generate Bidirectional unrolled LSTM + + Args: + transformer (_ModelTransformerHelper): transformation helper + original_node (onnx.onnx_ml_pb2.NodeProto): bidirectional LSTM operation to unroll + x (list of str): list of input tensors (input tensor split along "time" dimension) + tensor_infos (dict from str to 
_TensorInfo): dict maps tensor name to it's shape and dtype info + activations (list of str): list of length 6, containing names of forward and reverse activations + clip (float or None): range which clips input of activations + hidden_size (int): size of hidden state + layout (int): See attribute description: + https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-37 + """ + + inputs = original_node.input + outputs = original_node.output + + w = transformer.make_split(inputs[1], split_sizes=[1, 1], axis=0) + r = transformer.make_split(inputs[2], split_sizes=[1, 1], axis=0) + for d in range(2): + w[d] = transformer.make_squeeze(w[d], axes=[0]) + r[d] = transformer.make_squeeze(r[d], axes=[0]) + + b = [None, None] + if len(inputs) > 3 and inputs[3] != '': + b = transformer.make_split(inputs[3], split_sizes=[1, 1], axis=0) + for d in range(2): + b[d] = transformer.make_squeeze(b[d], axes=[0]) + + initial_h = [None, None] + if len(inputs) > 5 and inputs[5] != '': + direction_dim = layout + initial_h = transformer.make_split( + inputs[5], split_sizes=[1, 1], axis=direction_dim) + for d in range(2): + initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim]) + + initial_c = [None, None] + if len(inputs) > 6 and inputs[6] != '': + direction_dim = layout + initial_c = transformer.make_split( + inputs[6], split_sizes=[1, 1], axis=direction_dim) + for d in range(2): + initial_c[d] = transformer.make_squeeze(initial_c[d], axes=[direction_dim]) + + p = [None, None] + if len(inputs) > 7 and inputs[7] != '': + p = transformer.make_split(inputs[7], split_sizes=[1, 1], axis=0) + for d in range(2): + p[d] = transformer.make_squeeze(p[d], axes=[0]) + + dtype = _dtype_to_np(tensor_infos[inputs[0]].dtype) + batch_size = tensor_infos[inputs[0]].shape[1 - layout] + + act = [{ + 'f': activations[0], + 'g': activations[1], + 'h': activations[2] + }, { + 'f': activations[3], + 'g': activations[4], + 'h': activations[5] + 
}] + + state_f_h_tensors, state_f_c_tensor = _generate_one_direction_LSTM( + transformer, x, w[0], r[0], b[0], initial_h[0], initial_c[0], p[0], clip, act[0], + dtype, hidden_size, batch_size) + x.reverse() + state_b_h_tensors, state_b_c_tensor = _generate_one_direction_LSTM( + transformer, x, w[1], r[1], b[1], initial_h[1], initial_c[1], p[1], clip, act[1], + dtype, hidden_size, batch_size) + state_b_h_tensors.reverse() + + y_direction_dim = layout + 1 + y_c_direction_dim = layout + state_layout_tensors = [] + seq_length_dim = layout + for f_h_state, b_h_state in zip(state_f_h_tensors, state_b_h_tensors): + state_f_layout_tensors = transformer.make_unsqueeze( + f_h_state, axes=[seq_length_dim, y_direction_dim]) + state_b_layout_tensors = transformer.make_unsqueeze( + b_h_state, axes=[seq_length_dim, y_direction_dim]) + state_layout_tensors += [ + transformer.make_concat( + [state_f_layout_tensors, state_b_layout_tensors], axis=y_direction_dim) + ] + + last_f_state_layout_tensor = transformer.make_unsqueeze( + state_f_h_tensors[-1], axes=[y_c_direction_dim]) + last_b_state_layout_tensor = transformer.make_unsqueeze( + state_b_h_tensors[0], axes=[y_c_direction_dim]) + + Y_h = outputs[1] + transformer.make_node( + 'Concat', [last_f_state_layout_tensor, last_b_state_layout_tensor], [Y_h], + axis=y_c_direction_dim) + + Y_f_c = transformer.make_unsqueeze(state_f_c_tensor, axes=[y_c_direction_dim]) + Y_b_c = transformer.make_unsqueeze(state_b_c_tensor, axes=[y_c_direction_dim]) + Y_c = outputs[2] + transformer.make_node( + 'Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim) + + Y = outputs[0] + transformer.make_node( + 'Concat', state_layout_tensors, [Y], axis=seq_length_dim) + + +def _legalize_LSTM(transformer, tensor_infos, node): + """Unroll LSTM operation + + Args: + transformer (_ModelTransformerHelper): transformation helper + tensor_infos (dict from str to _TensorInfo): dict maps tensor name to it's shape and dtype info + node (onnx.onnx_ml_pb2.NodeProto): 
LSTM operation to unroll + """ + inputs = node.input + if len(inputs) > 4 and inputs[4] != '': + raise NotImplementedError('Variadic length of output is not supported') + # attributes + activation_alpha = [] + activation_beta = [] + activations = ['Sigmoid', 'Tanh', 'Tanh'] * 2 + clip = None + direction = 'forward' + hidden_size = 0 + input_forget = 0 + layout = 0 + + for attr in node.attribute: + if attr.name == 'activation_alpha': + activation_alpha = attr.floats + if attr.name == 'activation_beta': + activation_beta = attr.floats + if attr.name == 'activations': + activations = list(map(lambda item: item.decode('UTF-8'), list(attr.strings))) + if attr.name == 'clip': + clip = attr.f + if attr.name == 'direction': + direction = attr.s.decode('UTF-8') + if attr.name == 'hidden_size': + hidden_size = attr.i + if attr.name == 'input_forget': + input_forget = attr.i + if attr.name == 'layout': + layout = attr.i + + if len(activation_alpha) > 0 or len(activation_beta) > 0: + raise NotImplementedError('Unsupported parameters for LSTM activations') + + for act in activations: + if act not in ['Relu', 'Tanh', 'Sigmoid']: + raise NotImplementedError('Unsupported activation function') + + if input_forget != 0: + raise NotImplementedError('Unsupported input_forget attribute value') + + seq_length_dim = layout + seq_length = tensor_infos[inputs[0]].shape[seq_length_dim] + if hidden_size == 0: + hidden_size = tensor_infos[inputs[2]].shape[2] + + input_split_tensor = transformer.make_split( + inputs[0], split_sizes=[1] * seq_length, axis=seq_length_dim) + x = [] + for i in range(len(input_split_tensor)): + input_frame_tensor = input_split_tensor[i] + squeezed_frame_tensor = transformer.make_squeeze(input_frame_tensor, axes=[0]) + x += [squeezed_frame_tensor] + + if direction in ['forward', 'reverse']: + _transform_unidirectional_LSTM(transformer, node, x, tensor_infos, activations, + clip, direction, hidden_size, layout) + elif direction == 'bidirectional': + 
_transform_bidirectional_LSTM(transformer, node, x, tensor_infos, activations, + clip, hidden_size, layout) + else: + raise RuntimeError('Unknown LSTM type') + + transformer.mark_for_deletion(node) + + +def legalize(model, options): + """Replace selected operations in onnx model + + Replaces operations, selected by given options with different operation sequences. + For example remove unsupported parts of graph with sequences of supported operations. + + Note that graph is changes inplace. + + Args: + model (onnx.onnx_ml_pb2.ModelProto): target model + options (LegalizeOptions): + """ + tensor_infos = _get_tensor_infos(model) + + transformer = _ModelTransformerHelper(model) + + node_id = 0 + while node_id < len(model.graph.node): + node = model.graph.node[node_id] + if node.op_type == 'RNN' and options.unroll_rnn: + # opset version is required by split operation + if model.opset_import[0].version >= 13: + raise NotImplementedError( + 'Can not generate code with opcode version 13 and greater') + transformer.set_insert_id(node_id) + _legalize_RNN(transformer, tensor_infos, node) + node_id = transformer.get_insert_id() + elif node.op_type == 'LSTM' and options.unroll_lstm: + if model.opset_import[0].version >= 13: + raise NotImplementedError( + 'Can not generate code with opcode version 13 and greater') + transformer.set_insert_id(node_id) + _legalize_LSTM(transformer, tensor_infos, node) + node_id = transformer.get_insert_id() + node_id += 1 + + transformer.delete_marked_nodes() + + +if __name__ == '__main__': + if len(sys.argv) < 3: + print('usage: ./legalize_onnx.py <path to input model> <path to output model>\n' + '\n' + ' In stand-alone utility mode this tool provides basic funtionality\n' + ' If you want to have more control over applied transformations, use this legalizer as a library') + exit(1) + options = LegalizeOptions() + options.unroll_lstm = True + options.unroll_rnn = True + model = onnx.load(sys.argv[1]) + legalize(model, options) + onnx.save(model, 
sys.argv[2]) diff --git a/compiler/one-cmds/tests/CMakeLists.txt b/compiler/one-cmds/tests/CMakeLists.txt index 6f9f2847e..caea756c2 100644 --- a/compiler/one-cmds/tests/CMakeLists.txt +++ b/compiler/one-cmds/tests/CMakeLists.txt @@ -3,6 +3,7 @@ # Gather test scripts file(GLOB TESTITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test") file(GLOB CONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.cfg") +file(GLOB QCONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.qconf.json") # Create a script to run the tests at installation folder set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh") @@ -39,6 +40,11 @@ foreach(CONFIGITEM IN ITEMS ${CONFIGITEMS}) install(FILES ${CONFIGITEM} DESTINATION test) endforeach(CONFIGITEM) +foreach(QCONFIGITEM IN ITEMS ${QCONFIGITEMS}) + get_filename_component(ITEM_PREFIX ${QCONFIGITEM} NAME_WE) + install(FILES ${QCONFIGITEM} DESTINATION test) +endforeach(QCONFIGITEM) + file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n") file(APPEND "${DRIVER_SCRIPT}" @@ -52,6 +58,8 @@ fi\n set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh") set(PREPROCESS_IMAGES_PY "${CMAKE_CURRENT_SOURCE_DIR}/preprocess_images.py") +set(ONNX_LEGALIZE_RUN_COMPARE "${CMAKE_CURRENT_SOURCE_DIR}/onnx_legalize_run_compare.py") +set(PRINT_ONNX_MODEL "${CMAKE_CURRENT_SOURCE_DIR}/print_onnx_model.py") install(FILES ${DRIVER_SCRIPT} PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE @@ -71,5 +79,23 @@ install(FILES ${PREPROCESS_IMAGES_PY} WORLD_READ DESTINATION test) +install(FILES ${ONNX_LEGALIZE_RUN_COMPARE} + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION test) + +install(FILES ${PRINT_ONNX_MODEL} + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION test) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/README.txt DESTINATION test) + +add_subdirectory(onnx-operations) + +if(ENABLE_ONE_IMPORT_PYTORCH) 
+ add_subdirectory(pytorch-operations) +endif(ENABLE_ONE_IMPORT_PYTORCH) diff --git a/compiler/one-cmds/tests/one-quantize_009.qconf.json b/compiler/one-cmds/tests/one-quantize_009.qconf.json new file mode 100644 index 000000000..ac274e83a --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_009.qconf.json @@ -0,0 +1,36 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D", + "dtype" : "int16", + "granularity" : "channel" + }, + { + "name" : "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool", + "dtype" : "int16", + "granularity" : "channel" + }, + { + "name" : "InceptionV3/InceptionV3/Mixed_5b/concat", + "dtype" : "int16", + "granularity" : "channel" + }, + { + "name" : "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool", + "dtype" : "int16", + "granularity" : "channel" + }, + { + "name" : "InceptionV3/InceptionV3/Mixed_7c/concat", + "dtype" : "int16", + "granularity" : "channel" + }, + { + "name" : "InceptionV3/Predictions/Reshape_1", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/one-cmds/tests/one-quantize_009.test b/compiler/one-cmds/tests/one-quantize_009.test new file mode 100644 index 000000000..aa0670350 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_009.test @@ -0,0 +1,55 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.random.quantized.mixed.circle" + +rm -rf ${outputfile} + +# to create inception_v3.circle +if [[ ! -s ${inputfile} ]]; then + /bin/bash one-import_001.test > /dev/null 2>&1 + return_code=$? + if [[ ${return_code} != 0 ]]; then + trap_err_onexit + fi +fi + +# run test without input data +one-quantize \ +--input_dtype float32 \ +--quantized_dtype uint8 \ +--granularity channel \ +--quant_config one-quantize_009.qconf.json \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onnx-operations/CMakeLists.txt b/compiler/one-cmds/tests/onnx-operations/CMakeLists.txt new file mode 100644 index 000000000..e6b2b354a --- /dev/null +++ b/compiler/one-cmds/tests/onnx-operations/CMakeLists.txt @@ -0,0 +1,86 @@ +# Install one-cmds test scripts for onnx models + +# Gather test scripts +set(EXAMPLES_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/examples") +file(GLOB TEST_EXAMPLES RELATIVE "${EXAMPLES_DIR}" "${EXAMPLES_DIR}/*") + +set(TEST_DST test/onnx-operations) + +install(DIRECTORY "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/" DESTINATION "${TEST_DST}") + +set(ONNX_IMPORT_OPTIONS "--unroll_rnn --unroll_lstm") + +foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES}) + set(TEST_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/${TEST_ITEM}.test") + + # generate test script + file(WRITE "${TEST_SCRIPT}" "#!/bin/bash\n\n") + file(APPEND "${TEST_SCRIPT}" "filename_ext=\"\$(basename -- $0)\"\n") + file(APPEND "${TEST_SCRIPT}" "filename=\"\${filename_ext%.*}\"\n") + file(APPEND "${TEST_SCRIPT}" "trap_err_onexit()\n") + file(APPEND "${TEST_SCRIPT}" "{\n") + file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} FAILED\"\n") + file(APPEND "${TEST_SCRIPT}" "exit 255\n") + file(APPEND "${TEST_SCRIPT}" "}\n") + file(APPEND "${TEST_SCRIPT}" "trap trap_err_onexit ERR\n") + file(APPEND "${TEST_SCRIPT}" "outputfile=\"${TEST_ITEM}.circle\"\n") + file(APPEND "${TEST_SCRIPT}" "one-import-onnx --input_path=${TEST_ITEM}.onnx --output_path=${TEST_ITEM}.circle\ + ${ONNX_IMPORT_OPTIONS} &> /dev/null\n") + file(APPEND "${TEST_SCRIPT}" "if [[ ! 
-s \"\${outputfile}\" ]]; then\n") + file(APPEND "${TEST_SCRIPT}" "trap_err_onexit\n") + file(APPEND "${TEST_SCRIPT}" "fi\n") + file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} SUCCESS\"\n") + + install(FILES "${TEST_SCRIPT}" DESTINATION "${TEST_DST}") +endforeach(TEST_ITEM) + + +# Create a script to run the tests at installation folder +set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh") + +file(WRITE "${DRIVER_SCRIPT}" "#!/bin/bash\n\n") +file(APPEND "${DRIVER_SCRIPT}" "SCRIPT_PATH=$(cd $(dirname \${BASH_SOURCE[0]}) && pwd)\n") +file(APPEND "${DRIVER_SCRIPT}" "pushd $SCRIPT_PATH > /dev/null\n") +file(APPEND "${DRIVER_SCRIPT}" "rm -rf runtestall.log\n") +file(APPEND "${DRIVER_SCRIPT}" "export PATH=$SCRIPT_PATH/../bin:$PATH\n") +file(APPEND "${DRIVER_SCRIPT}" "if [[ $# -ge 1 ]]; then\n") +file(APPEND "${DRIVER_SCRIPT}" " USER_PATH=$1\n") +file(APPEND "${DRIVER_SCRIPT}" " export PATH=$USER_PATH:$PATH\n") +file(APPEND "${DRIVER_SCRIPT}" "fi\n") +file(APPEND "${DRIVER_SCRIPT}" "\n") +file(APPEND "${DRIVER_SCRIPT}" "# refer https://github.com/Samsung/ONE/issues/6286\n") +file(APPEND "${DRIVER_SCRIPT}" "set -o pipefail\n\n") +file(APPEND "${DRIVER_SCRIPT}" "fail_count=0\n") +file(APPEND "${DRIVER_SCRIPT}" "trap \"(( fail_count++ ))\" ERR\n\n") + +foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES}) + file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}.test\" | tee -a runtestall.log\n") +endforeach(TEST_ITEM) + +file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n") + +file(APPEND "${DRIVER_SCRIPT}" +"if [[ $fail_count != 0 ]]; then + echo \"$fail_count TESTS FAILED\" + exit 255 +else + echo \"ALL TESTS PASSED!\" +fi\n +") + +set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh") + +install(FILES "${DRIVER_SCRIPT}" + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION "${TEST_DST}") + +install(FILES "${PREPARE_TEST_MATERIALS_SH}" + PERMISSIONS OWNER_WRITE 
OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION "${TEST_DST}") + +install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md" + DESTINATION "${TEST_DST}") diff --git a/compiler/one-cmds/tests/onnx-operations/README.md b/compiler/one-cmds/tests/onnx-operations/README.md new file mode 100644 index 000000000..928fb84dd --- /dev/null +++ b/compiler/one-cmds/tests/onnx-operations/README.md @@ -0,0 +1,28 @@ +## Overview + +This directory contains auxilliary tests for small onnx target models. + +Most of the models contains single operations, but some contains multiple operations, that represents one operation with complex semantics. + +Models for these tests are taken from res/PyTorchExamples. + +## To run all tests + +Steps: +1) run 'one-prepare-venv' in bin folder to prepare python virtual-env with TensorFlow + - you need to run this only once + - read 'doc/how-to-prepare-virtualenv.txt' for more information + ``` + bin/one-prepare-venv + ``` +2) run 'test/onnx-operations/prepare_test_materials.sh' to download test material models + - you need to run this only once + - you need internet connection to download files + - you may need to install 'wget' and 'unzip' packages + ``` + test/onnx-operations/prepare_test_materials.sh + ``` +3) run 'test/onnx-operations/runtestall.sh' to run the test + ``` + test/onnx-operations/runtestall.sh + ``` diff --git a/compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh b/compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh new file mode 100644 index 000000000..274a60f0a --- /dev/null +++ b/compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +pushd $SCRIPT_PATH > /dev/null + +for test_case in examples/*; do + python3 ptem.py $(basename ${test_case}) +done + +cp output/*.onnx . + +popd > /dev/null diff --git a/compiler/one-cmds/tests/onnx_legalize_run_compare.py b/compiler/one-cmds/tests/onnx_legalize_run_compare.py new file mode 100644 index 000000000..9b02b74af --- /dev/null +++ b/compiler/one-cmds/tests/onnx_legalize_run_compare.py @@ -0,0 +1,129 @@ +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import onnxruntime as rt +import onnx +import sys +import numpy as np +import importlib.util + + +def _generate_inputs(model): + """Generate random inputs for given model + + Args: + model (onnx.onnx_ml_pb2.ModelProto): target model + + Returns: + dict from str to numpy.ndarray: generated inputs + """ + inputs = {} + for input in model.graph.input: + # check if elem type is float32 + # list of types could be extended, this is a property of current testsuite + assert ( + input.type.tensor_type.elem_type == onnx.TensorProto.DataType.Value("FLOAT")) + input_shape = [] + for dim in input.type.tensor_type.shape.dim: + input_shape += [dim.dim_value] + inputs[input.name] = np.random.random(input_shape).astype(np.float32) + return inputs + + +def _run_model(model, inputs): + """Run given model + + Args: + model (onnx.onnx_ml_pb2.ModelProto): target model + inputs (dict from str to numpy.ndarray): sample inputs + + Returns: + list of numpy.ndarray: inference outputs + """ + output_names = list(map(lambda output: output.name, model.graph.output)) + session = rt.InferenceSession(model.SerializeToString()) + outputs = session.run(output_names, inputs) + return outputs + + +def _compare_results(ref_outputs, test_outputs, tolerance): + """Generate random inputs for given model + + Args: + ref_outputs (list of numpy.ndarray): reference values (original model results) + test_outputs (list of numpy.ndarray): tested values (modified model results) + tolerance (float): maximum acceptable relative difference + + Returns: + bool: True if outputs considered equal, False otherwise + """ + num_outputs = len(ref_outputs) + assert (len(test_outputs) == num_outputs) + for i in range(num_outputs): + if ref_outputs[i].shape != test_outputs[i].shape: + print("output {} shape mismatch: ref({}) vs test({})".format( + i, ref_outputs[i].shape, test_outputs[i].shape)) + return False + + abs_difference = np.abs(ref_outputs[i] - test_outputs[i]) + abs_ref_maximum = np.abs(ref_outputs[i]).max() + 
peak_error = abs_difference.max() / abs_ref_maximum + + if peak_error > tolerance: + print("output {} peak error to value ratio {} is too big".format( + i, peak_error)) + return False + return True + + +if __name__ == '__main__': + if len(sys.argv) < 6: + exit('expecting 5 arguments:\n' + ' - path to input model\n' + ' - path to "legalized" model\n' + ' - path to onnx_legalizer.py\n' + ' - base name for generated test inputs\n' + ' - output tolerance') + input_model_path = sys.argv[1] + output_model_path = sys.argv[2] + onnx_legalizer_path = sys.argv[3] + input_dump_path = sys.argv[4] + tolerance = float(sys.argv[5]) + + onnx_legalizer_spec = importlib.util.spec_from_file_location( + "onnx_legalizer", onnx_legalizer_path) + onnx_legalizer = importlib.util.module_from_spec(onnx_legalizer_spec) + onnx_legalizer_spec.loader.exec_module(onnx_legalizer) + + model = onnx.load(input_model_path) + + inputs = _generate_inputs(model) + + for i in inputs: + np.save('{}_{}.npy'.format(input_dump_path, i), inputs[i]) + + ref_outputs = _run_model(model, inputs) + + options = onnx_legalizer.LegalizeOptions() + options.unroll_rnn = True + options.unroll_lstm = True + onnx_legalizer.legalize(model, options) + + with open(output_model_path, 'wb') as f: + f.write(model.SerializeToString()) + + test_outputs = _run_model(model, inputs) + + if not _compare_results(ref_outputs, test_outputs, tolerance): + exit('comparison failed') diff --git a/compiler/one-cmds/tests/prepare_test_materials.sh b/compiler/one-cmds/tests/prepare_test_materials.sh index 7f269530c..c80c59834 100644 --- a/compiler/one-cmds/tests/prepare_test_materials.sh +++ b/compiler/one-cmds/tests/prepare_test_materials.sh @@ -91,6 +91,39 @@ if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then # https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444 fi +function files_missing() { + condition="test " + + for f in "${@}"; do + condition="${condition} ! 
-s ${f} -o" + done + + # last condition is always false to properly close last "or" + condition="${condition} -z non_zero_string " + ${condition} +} + +declare -a TEST_RECCURENT_MODELS=(\ + "RNN.onnx" "RNN-nobias.onnx" "RNN-relu.onnx" "RNN-bi.onnx" "RNN-noinit.onnx"\ + "LSTM.onnx" "LSTM-bi.onnx" "LSTM-noinit.onnx" "LSTM-nobias.onnx" +) + +if files_missing "${TEST_RECCURENT_MODELS[@]}"; then + rm -rf test_onnx_recurrent_models.zip + wget https://github.com/Samsung/ONE/files/8067909/test_onnx_recurrent_models.zip + unzip test_onnx_recurrent_models.zip + # https://github.com/Samsung/ONE/issues/8395#issuecomment-1040072097 +fi + +declare -a NEG_TEST_RECCURENT_MODELS=("rnn_variable.onnx" "lstm_variable.onnx") + +if files_missing "${NEG_TEST_RECCURENT_MODELS[@]}"; then + rm -rf neg_test_onnx_recurrent_models.zip + wget https://github.com/Samsung/ONE/files/8137183/neg_test_onnx_recurrent_models.zip + unzip neg_test_onnx_recurrent_models.zip + # https://github.com/Samsung/ONE/issues/8395#issuecomment-1050364375 +fi + # prepare 'inception_v3.circle' file used for quantization test inputfile="./inception_v3.pb" outputfile="./inception_v3.circle" diff --git a/compiler/one-cmds/tests/print_onnx_model.py b/compiler/one-cmds/tests/print_onnx_model.py new file mode 100644 index 000000000..ecab0f6da --- /dev/null +++ b/compiler/one-cmds/tests/print_onnx_model.py @@ -0,0 +1,20 @@ +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import onnx +import sys + +if __name__ == '__main__': + model = onnx.load(sys.argv[1]) + print(model) diff --git a/compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt b/compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt new file mode 100644 index 000000000..10f30a5c9 --- /dev/null +++ b/compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt @@ -0,0 +1,109 @@ +# Install one-cmds test scripts for pytorch models + +# Gather test scripts +set(EXAMPLES_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/examples") +file(GLOB TEST_EXAMPLES RELATIVE "${EXAMPLES_DIR}" "${EXAMPLES_DIR}/*") +file(GLOB SPECIAL_TEST_ITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test") + +set(TEST_DST test/pytorch-operations) + +install(DIRECTORY "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/" DESTINATION "${TEST_DST}") + +set(PYTORCH_IMPORT_OPTIONS "--unroll_rnn --unroll_lstm") + +foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES}) + set(TEST_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/${TEST_ITEM}.test") + + # generate test script + file(WRITE "${TEST_SCRIPT}" "#!/bin/bash\n\n") + file(APPEND "${TEST_SCRIPT}" "filename_ext=\"\$(basename -- $0)\"\n") + file(APPEND "${TEST_SCRIPT}" "filename=\"\${filename_ext%.*}\"\n") + file(APPEND "${TEST_SCRIPT}" "trap_err_onexit()\n") + file(APPEND "${TEST_SCRIPT}" "{\n") + file(APPEND "${TEST_SCRIPT}" " echo \"\${filename_ext} FAILED\"\n") + file(APPEND "${TEST_SCRIPT}" " exit 255\n") + file(APPEND "${TEST_SCRIPT}" "}\n") + file(APPEND "${TEST_SCRIPT}" "trap trap_err_onexit ERR\n") + file(APPEND "${TEST_SCRIPT}" "outputfile=\"${TEST_ITEM}.circle\"\n") + file(APPEND "${TEST_SCRIPT}" "input_shapes=\$(head -n 1 ${TEST_ITEM}.spec)\n") + file(APPEND "${TEST_SCRIPT}" "input_types=\$(tail -n 1 ${TEST_ITEM}.spec)\n") + file(APPEND "${TEST_SCRIPT}" "one-import-pytorch --input_path=${TEST_ITEM}.pth --output_path=${TEST_ITEM}.circle\ + 
${PYTORCH_IMPORT_OPTIONS} --input_shapes=\${input_shapes} --input_types=\${input_types} &> /dev/null\n") + file(APPEND "${TEST_SCRIPT}" "if [[ ! -s \"\${outputfile}\" ]]; then\n") + file(APPEND "${TEST_SCRIPT}" " trap_err_onexit\n") + file(APPEND "${TEST_SCRIPT}" "fi\n") + file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} SUCCESS\"\n") + + install(FILES "${TEST_SCRIPT}" DESTINATION "${TEST_DST}") +endforeach(TEST_ITEM) + + +# Create a script to run the tests at installation folder +set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh") + +file(WRITE "${DRIVER_SCRIPT}" "#!/bin/bash\n\n") +file(APPEND "${DRIVER_SCRIPT}" "SCRIPT_PATH=$(cd $(dirname \${BASH_SOURCE[0]}) && pwd)\n") +file(APPEND "${DRIVER_SCRIPT}" "pushd $SCRIPT_PATH > /dev/null\n") +file(APPEND "${DRIVER_SCRIPT}" "rm -rf runtestall.log\n") +file(APPEND "${DRIVER_SCRIPT}" "export PATH=$SCRIPT_PATH/../bin:$PATH\n") +file(APPEND "${DRIVER_SCRIPT}" "if [[ $# -ge 1 ]]; then\n") +file(APPEND "${DRIVER_SCRIPT}" " USER_PATH=$1\n") +file(APPEND "${DRIVER_SCRIPT}" " export PATH=$USER_PATH:$PATH\n") +file(APPEND "${DRIVER_SCRIPT}" "fi\n") +file(APPEND "${DRIVER_SCRIPT}" "\n") +file(APPEND "${DRIVER_SCRIPT}" "# refer https://github.com/Samsung/ONE/issues/6286\n") +file(APPEND "${DRIVER_SCRIPT}" "set -o pipefail\n\n") +file(APPEND "${DRIVER_SCRIPT}" "fail_count=0\n") +file(APPEND "${DRIVER_SCRIPT}" "trap \"(( fail_count++ ))\" ERR\n\n") + +foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES}) + file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}.test\" | tee -a runtestall.log\n") +endforeach(TEST_ITEM) + +file(APPEND "${DRIVER_SCRIPT}" "\necho \"special test items\" | tee -a runtestall.log\n\n") + +foreach(TEST_ITEM IN ITEMS ${SPECIAL_TEST_ITEMS}) + file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}\" | tee -a runtestall.log\n") +endforeach(TEST_ITEM) + +file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n") + +file(APPEND "${DRIVER_SCRIPT}" +"if [[ $fail_count != 0 ]]; then + echo \"$fail_count TESTS 
FAILED\" + exit 255 +else + echo \"ALL TESTS PASSED!\" +fi\n +") + +set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh") +set(EXAMPLE_GENERATOR "${CMAKE_CURRENT_SOURCE_DIR}/example_generator.py") +set(AUX_GENERATOR "${CMAKE_CURRENT_SOURCE_DIR}/aux_generator.py") + +install(FILES "${DRIVER_SCRIPT}" + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION "${TEST_DST}") + +install(FILES "${PREPARE_TEST_MATERIALS_SH}" + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION "${TEST_DST}") + +install(FILES "${EXAMPLE_GENERATOR}" "${AUX_GENERATOR}" + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION "${TEST_DST}") + +install(FILES ${SPECIAL_TEST_ITEMS} + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION "${TEST_DST}") + +install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md" + DESTINATION "${TEST_DST}") diff --git a/compiler/one-cmds/tests/pytorch-operations/README.md b/compiler/one-cmds/tests/pytorch-operations/README.md new file mode 100644 index 000000000..231a10eb4 --- /dev/null +++ b/compiler/one-cmds/tests/pytorch-operations/README.md @@ -0,0 +1,28 @@ +## Overview + +This directory contains auxilliary tests for small pytorch target models. + +Most of the models contains single operations, but some contains multiple operations, that represents one operation with complex semantics. + +Models for these tests are taken from res/PyTorchExamples. 
+ +## To run all tests + +Steps: +1) run 'one-prepare-venv' in bin folder to prepare python virtual-env with TensorFlow + - you need to run this only once + - read 'doc/how-to-prepare-virtualenv.txt' for more information + ``` + bin/one-prepare-venv + ``` +2) run 'test/pytorch-operations/prepare_test_materials.sh' to download test material models + - you need to run this only once + - you need internet connection to download files + - you may need to install 'wget' and 'unzip' packages + ``` + test/pytorch-operations/prepare_test_materials.sh + ``` +3) run 'test/pytorch-operations/runtestall.sh' to run the test + ``` + test/pytoch-operations/runtestall.sh + ``` diff --git a/compiler/one-cmds/tests/pytorch-operations/aux_generator.py b/compiler/one-cmds/tests/pytorch-operations/aux_generator.py new file mode 100644 index 000000000..6c9afcded --- /dev/null +++ b/compiler/one-cmds/tests/pytorch-operations/aux_generator.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# PyTorch aux tests generator + +import torch +import torch.nn as nn +import json +import zipfile +import os + + +# model +class net_abs(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, input): + return torch.abs(input) + + +if __name__ == '__main__': + model = net_abs() + # save "entire" model for entire_model.test + torch.save(model, 'entire_model.pth') + + # save state_dict file for state_dict_model.test + state_dict_path = 'state_dict_model.pth' + torch.save(model.state_dict(), state_dict_path) + + # create files for mar_torchscript_model.test + torchscript_path = 'torchscript_model.pth' + inp = torch.randn(1, 2, 3, 3) + traced_model = torch.jit.trace(model, inp) + torch.jit.save(traced_model, torchscript_path) + # create manifest + manifest = {} + manifest['createdOn'] = '11/11/1111 11:11:11' + manifest['runtime'] = 'python' + manifest['model'] = {} + manifest['model']['modelName'] = 'torchscript_model' + manifest['model']['serializedFile'] = torchscript_path + manifest['model']['handler'] = 'image_classifier' + manifest['model']['modelVersion'] = '1.0' + manifest['archiverVersion'] = '0.4.2' + + with zipfile.ZipFile('mar_torchscript_model.mar', 'w') as mar_file: + with mar_file.open('MAR-INF/MANIFEST.json', 'w') as manifest_file: + manifest_file.write(json.dumps(manifest).encode()) + mar_file.write(torchscript_path) + + # create files for mar_state_dict_model.test + model_file_path = os.path.basename(__file__) + # create manifest + manifest = {} + manifest['createdOn'] = '11/11/1111 11:11:11' + manifest['runtime'] = 'python' + manifest['model'] = {} + manifest['model']['modelName'] = 'state_dict_model' + manifest['model']['serializedFile'] = state_dict_path + manifest['model']['handler'] = 'image_classifier' + manifest['model']['modelFile'] = model_file_path + manifest['model']['modelVersion'] = '1.0' + manifest['archiverVersion'] = '0.4.2' + + with zipfile.ZipFile('mar_state_dict_model.mar', 'w') as mar_file: + with 
mar_file.open('MAR-INF/MANIFEST.json', 'w') as manifest_file: + manifest_file.write(json.dumps(manifest).encode()) + mar_file.write(state_dict_path) + mar_file.write(model_file_path) diff --git a/compiler/one-cmds/tests/pytorch-operations/entire_model.test b/compiler/one-cmds/tests/pytorch-operations/entire_model.test new file mode 100644 index 000000000..a72a56ffd --- /dev/null +++ b/compiler/one-cmds/tests/pytorch-operations/entire_model.test @@ -0,0 +1,40 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test one-import-pytorch ability to import NN model stored in python file and serialized "entire" model. +# "Entire" model is serialized with `torch.save(model)` method. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +outputfile="entire_model.circle" + +# run test +one-import-pytorch --input_path=entire_model.pth --python_path=aux_generator.py --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/pytorch-operations/example_generator.py b/compiler/one-cmds/tests/pytorch-operations/example_generator.py new file mode 100644 index 000000000..20a80c895 --- /dev/null +++ b/compiler/one-cmds/tests/pytorch-operations/example_generator.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# PyTorch Example manager + +import torch +import importlib +import argparse +import os + +from pathlib import Path + +print("PyTorch version=", torch.__version__) + +parser = argparse.ArgumentParser(description='Process PyTorch python examples') + +parser.add_argument('examples', metavar='EXAMPLES', nargs='+') + +args = parser.parse_args() + +output_folder = "./" + +Path(output_folder).mkdir(parents=True, exist_ok=True) + + +class JitWrapper(torch.nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, *args): + if len(args) == 1: + return self.model.forward(args[0]) + else: + return self.model.forward(args) + + +for example in args.examples: + print("Generate '" + example + ".pth'", end='') + # load example code + # replace - with _ in name, otherwise pytorch generates invalid torchscript + module_name = "examples." 
+ example.replace('-', '_') + module_loader = importlib.machinery.SourceFileLoader( + module_name, os.path.join("examples", example, "__init__.py")) + module_spec = importlib.util.spec_from_loader(module_name, module_loader) + module = importlib.util.module_from_spec(module_spec) + module_loader.exec_module(module) + + jittable_model = JitWrapper(module._model_) + + traced_model = torch.jit.trace(jittable_model, module._dummy_) + # save .pth + torch.jit.save(traced_model, output_folder + example + ".pth") + + input_shapes = "" + input_types = "" + + input_samples = module._dummy_ + if isinstance(input_samples, torch.Tensor): + input_samples = [input_samples] + for inp_idx in range(len(input_samples)): + input_data = input_samples[inp_idx] + + shape = input_data.shape + for dim in range(len(shape)): + input_shapes += str(shape[dim]) + if dim != len(shape) - 1: + input_shapes += "," + + if input_data.dtype == torch.bool: + input_types += "bool" + elif input_data.dtype == torch.uint8: + input_types += "uint8" + elif input_data.dtype == torch.int8: + input_types += "int8" + elif input_data.dtype == torch.int16: + input_types += "int16" + elif input_data.dtype == torch.int32: + input_types += "int32" + elif input_data.dtype == torch.int64: + input_types += "int64" + elif input_data.dtype == torch.float16: + input_types += "float32" + elif input_data.dtype == torch.float32: + input_types += "float32" + elif input_data.dtype == torch.float64: + input_types += "float64" + elif input_data.dtype == torch.complex64: + input_types += "complex64" + elif input_data.dtype == torch.complex128: + input_types += "complex128" + else: + raise ValueError('unsupported dtype') + + if inp_idx != len(input_samples) - 1: + input_shapes += ":" + input_types += "," + + with open(example + ".spec", "w") as spec_file: + print(input_shapes, file=spec_file) + print(input_types, file=spec_file) + + print(" - Done") diff --git a/compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test 
b/compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test new file mode 100644 index 000000000..9892dbbed --- /dev/null +++ b/compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test @@ -0,0 +1,40 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test one-import-pytorch ability to import .mar file. +# .mar file contains python source of the model and serialized state_dict. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +outputfile="mar_state_dict_model.circle" + +# run test +one-import-pytorch --input_path=mar_state_dict_model.mar --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test b/compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test new file mode 100644 index 000000000..3ac38a42e --- /dev/null +++ b/compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test @@ -0,0 +1,40 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test one-import-pytorch ability to import .mar file. +# .mar file contains TorchScript. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +outputfile="mar_torchscript_model.circle" + +# run test +one-import-pytorch --input_path=mar_torchscript_model.mar --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh b/compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh new file mode 100644 index 000000000..5f38610d7 --- /dev/null +++ b/compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +pushd $SCRIPT_PATH > /dev/null + +for test_case in examples/*; do + python3 example_generator.py $(basename ${test_case}) +done + +python3 aux_generator.py + +popd > /dev/null diff --git a/compiler/one-cmds/tests/pytorch-operations/state_dict_model.test b/compiler/one-cmds/tests/pytorch-operations/state_dict_model.test new file mode 100644 index 000000000..ecd2a8112 --- /dev/null +++ b/compiler/one-cmds/tests/pytorch-operations/state_dict_model.test @@ -0,0 +1,39 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test one-import-pytorch ability to import NN model from .py file and serialized state_dict file. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +outputfile="state_dict_model.circle" + +# run test +one-import-pytorch --input_path=state_dict_model.pth --python_path=aux_generator.py --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/pytorch-operations/torchscript_model.test b/compiler/one-cmds/tests/pytorch-operations/torchscript_model.test new file mode 100644 index 000000000..590e5b369 --- /dev/null +++ b/compiler/one-cmds/tests/pytorch-operations/torchscript_model.test @@ -0,0 +1,39 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test one-import-pytorch ability to import TorchScript file. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +outputfile="torchscript_model.circle" + +# run test +one-import-pytorch --input_path=torchscript_model.pth --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py index 5d84c2bd5..be0322aca 100644 --- a/compiler/one-cmds/utils.py +++ b/compiler/one-cmds/utils.py @@ -17,80 +17,13 @@ import argparse import configparser import glob +import importlib import ntpath import os import subprocess import sys - -class _CONSTANT: - __slots__ = () # This prevents access via __dict__. 
- OPTIMIZATION_OPTS = ( - # (OPTION_NAME, HELP_MESSAGE) - ('O1', 'enable O1 optimization pass'), - ('convert_nchw_to_nhwc', - 'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.' - ), - ('expand_broadcast_const', 'expand broadcastable constant node inputs'), - ('nchw_to_nhwc_input_shape', - 'convert the input shape of the model (argument for convert_nchw_to_nhwc)'), - ('nchw_to_nhwc_output_shape', - 'convert the output shape of the model (argument for convert_nchw_to_nhwc)'), - ('fold_add_v2', 'fold AddV2 op with constant inputs'), - ('fold_cast', 'fold Cast op with constant input'), - ('fold_dequantize', 'fold Dequantize op'), - ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'), - ('fold_sparse_to_dense', 'fold SparseToDense op'), - ('forward_reshape_to_unaryop', 'Forward Reshape op'), - ('fuse_add_with_tconv', 'fuse Add op to Transposed'), - ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'), - ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'), - ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'), - ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'), - ('fuse_bcq', 'apply Binary Coded Quantization'), - ('fuse_preactivation_batchnorm', - 'fuse BatchNorm operators of pre-activations to Convolution op'), - ('fuse_mean_with_mean', 'fuse two consecutive Mean ops'), - ('fuse_transpose_with_mean', - 'fuse Mean with a preceding Transpose under certain conditions'), - ('make_batchnorm_gamma_positive', - 'make negative gamma of BatchNorm to a small positive value (1e-10).' - ' Note that this pass can change the execution result of the model.' 
- ' So, use it only when the impact is known to be acceptable.'), - ('fuse_activation_function', 'fuse Activation function to a preceding operator'), - ('fuse_instnorm', 'fuse ops to InstanceNorm operator'), - ('replace_cw_mul_add_with_depthwise_conv', - 'replace channel-wise Mul/Add with DepthwiseConv2D'), - ('remove_fakequant', 'remove FakeQuant ops'), - ('remove_quantdequant', 'remove Quantize-Dequantize sequence'), - ('remove_redundant_reshape', 'fuse or remove subsequent Reshape ops'), - ('remove_redundant_transpose', 'fuse or remove subsequent Transpose ops'), - ('remove_unnecessary_reshape', 'remove unnecessary reshape ops'), - ('remove_unnecessary_slice', 'remove unnecessary slice ops'), - ('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'), - ('remove_unnecessary_split', 'remove unnecessary split ops'), - ('resolve_customop_add', 'convert Custom(Add) op to Add op'), - ('resolve_customop_batchmatmul', - 'convert Custom(BatchMatmul) op to BatchMatmul op'), - ('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'), - ('resolve_customop_max_pool_with_argmax', - 'convert Custom(MaxPoolWithArgmax) to net of builtin operators'), - ('shuffle_weight_to_16x1float32', - 'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.' 
- ' Note that it only converts weights whose row is a multiple of 16'), - ('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'), - ('substitute_padv2_to_pad', 'convert certain condition PadV2 to Pad'), - ('substitute_splitv_to_split', 'convert certain condition SplitV to Split'), - ('substitute_squeeze_to_reshape', 'convert certain condition Squeeze to Reshape'), - ('substitute_strided_slice_to_reshape', - 'convert certain condition StridedSlice to Reshape'), - ('substitute_transpose_to_reshape', - 'convert certain condition Transpose to Reshape'), - ('transform_min_max_to_relu6', 'transform Minimum-Maximum pattern to Relu6 op'), - ('transform_min_relu_to_relu6', 'transform Minimum(6)-Relu pattern to Relu6 op')) - - -_CONSTANT = _CONSTANT() +import onelib.constant as _constant def _add_default_arg(parser): @@ -116,7 +49,10 @@ def _add_default_arg(parser): def is_accumulated_arg(arg, driver): if driver == "one-quantize": - if arg == "tensor_name" or arg == "scale" or arg == "zero_point": + accumulables = [ + "tensor_name", "scale", "zero_point", "src_tensor_name", "dst_tensor_name" + ] + if arg in accumulables: return True return False @@ -189,83 +125,6 @@ def _parse_cfg(args, driver_name): setattr(args, key, config[secton_to_run][key]) -def _make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path): - """make a command for running tf2tfliteV2.py""" - cmd = [sys.executable, os.path.expanduser(driver_path)] - # verbose - if _is_valid_attr(args, 'verbose'): - cmd.append('--verbose') - # model_format - if _is_valid_attr(args, 'model_format_cmd'): - cmd.append(getattr(args, 'model_format_cmd')) - elif _is_valid_attr(args, 'model_format'): - cmd.append('--' + getattr(args, 'model_format')) - else: - cmd.append('--graph_def') # default value - # converter version - if _is_valid_attr(args, 'converter_version_cmd'): - cmd.append(getattr(args, 'converter_version_cmd')) - elif _is_valid_attr(args, 'converter_version'): - cmd.append('--' + 
getattr(args, 'converter_version')) - else: - cmd.append('--v1') # default value - # input_path - if _is_valid_attr(args, 'input_path'): - cmd.append('--input_path') - cmd.append(os.path.expanduser(input_path)) - # output_path - if _is_valid_attr(args, 'output_path'): - cmd.append('--output_path') - cmd.append(os.path.expanduser(output_path)) - # input_arrays - if _is_valid_attr(args, 'input_arrays'): - cmd.append('--input_arrays') - cmd.append(getattr(args, 'input_arrays')) - # input_shapes - if _is_valid_attr(args, 'input_shapes'): - cmd.append('--input_shapes') - cmd.append(getattr(args, 'input_shapes')) - # output_arrays - if _is_valid_attr(args, 'output_arrays'): - cmd.append('--output_arrays') - cmd.append(getattr(args, 'output_arrays')) - - return cmd - - -def _make_tflite2circle_cmd(driver_path, input_path, output_path): - """make a command for running tflite2circle""" - cmd = [driver_path, input_path, output_path] - return [os.path.expanduser(c) for c in cmd] - - -def _make_circle2circle_cmd(args, driver_path, input_path, output_path): - """make a command for running circle2circle""" - cmd = [os.path.expanduser(c) for c in [driver_path, input_path, output_path]] - # profiling - if _is_valid_attr(args, 'generate_profile_data'): - cmd.append('--generate_profile_data') - # optimization pass(only true/false options) - # TODO support options whose number of arguments is more than zero - for opt in _CONSTANT.OPTIMIZATION_OPTS: - if _is_valid_attr(args, opt[0]): - # ./driver --opt[0] - if type(getattr(args, opt[0])) is bool: - cmd.append('--' + opt[0]) - """ - This condition check is for config file interface, usually would be - SomeOption=True - but user can write as follows while development - SomeOption=False - instead of removing SomeOption option - """ - if type(getattr(args, opt[0])) is str and not getattr( - args, opt[0]).lower() in ['false', '0', 'n']: - cmd.append('--' + opt[0]) - - return cmd - - def _print_version_and_exit(file_path): """print version 
of the file located in the file_path""" script_path = os.path.realpath(file_path) @@ -368,3 +227,34 @@ def _get_optimization_list(get_name=False): opt_list = [_remove_suffix(s, '.cfg') for s in opt_list] return opt_list + + +def _detect_one_import_drivers(search_path): + """Looks for import drivers in given directory + + Args: + search_path: path to the directory where to search import drivers + + Returns: + dict: each entry is related to single detected driver, + key is a config section name, value is a driver name + + """ + import_drivers_dict = {} + for module_name in os.listdir(search_path): + full_path = os.path.join(search_path, module_name) + if not os.path.isfile(full_path): + continue + if module_name.find("one-import-") != 0: + continue + module_loader = importlib.machinery.SourceFileLoader(module_name, full_path) + module_spec = importlib.util.spec_from_loader(module_name, module_loader) + module = importlib.util.module_from_spec(module_spec) + try: + module_loader.exec_module(module) + if hasattr(module, "get_driver_cfg_section"): + section = module.get_driver_cfg_section() + import_drivers_dict[section] = module_name + except: + pass + return import_drivers_dict diff --git a/compiler/oneco/CMakeLists.txt b/compiler/oneco/CMakeLists.txt index 418bc27ac..951194d9d 100644 --- a/compiler/oneco/CMakeLists.txt +++ b/compiler/oneco/CMakeLists.txt @@ -22,11 +22,11 @@ target_link_libraries(moco_onnx_frontend PUBLIC moco_onnx_proto) target_link_libraries(moco_onnx_frontend PUBLIC loco) target_link_libraries(moco_onnx_frontend PRIVATE cwrap) -nnas_find_package(GTest QUIET) - -if(NOT GTest_FOUND) +if(NOT ENABLE_TEST) return() -endif(NOT GTest_FOUND) +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest QUIET) add_executable(moco_onnx_frontend_test ${TESTS}) target_include_directories(moco_onnx_frontend_test PRIVATE src) diff --git a/compiler/pepper-strcast/CMakeLists.txt b/compiler/pepper-strcast/CMakeLists.txt index 5f87e9488..bcc07f482 100644 --- 
a/compiler/pepper-strcast/CMakeLists.txt +++ b/compiler/pepper-strcast/CMakeLists.txt @@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) add_library(pepper_strcast STATIC ${SOURCES}) -set_target_properties(pepper_strcast PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(pepper_strcast PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) target_include_directories(pepper_strcast PUBLIC include) target_link_libraries(pepper_strcast PRIVATE nncc_common) target_link_libraries(pepper_strcast PUBLIC nncc_coverage) diff --git a/compiler/pota-quantization-value-test/CMakeLists.txt b/compiler/pota-quantization-value-test/CMakeLists.txt index 00ffb57de..51fd9a391 100644 --- a/compiler/pota-quantization-value-test/CMakeLists.txt +++ b/compiler/pota-quantization-value-test/CMakeLists.txt @@ -1,7 +1,9 @@ unset(QUANTIZATION_VALUE_TEST) unset(QUANTIZATION_VALUE_TEST_WITH_PARAM) +unset(QUANTIZATION_CONFIG_VALUE_TEST) +unset(QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM) -nnas_find_package(FlatBuffers EXACT 1.10 QUIET) +nnas_find_package(FlatBuffers EXACT 2.0 QUIET) if(NOT FlatBuffers_FOUND) message(STATUS "Build pota-quantization-value-test: FAILED (missing FlatBuffers)") return() @@ -12,6 +14,11 @@ macro(addTest NAME GRANULARITY DTYPE) list(APPEND QUANTIZATION_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE}) endmacro(addTest) +macro(addQConfTest NAME GRANULARITY DTYPE) + list(APPEND QUANTIZATION_CONFIG_VALUE_TEST ${NAME}) + list(APPEND QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE}) +endmacro(addQConfTest) + # Read "test.lst" include("test.lst") # Read "test.local.lst" if exists @@ -20,12 +27,12 @@ include("test.local.lst" OPTIONAL) unset(TEST_DEPS) get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR) -get_target_property(SCHEMA_BIN_PATH mio_circle BINARY_DIR) +get_target_property(SCHEMA_BIN_PATH mio_circle04 BINARY_DIR) 
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py" "${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY) -set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_6_0") +set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0") ### ### Generate test.config @@ -89,5 +96,22 @@ add_test( ${QUANTIZATION_VALUE_TEST_WITH_PARAM} ) +add_test( + NAME pota_fake_wquant_test_with_config + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_fake_wquant_with_config.sh" + "${TEST_CONFIG}" + "${ARTIFACTS_BIN_PATH}" + ${QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM} +) + +add_test( + NAME pota_quantization_test_with_config + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_quantization_with_config.sh" + "${TEST_CONFIG}" + "${ARTIFACTS_BIN_PATH}" + ${QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM} +) + set_tests_properties(pota_record_minmax_test PROPERTIES DEPENDS pota_fake_wquant_test) set_tests_properties(pota_quantization_test PROPERTIES DEPENDS pota_record_minmax_test) +set_tests_properties(pota_quantization_test_with_config PROPERTIES DEPENDS pota_fake_wquant_test_with_config) diff --git a/compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git 
a/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json 
b/compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json new file mode 
100644 index 000000000..174d6e9b0 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "out", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json new file mode 100644 index 000000000..733f46e60 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "out", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ 
b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : 
"layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json 
b/compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json new file mode 100644 index 000000000..630c3e420 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json @@ -0,0 +1,14 @@ +{ + "layers" : [ + { + "name" : "ofm1", + "dtype" : "uint8", + "granularity" : "channel" + }, + { + "name" : "ofm2", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json new file mode 100644 index 000000000..cc98d7c62 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json @@ -0,0 +1,14 @@ +{ + "layers" : [ + { + "name" : "ofm1", + "dtype" : "int16", + "granularity" : "channel" + }, + { + "name" : "ofm2", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json new file mode 100644 index 000000000..838b331fd --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "layer" + } + ] +} diff --git 
a/compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json new file mode 100644 index 000000000..7cd6ce713 --- /dev/null +++ b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json @@ -0,0 +1,9 @@ +{ + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json new file mode 100644 index 000000000..a223fa4aa --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.038489170372486115, + "zero_point": 129.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json new file mode 100644 index 000000000..ec6082d55 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json @@ -0,0 +1,32 @@ +{ + "weights": [ + [ + [ + [ + 136, + 153, + 68 + ], + [ + 51, + 34, + 221 + ] + ], + [ + [ + 0, + 255, + 187 + ], + [ + 85, + 170, + 102 + ] + ] + ] + ], + "scale": 0.05882352963089943, + "zero_point": 119.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json new file mode 100644 index 000000000..afa9b1a8e --- /dev/null +++ 
b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0892433300614357, + "zero_point": 134.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json new file mode 100644 index 000000000..a7298cb58 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00014653272228315473, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json new file mode 100644 index 000000000..ab968c9fc --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json @@ -0,0 +1,32 @@ +{ + "weights": [ + [ + [ + [ + 4096, + 8192, + -12288 + ], + [ + -16384, + -20479, + 24575 + ] + ], + [ + [ + -28671, + 32767, + 16384 + ], + [ + -8192, + 12288, + -4096 + ] + ] + ] + ], + "scale": 0.0002441480755805969, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..3cb0552e9 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00037035736022517085, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json 
b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json new file mode 100644 index 000000000..0528cc9cc --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.03911808878183365, + "zero_point": 127.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json new file mode 100644 index 000000000..ac5da0bda --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.027372928336262703, + "zero_point": 141.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json new file mode 100644 index 000000000..353f15a6b --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0001523942337371409, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..c4ace78d4 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00012122748012188822, + "zero_point": 0.0 +} diff --git 
a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json new file mode 100644 index 000000000..522880618 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.05882352963089943, + "zero_point": 119.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json new file mode 100644 index 000000000..17ba25363 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json @@ -0,0 +1,28 @@ +{ + "weights": [ + [ + [ + [ + 136, + 153 + ], + [ + 68, + 51 + ] + ], + [ + [ + 34, + 221 + ], + [ + 0, + 255 + ] + ] + ] + ], + "scale": 0.05882352963089943, + "zero_point": 119.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json new file mode 100644 index 000000000..522880618 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.05882352963089943, + "zero_point": 119.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json new file mode 100644 index 000000000..71265a270 --- /dev/null +++ 
b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0002441480755805969, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json new file mode 100644 index 000000000..53d7cdba3 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json @@ -0,0 +1,28 @@ +{ + "weights": [ + [ + [ + [ + 4096, + 8192 + ], + [ + -12288, + -16384 + ] + ], + [ + [ + -20479, + 24575 + ], + [ + -28671, + 32767 + ] + ] + ] + ], + "scale": 0.0002441480755805969, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..71265a270 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0002441480755805969, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json new file mode 100644 index 000000000..2558bb2be --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json @@ -0,0 +1,48 @@ +{ + "weights": [ + [ + [ + [ + 1.0039215087890625, + 2.007843017578125 + ], + [ + -3.0117650032043457, + -4.015686511993408 + ] + ], + [ + [ + -5.019608497619629, + 6.023530006408691 + ], + [ + 
-7.027451515197754, + 7.9686279296875 + ] + ] + ], + [ + [ + [ + 4.01568603515625, + -2.007843494415283 + ], + [ + 3.0117645263671875, + -1.0039215087890625 + ] + ], + [ + [ + -7.9686279296875, + -6.023530006408691 + ], + [ + 7.027451515197754, + 5.019608497619629 + ] + ] + ] + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json new file mode 100644 index 000000000..50d44ece7 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json @@ -0,0 +1,7 @@ +{ + "weights": [ + 4069, + 8138 + ], + "scale": 0.0002457468386200985 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json new file mode 100644 index 000000000..24508860d --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.003916590008884668, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json new file mode 100644 index 000000000..b249a0ce5 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json @@ -0,0 +1,52 @@ +{ + "weights": [ + [ + [ + [ + 143, + 159 + ], + [ + 79, + 63 + ] + ], + [ + [ + 47, + 223 + ], + [ + 15, + 254 + ] + ] + ], + [ + [ + [ + 191, + 95 + ], + [ + 175, + 111 + ] + ], + [ + [ + 0, + 31 + ], + [ + 239, + 207 + ] + ] + ] + ], + "scale": 0.062745101749897, + 
"zero_point": 127.0, + "min": -7.9686279296875, + "max": 8.031373023986816 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json new file mode 100644 index 000000000..a2dd6681f --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.037479765713214874, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json new file mode 100644 index 000000000..8817cbef7 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json @@ -0,0 +1,48 @@ +{ + "weights": [ + [ + [ + [ + 1.000030517578125, + 2.00006103515625 + ], + [ + -3.000091552734375, + -4.0001220703125 + ] + ], + [ + [ + -4.999908447265625, + 5.99993896484375 + ], + [ + -6.999969482421875, + 8.0 + ] + ] + ], + [ + [ + [ + 4.0001220703125, + -2.00006103515625 + ], + [ + 3.000091552734375, + -1.000030517578125 + ] + ], + [ + [ + -8.0, + -5.99993896484375 + ], + [ + 6.999969482421875, + 4.999908447265625 + ] + ] + ] + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json new file mode 100644 index 000000000..b00d8d211 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json @@ -0,0 +1,10 @@ +{ + "weights": [ + 26925029, + 53850057 + ], + "scale": [ + 3.714016479907864e-08, + 3.714016479907864e-08 + ] +} diff --git 
a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json new file mode 100644 index 000000000..df5d06c09 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00015212147263810039, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json new file mode 100644 index 000000000..94c794fbb --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json @@ -0,0 +1,61 @@ +{ + "weights": [ + [ + [ + [ + 4096, + 8192 + ], + [ + -12288, + -16384 + ] + ], + [ + [ + -20479, + 24575 + ], + [ + -28671, + 32767 + ] + ] + ], + [ + [ + [ + 16384, + -8192 + ], + [ + 12288, + -4096 + ] + ], + [ + [ + -32767, + -24575 + ], + [ + 28671, + 20479 + ] + ] + ] + ], + "scale": [ + 0.00024414807580797754, + 0.00024414807580797754 + ], + "zero_point": 0.0, + "min": [ + -8.0, + -8.0 + ], + "max": [ + 8.0, + 8.0 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..e02eeb9dc --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.002048635622486472, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json 
b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json new file mode 100644 index 000000000..cd3479781 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json @@ -0,0 +1,34 @@ +{ + "weights": [ + [ + [ + [ + 0.9725494384765625, + 1.945098876953125, + 3.039216995239258, + 4.0117645263671875 + ], + [ + -8.996077537536621, + 9.9686279296875, + -10.94117546081543, + 12.035295486450195 + ] + ], + [ + [ + 4.98431396484375, + 5.9568634033203125, + 7.050981521606445, + 8.023530960083008 + ], + [ + 13.007843017578125, + -13.980391502380371, + 14.95294189453125, + -16.04705810546875 + ] + ] + ] + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json new file mode 100644 index 000000000..e60ff312e --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json @@ -0,0 +1,9 @@ +{ + "weights": [ + 2156, + 4312, + 6468, + 8624 + ], + "scale": 0.0004638272181067826 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json new file mode 100644 index 000000000..4ec4ef2d7 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0038153529167175293, + "zero_point": 0.0 +} diff --git 
a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json new file mode 100644 index 000000000..01835fbde --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json @@ -0,0 +1,38 @@ +{ + "weights": [ + [ + [ + [ + 140, + 148, + 157, + 165 + ], + [ + 58, + 214, + 42, + 231 + ] + ], + [ + [ + 173, + 181, + 190, + 198 + ], + [ + 239, + 17, + 255, + 0 + ] + ] + ] + ], + "scale": 0.12156862765550613, + "zero_point": 132.0, + "min": -16.04705810546875, + "max": 14.952940940856934 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json new file mode 100644 index 000000000..39c64f3ef --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.07362665981054306, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json new file mode 100644 index 000000000..20c1f6759 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json @@ -0,0 +1,34 @@ +{ + "weights": [ + [ + [ + [ + 1.00018310546875, + 2.0, + 2.99981689453125, + 4.0001220703125 + ], + [ + -9.00006103515625, + 10.0, + -10.99993896484375, + 11.9998779296875 + ] + ], + [ + [ + 5.0001220703125, + 6.0, + 6.9998779296875, + 
8.000244140625 + ], + [ + 13.0, + -14.0, + 15.0, + -16.0 + ] + ] + ] + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json new file mode 100644 index 000000000..632333144 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json @@ -0,0 +1,14 @@ +{ + "weights": [ + 17503969, + 32507370, + 45510319, + 56887898 + ], + "scale": [ + 5.7129901172951205e-08, + 6.152450895548591e-08, + 6.591911673802062e-08, + 7.031372452055533e-08 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json new file mode 100644 index 000000000..7105a686d --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00014399811334442347, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json new file mode 100644 index 000000000..d465a7c17 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json @@ -0,0 +1,53 @@ +{ + "weights": [ + [ + [ + [ + 2521, + 4681, + 6553, + 8192 + ], + [ + -22685, + 23405, + -24029, + 24575 + ] + ], + [ + [ + 12603, + 14043, + 15291, + 16384 + ], + [ + 32767, + -32767, + 32767, + -32767 + ] + ] + ] + ], + "scale": [ + 0.0003967406231879635, + 0.0004272591326639607, + 
0.0004577776421399579, + 0.0004882961516159551 + ], + "zero_point": 0.0, + "min": [ + -13.0, + -14.0, + -15.0, + -16.0 + ], + "max": [ + 13.0, + 14.0, + 15.0, + 16.0 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..2d84cd7d8 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0031168656423687935, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json new file mode 100644 index 000000000..e1da53ab0 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json @@ -0,0 +1,76 @@ +{ + "weights": [ + [ + 1.0039215087890625, + 2.007843017578125, + -3.0117650032043457, + -4.015686511993408, + -5.019608497619629, + 6.023530006408691, + -7.027451515197754, + 7.9686279296875, + 4.01568603515625, + -2.007843494415283, + 3.0117645263671875, + -1.0039215087890625, + -7.9686279296875, + -6.023530006408691, + 7.027451515197754, + 5.019608497619629 + ], + [ + 1.0039215087890625, + 2.007843017578125, + -3.0117650032043457, + -4.015686511993408, + -5.019608497619629, + 6.023530006408691, + -7.027451515197754, + 7.9686279296875, + 4.01568603515625, + -2.007843494415283, + 3.0117645263671875, + -1.0039215087890625, + -7.9686279296875, + -6.023530006408691, + 7.027451515197754, + 5.019608497619629 + ], + [ + 1.0039215087890625, + 2.007843017578125, + -3.0117650032043457, + -4.015686511993408, + -5.019608497619629, + 
6.023530006408691, + -7.027451515197754, + 7.9686279296875, + 4.01568603515625, + -2.007843494415283, + 3.0117645263671875, + -1.0039215087890625, + -7.9686279296875, + -6.023530006408691, + 7.027451515197754, + 5.019608497619629 + ], + [ + 1.0039215087890625, + 2.007843017578125, + -3.0117650032043457, + -4.015686511993408, + -5.019608497619629, + 6.023530006408691, + -7.027451515197754, + 7.9686279296875, + 4.01568603515625, + -2.007843494415283, + 3.0117645263671875, + -1.0039215087890625, + -7.9686279296875, + -6.023530006408691, + 7.027451515197754, + 5.019608497619629 + ] + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json new file mode 100644 index 000000000..ecb49bb64 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json @@ -0,0 +1,9 @@ +{ + "weights": [ + 415, + -829, + -1244, + 1658 + ], + "scale": 0.00241205753304663 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json new file mode 100644 index 000000000..654824b5d --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.03844216465950012, + "zero_point": 126.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json new file mode 100644 index 000000000..3baa42155 --- /dev/null +++ 
b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json @@ -0,0 +1,4 @@ +{ + "scale": 0.741962730884552, + "zero_point": 156.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json new file mode 100644 index 000000000..940224049 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json @@ -0,0 +1,80 @@ +{ + "weights": [ + [ + 143, + 159, + 79, + 63, + 47, + 223, + 15, + 254, + 191, + 95, + 175, + 111, + 0, + 31, + 239, + 207 + ], + [ + 143, + 159, + 79, + 63, + 47, + 223, + 15, + 254, + 191, + 95, + 175, + 111, + 0, + 31, + 239, + 207 + ], + [ + 143, + 159, + 79, + 63, + 47, + 223, + 15, + 254, + 191, + 95, + 175, + 111, + 0, + 31, + 239, + 207 + ], + [ + 143, + 159, + 79, + 63, + 47, + 223, + 15, + 254, + 191, + 95, + 175, + 111, + 0, + 31, + 239, + 207 + ] + ], + "scale": 0.062745101749897, + "zero_point": 127.0, + "min": -7.9686279296875, + "max": 8.031373023986816 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json new file mode 100644 index 000000000..559e537fc --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json @@ -0,0 +1,76 @@ +{ + "weights": [ + [ + 1.000030517578125, + 2.00006103515625, + -3.000091552734375, + -4.0001220703125, + -4.999908447265625, + 5.99993896484375, + -6.999969482421875, + 8.0, + 4.0001220703125, + -2.00006103515625, + 3.000091552734375, + -1.000030517578125, + -8.0, + -5.99993896484375, + 
6.999969482421875, + 4.999908447265625 + ], + [ + 1.000030517578125, + 2.00006103515625, + -3.000091552734375, + -4.0001220703125, + -4.999908447265625, + 5.99993896484375, + -6.999969482421875, + 8.0, + 4.0001220703125, + -2.00006103515625, + 3.000091552734375, + -1.000030517578125, + -8.0, + -5.99993896484375, + 6.999969482421875, + 4.999908447265625 + ], + [ + 1.000030517578125, + 2.00006103515625, + -3.000091552734375, + -4.0001220703125, + -4.999908447265625, + 5.99993896484375, + -6.999969482421875, + 8.0, + 4.0001220703125, + -2.00006103515625, + 3.000091552734375, + -1.000030517578125, + -8.0, + -5.99993896484375, + 6.999969482421875, + 4.999908447265625 + ], + [ + 1.000030517578125, + 2.00006103515625, + -3.000091552734375, + -4.0001220703125, + -4.999908447265625, + 5.99993896484375, + -6.999969482421875, + 8.0, + 4.0001220703125, + -2.00006103515625, + 3.000091552734375, + -1.000030517578125, + -8.0, + -5.99993896484375, + 6.999969482421875, + 4.999908447265625 + ] + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json new file mode 100644 index 000000000..0186c03f4 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json @@ -0,0 +1,14 @@ +{ + "weights": [ + 27619368, + -55238737, + -82858105, + 110477474 + ], + "scale": [ + 3.620647604581258e-08, + 3.620647604581258e-08, + 3.620647604581258e-08, + 3.620647604581258e-08 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json new file mode 100644 index 000000000..1fd68cabe --- /dev/null +++ 
b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00014829720021225512, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json new file mode 100644 index 000000000..b2950218c --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json @@ -0,0 +1,4 @@ +{ + "scale": 0.003870659740641713, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json new file mode 100644 index 000000000..69254d12b --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json @@ -0,0 +1,95 @@ +{ + "weights": [ + [ + 4096, + 8192, + -12288, + -16384, + -20479, + 24575, + -28671, + 32767, + 16384, + -8192, + 12288, + -4096, + -32767, + -24575, + 28671, + 20479 + ], + [ + 4096, + 8192, + -12288, + -16384, + -20479, + 24575, + -28671, + 32767, + 16384, + -8192, + 12288, + -4096, + -32767, + -24575, + 28671, + 20479 + ], + [ + 4096, + 8192, + -12288, + -16384, + -20479, + 24575, + -28671, + 32767, + 16384, + -8192, + 12288, + -4096, + -32767, + -24575, + 28671, + 20479 + ], + [ + 4096, + 8192, + -12288, + -16384, + -20479, + 24575, + -28671, + 32767, + 16384, + -8192, + 12288, + -4096, + -32767, + -24575, + 28671, + 20479 + ] + ], + "scale": [ + 0.00024414807580797754, + 0.00024414807580797754, + 0.00024414807580797754, + 0.00024414807580797754 + ], + "zero_point": 0.0, + "min": [ + -8.0, + -8.0, + -8.0, + -8.0 + ], + 
"max": [ + 8.0, + 8.0, + 8.0, + 8.0 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json new file mode 100644 index 000000000..9bf6c9bff --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.03876218944787979, + "zero_point": 126.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json new file mode 100644 index 000000000..87de1116e --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.029836513102054596, + "zero_point": 88.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json new file mode 100644 index 000000000..5d9052815 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00015059474390000105, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..25491f05d --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json @@ -0,0 
+1,4 @@ +{ + "scale": 0.00014986195310484618, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json new file mode 100644 index 000000000..ede36c6ad --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.039086975157260895, + "zero_point": 128.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json new file mode 100644 index 000000000..bd2fc7f62 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.028692100197076797, + "zero_point": 131.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json new file mode 100644 index 000000000..18c3b0421 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00015251495642587543, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..145ee8fda --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 
0.00013844699424225837, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json new file mode 100644 index 000000000..394cfb322 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json @@ -0,0 +1,5 @@ +{ + "weights": [ + -1 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json new file mode 100644 index 000000000..bbff8952d --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.03780897706747055, + "zero_point": 131.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json new file mode 100644 index 000000000..ec6082d55 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json @@ -0,0 +1,32 @@ +{ + "weights": [ + [ + [ + [ + 136, + 153, + 68 + ], + [ + 51, + 34, + 221 + ] + ], + [ + [ + 0, + 255, + 187 + ], + [ + 85, + 170, + 102 + ] + ] + ] + ], + "scale": 0.05882352963089943, + "zero_point": 119.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json new file mode 100644 index 000000000..cec0bdf9a --- /dev/null +++ 
b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.232084259390831, + "zero_point": 111.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json new file mode 100644 index 000000000..f329b43be --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0001513722527306527, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json new file mode 100644 index 000000000..ab968c9fc --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json @@ -0,0 +1,32 @@ +{ + "weights": [ + [ + [ + [ + 4096, + 8192, + -12288 + ], + [ + -16384, + -20479, + 24575 + ] + ], + [ + [ + -28671, + 32767, + 16384 + ], + [ + -8192, + 12288, + -4096 + ] + ] + ] + ], + "scale": 0.0002441480755805969, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..4b5118c3e --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.000991688808426261, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json 
b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json new file mode 100644 index 000000000..7c001602f --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json @@ -0,0 +1,13 @@ +{ + "weights": [ + [ + [ + 51, + 153, + 255 + ] + ] + ], + "scale": 0.0019607844296842813, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json new file mode 100644 index 000000000..05ce9dd2c --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.03849203139543533, + "zero_point": 127.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json new file mode 100644 index 000000000..8f883094a --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.02848827838897705, + "zero_point": 82.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json new file mode 100644 index 000000000..6f99899d5 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json @@ -0,0 +1,21 @@ +{ + "weights": [ + [ + [ + 1, + 1, + 1 + ] + ] + ], + "scale": [ + 0.10000000149011612, + 0.30000001192092896, + 0.5 + ], + "zero_point": [ + 0, + 0, + 0 + ] +} 
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json new file mode 100644 index 000000000..7d1f4c795 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00015214986342471093, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..533c1e3e0 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00015159364556893706, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json new file mode 100644 index 000000000..3b97773ce --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.03907399624586105, + "zero_point": 127.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json new file mode 100644 index 000000000..698a8a7ee --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.01955186203122139, + "zero_point": 0.0 +} diff --git 
a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json new file mode 100644 index 000000000..5a52a1b7b --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0001474507007515058, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..ff9e41ec8 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0001422425702912733, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json new file mode 100644 index 000000000..aaba6131c --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.038689617067575455, + "zero_point": 128.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json new file mode 100644 index 000000000..aaba6131c --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json @@ -0,0 +1,4 @@ +{ + "scale": 0.038689617067575455, + "zero_point": 128.0 +} diff --git 
a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json new file mode 100644 index 000000000..aaba6131c --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json @@ -0,0 +1,4 @@ +{ + "scale": 0.038689617067575455, + "zero_point": 128.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json new file mode 100644 index 000000000..ac7cde187 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json @@ -0,0 +1,5 @@ +{ + "weights": [ + 0 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json new file mode 100644 index 000000000..2fb0c68d8 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00014983004075475037, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json new file mode 100644 index 000000000..2fb0c68d8 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00014983004075475037, + "zero_point": 0.0 +} diff --git 
a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json new file mode 100644 index 000000000..2fb0c68d8 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00014983004075475037, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json new file mode 100644 index 000000000..ac7cde187 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json @@ -0,0 +1,5 @@ +{ + "weights": [ + 0 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json new file mode 100644 index 000000000..76a0440a0 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json @@ -0,0 +1,48 @@ +{ + "weights": [ + [ + [ + [ + 0.960784912109375, + 2.0588245391845703 + ], + [ + -3.0196075439453125, + -3.980391502380371 + ], + [ + 4.9411773681640625, + -6.039215087890625 + ] + ], + [ + [ + 7.0, + 7.960784912109375 + ], + [ + -9.058823585510254, + -10.019607543945312 + ], + [ + 10.980392456054688, + -11.941176414489746 + ] + ], + [ + [ + 13.039216995239258, + 14.000001907348633 + ], + [ + -14.960784912109375, + -16.05882453918457 + ], + [ + 17.019607543945312, + -17.980392456054688 + ] + ] + ] + ] +} diff --git 
a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json new file mode 100644 index 000000000..dc5ca8dd5 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.03869570419192314, + "zero_point": 126.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json new file mode 100644 index 000000000..bc150bbb0 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json @@ -0,0 +1,52 @@ +{ + "weights": [ + [ + [ + [ + 138, + 146 + ], + [ + 109, + 102 + ], + [ + 167, + 87 + ] + ], + [ + [ + 182, + 189 + ], + [ + 65, + 58 + ], + [ + 211, + 44 + ] + ], + [ + [ + 226, + 233 + ], + [ + 22, + 14 + ], + [ + 255, + 0 + ] + ] + ] + ], + "scale": 0.13725490868091583, + "zero_point": 131.0, + "min": -17.980392456054688, + "max": 17.019609451293945 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json new file mode 100644 index 000000000..bfd862189 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 1.6333034038543701, + "zero_point": 127.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json 
b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json new file mode 100644 index 000000000..6df24eb42 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json @@ -0,0 +1,48 @@ +{ + "weights": [ + [ + [ + [ + 0.999786376953125, + 2.0001220703125 + ], + [ + -2.999908447265625, + -4.000244140625 + ], + [ + 5.000030517578125, + -5.99981689453125 + ] + ], + [ + [ + 7.000152587890625, + 7.99993896484375 + ], + [ + -9.000274658203125, + -10.00006103515625 + ], + [ + 10.999847412109375, + -12.00018310546875 + ] + ], + [ + [ + 12.999969482421875, + 13.999755859375 + ], + [ + -15.000091552734375, + -15.9998779296875 + ], + [ + 17.000213623046875, + -18.0 + ] + ] + ] + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json new file mode 100644 index 000000000..82f7fa2b6 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json @@ -0,0 +1,4 @@ +{ + "scale": 0.00015178922330960631, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json new file mode 100644 index 000000000..8d0ceb1c6 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json @@ -0,0 +1,58 @@ +{ + "weights": [ + [ + [ + [ + 1820, + 3641 + ], + [ + -5461, + -7282 + ], + [ + 9102, + -10922 + ] + ], + [ + [ + 12743, + 14563 + ], + [ + -16384, + -18204 + ], + [ + 20024, + -21845 + ] + ], + [ + [ + 
23665, + 25485 + ], + [ + -27306, + -29126 + ], + [ + 30947, + -32767 + ] + ] + ] + ], + "scale": [ + 0.0005493331705679495 + ], + "zero_point": 0.0, + "min": [ + -18.0 + ], + "max": [ + 18.0 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..f370bf44d --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.0122029148042202, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/requires.cmake b/compiler/pota-quantization-value-test/requires.cmake index 4eb7204e1..5ce8dfb5d 100644 --- a/compiler/pota-quantization-value-test/requires.cmake +++ b/compiler/pota-quantization-value-test/requires.cmake @@ -2,4 +2,4 @@ require("record-minmax") require("circle-quantizer") require("circle-tensordump") require("common-artifacts") -require("mio-circle") +require("mio-circle04") diff --git a/compiler/pota-quantization-value-test/test.lst b/compiler/pota-quantization-value-test/test.lst index 4beec8c0e..e169de57c 100644 --- a/compiler/pota-quantization-value-test/test.lst +++ b/compiler/pota-quantization-value-test/test.lst @@ -31,3 +31,32 @@ addTest(Split_000 channel int16) addTest(TransposeConv_001 channel uint8) addTest(TransposeConv_001 channel int16) addTest(TransposeConv_001 layer uint8) + +addQConfTest(Add_002 layer uint8) +addQConfTest(Add_002 channel int16) +addQConfTest(AveragePool2D_000 layer uint8) +addQConfTest(AveragePool2D_000 channel int16) +addQConfTest(Concatenation_001 layer uint8) +addQConfTest(Concatenation_001 channel int16) +addQConfTest(Conv2D_004 channel int16) +addQConfTest(Conv2D_004 layer uint8) +addQConfTest(DepthwiseConv2D_002 channel int16) +addQConfTest(DepthwiseConv2D_002 layer uint8) 
+addQConfTest(FullyConnected_003 channel int16) +addQConfTest(FullyConnected_003 layer uint8) +#addQConfTest(InstanceNorm_001 layer uint8) Enable this when int16 CWQ data is ready. +#addQConfTest(InstanceNorm_001 channel int16) Enable this when int16 CWQ data is ready. +addQConfTest(Mean_000 layer uint8) +addQConfTest(Mean_000 channel int16) +addQConfTest(MaxPool2D_000 layer uint8) +addQConfTest(MaxPool2D_000 channel int16) +addQConfTest(Mul_001 layer uint8) +addQConfTest(Mul_001 channel int16) +addQConfTest(PRelu_001 layer uint8) +addQConfTest(PRelu_001 channel int16) +addQConfTest(ReLU_000 layer uint8) +addQConfTest(ReLU_000 channel int16) +addQConfTest(Split_000 channel uint8) +addQConfTest(Split_000 channel int16) +addQConfTest(TransposeConv_001 channel int16) +addQConfTest(TransposeConv_001 layer uint8) diff --git a/compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh b/compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh new file mode 100755 index 000000000..070b2738e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh @@ -0,0 +1,87 @@ +#!/bin/bash + +# This script tests fake quantization with config file +# +# HOW TO USE +# +# ./test_fake_wquant_with_config.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ... 
+# test.config : set ${RECORD_MINMAX_PATH} and ${CIRCLE_QUANTIZER_PATH} +# work_dir : build directory of quantization-value-test (ex: build/compiler/quantization-value-test) + +SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py" +CONFIG_PATH="$1"; shift +BIN_PATH=$(dirname "${CONFIG_PATH}") +WORKDIR="$1"; shift + +source "${CONFIG_PATH}" + +echo "-- Found CIRCLE_QUANTIZER: ${CIRCLE_QUANTIZER_PATH}" +echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}" +echo "-- Found workdir: ${WORKDIR}" + +TESTED=() +PASSED=() +FAILED=() + +pushd "${WORKDIR}" +while [ "$1" != "" ]; do + MODELNAME=$1; shift + GRANULARITY=$1; shift + DTYPE=$1; shift + TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}" + + TESTED+=("${TESTCASE}") + + TESTCASE_FILE="${WORKDIR}/${TESTCASE}" + TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}" + + PASSED_TAG="${TEST_RESULT_FILE}.fake_quantized.mixed.passed" + rm -f "${PASSED_TAG}" + + cat > "${TEST_RESULT_FILE}_fake_quantization_with_config.log" <( + exec 2>&1 + set -ex + + # Run circle-quantizer with --quantize_dequantize_weights + "${CIRCLE_QUANTIZER_PATH}" \ + --quantize_dequantize_weights float32 "${DTYPE}" "${GRANULARITY}" \ + --config "${SOURCE_PATH}/config_files/${MODELNAME}/${GRANULARITY}/${DTYPE}/qconf.json" \ + "${WORKDIR}/${MODELNAME}.circle" \ + "${TEST_RESULT_FILE}.fake_quantized.mixed.circle" + + # Dump weights values (circle-tensordump) + "${CIRCLE_TENSORDUMP_PATH}" \ + "${TEST_RESULT_FILE}.fake_quantized.mixed.circle" \ + --tensors_to_hdf5 "${TEST_RESULT_FILE}.fake_quantized.mixed.circle.h5" + + # Compare result + "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \ + --input_h5 "${TEST_RESULT_FILE}.fake_quantized.mixed.circle.h5" \ + --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}_config/${GRANULARITY}/${DTYPE}/fake_quantization" \ + --mode fake_quantization + + if [[ $? 
-eq 0 ]]; then + touch "${PASSED_TAG}" + fi + ) + + if [[ -f "${PASSED_TAG}" ]]; then + PASSED+=("$TESTCASE") + else + FAILED+=("$TESTCASE") + fi +done +popd + +if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then + echo "FAILED" + for TEST in "${FAILED[@]}" + do + echo "- ${TEST}" + done + exit 255 +fi + +echo "PASSED" +exit 0 diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt new file mode 100644 index 000000000..b6e2efa3d --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt @@ -0,0 +1 @@ +-0.8596993, 4.8127713,-3.4127183, 4.2323627,-2.2201376,-1.5362649,-4.9921966, 0.9565166, 3.2879171,-1.3590081,-3.771852 ,-4.1042285 diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt new file mode 100644 index 000000000..bcf2807ba --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt @@ -0,0 +1 @@ + 0.14624089, 4.7304125 , 4.833998 , 4.2321773 ,-2.0582533 ,-2.3694758 , 1.4213978 , 2.2444596 , 3.3630798 ,-0.70257574, 3.586656 ,-2.513805 diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt new file mode 100644 index 000000000..c3e32d2c5 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt @@ -0,0 +1 @@ + 2.175218 , 0.02776978,-2.6291077 , 3.5350094 ,-1.2364857 ,-3.3151364 ,-0.92507887, 2.8038094 ,-1.8781518 , 3.6221995 , 2.4015775 ,-2.9217577 diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt new file mode 
100644 index 000000000..a92abd4f6 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt @@ -0,0 +1 @@ +-1.0345451,-1.5055941,-4.144375 ,-4.727011 , 1.5841546, 4.5780725,-4.24402 ,-2.3966947,-3.0370803,-1.0234503,-0.2750057, 3.2965126 diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt new file mode 100644 index 000000000..2f2937fcb --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt @@ -0,0 +1 @@ +-2.4460397 , 2.6090143 , 4.1773095 , 0.11204174,-3.3053472 , 2.5160108 ,-3.0612547 , 1.0667087 , 2.8952355 , 3.842513 , 0.6790793 ,-0.33375 diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt new file mode 100644 index 000000000..a219546a1 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt @@ -0,0 +1 @@ +-0.48516417,-4.5555663 ,-2.9907737 , 2.422857 , 1.010034 , 3.6436582 , 0.29334423,-4.0628953 , 1.0116768 , 3.0871766 , 3.3341465 , 4.3921704 diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt new file mode 100644 index 000000000..70d3139a0 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt @@ -0,0 +1 @@ +-0.7787985 , 4.101575 ,-0.4839729 , 0.35971674,-4.3452406 ,-4.811665 ,-3.8693128 , 4.239326 , 0.44103175, 3.5549765 , 2.5334291 , 1.4546562 diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt new file mode 100644 index 000000000..3c38f8d5d --- /dev/null +++ 
b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt @@ -0,0 +1 @@ + 3.5943313,-1.4843192, 1.956341 ,-1.3242344, 1.5901331,-3.641623 , 4.6022506,-0.307265 ,-0.6359913,-4.0109854,-1.2064985, 1.1137954 diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt new file mode 100644 index 000000000..e89a022f5 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt @@ -0,0 +1 @@ + 3.1036437 ,-0.39538398,-0.07278133, 4.547673 , 3.9132211 , 2.6468625 ,-4.2830634 ,-2.0573084 , 2.1074655 ,-4.0634165 ,-4.55598 ,-0.7942089 diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt new file mode 100644 index 000000000..2b00832cd --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt @@ -0,0 +1 @@ +-2.7745228, 1.4813256, 4.4699864, 3.7466738,-2.9847758,-4.453416 , 3.2515864,-1.2459193,-4.44965 ,-1.8452735, 4.423347 , 4.2998137 diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt new file mode 100644 index 000000000..e42cbf88b --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt @@ -0,0 +1 @@ +-4.1358833e+00, 1.7854472e+00, 4.1751757e+00, 5.5915713e-01,-2.6459083e-01,-1.7176826e+00,-1.8155930e+00, 2.8710868e+00,-2.7043006e+00, 1.0959731e+00,-2.0176995e+00,-6.5950048e-01,-3.6413522e+00,-4.1966043e+00,-2.6820884e+00,-3.6055098e+00, 3.6852844e+00, 8.9128174e-02, 1.3107824e+00,-3.6425626e+00,-3.2318896e-01, 3.6238370e+00,-4.9837337e+00,-4.0550299e+00,-1.4882606e+00, 1.5547658e+00,-1.1696080e+00, 
2.1651111e+00, 4.9318314e+00,-3.5928023e+00,-1.2348548e+00,-1.7002642e+00, 1.7365140e+00,-8.8151926e-01,-4.1655774e+00,-1.0166957e+00,-3.7440193e+00, 2.8588972e+00, 4.1286149e+00,-4.9504828e+00, 4.8477168e+00,-2.2587967e+00, 2.8542519e+00,-7.9565448e-01, 6.8252671e-01, 2.5875571e-01,-6.3935977e-01,-4.8547015e+00, 4.1373856e-03,-1.3893708e+00, 8.8775367e-01, 2.1222150e-01, 3.1871333e+00, 1.3869151e+00,-3.8274391e+00, 3.2623324e+00, 7.2669631e-01, 1.0303619e+00, 8.1438148e-01, 8.1272924e-01,-2.7527118e+00, 1.8215455e+00,-1.6416427e-01, 4.9103169e+00 diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt new file mode 100644 index 000000000..7caf8ce9e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt @@ -0,0 +1 @@ +-4.250757 , 1.4186406 , 0.63726735,-0.35924944, 1.9436699 , 3.2695885 , 3.6638293 , 4.5166173 , 1.3807241 ,-1.9112543 ,-1.9026492 ,-0.4800549 , 2.818216 ,-4.6390033 ,-3.8570547 , 3.6634028 ,-1.2112037 ,-1.3335027 , 1.3524677 , 2.7240725 ,-3.8335826 , 1.1397903 ,-3.1570992 ,-4.802078 , 3.8334577 , 0.23457901, 0.7132307 , 2.9887354 , 2.9702394 ,-1.4113717 ,-0.66712093, 0.77366674, 1.9308351 ,-0.45465755, 4.925366 , 2.4214447 , 2.8401468 , 0.49789894, 0.53141665,-2.7466767 , 0.2059374 ,-4.9661317 ,-4.1334467 , 1.6928389 ,-0.42529574, 1.1033608 , 4.275776 , 1.5063075 , 2.3528252 , 0.79505247, 3.9829993 ,-4.8472476 ,-1.2752185 , 3.7365675 , 1.976164 ,-4.742636 ,-2.7199092 ,-2.9191706 ,-3.181069 ,-4.489485 , 4.0847454 , 2.2164 , 0.9725334 ,-0.72566307 diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt new file mode 100644 index 000000000..7facffa57 --- /dev/null +++ 
b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt @@ -0,0 +1 @@ +-3.8293874 ,-0.13678598,-2.5444264 , 1.654611 ,-4.3037786 ,-3.4240584 ,-4.5642533 , 4.1250315 , 1.0469195 , 4.2802887 , 3.1617825 ,-3.1706758 ,-0.99622065, 2.7707603 , 3.7494645 ,-1.4548893 , 2.328633 , 1.7976477 ,-1.2107176 ,-2.0178459 ,-0.6488357 ,-2.9393644 , 2.8918762 , 3.6192262 ,-4.1777225 , 1.3264071 , 0.32620123, 0.7890992 ,-3.304334 , 3.4893208 , 2.5354576 ,-4.7718143 , 3.8602633 , 0.4927564 , 2.2971296 ,-0.3296792 , 2.8115997 ,-0.75152504, 0.558675 ,-2.343631 , 4.650826 ,-3.0893488 , 0.8726873 , 0.24922371, 2.7634025 , 1.0358421 ,-3.862506 ,-3.169402 ,-2.5373347 , 0.9484093 , 4.1409917 ,-4.0408096 ,-2.7231216 ,-2.548547 ,-2.6315095 , 0.8164778 ,-3.017436 , 1.1860138 ,-1.8634807 , 1.8684052 , 1.8657844 , 1.7747321 ,-3.1472425 ,-1.3989028 diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt new file mode 100644 index 000000000..0be8fdd19 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt @@ -0,0 +1 @@ +-2.0492268 ,-2.2555764 ,-1.3543441 ,-3.7278662 ,-4.8601675 , 3.1095552 , 4.6319957 , 3.0211062 , 1.7870535 , 4.8839574 ,-1.3494394 , 2.635408 ,-0.24201432, 1.312397 , 0.16790341, 2.42507 ,-3.101355 , 3.1760497 ,-4.500736 ,-2.53691 , 1.064206 , 0.62096214, 2.803344 ,-4.6166744 ,-4.624786 , 3.667064 ,-1.484021 , 4.9401817 ,-3.763283 , 3.4351027 ,-2.906393 , 4.9945946 ,-3.2997096 , 3.6325612 ,-0.47211674, 0.28783202, 1.8703817 ,-4.042374 ,-3.3353784 , 4.9085765 ,-1.6753131 ,-3.4926984 ,-4.8663344 ,-4.495712 , 2.3402312 ,-1.0722051 , 0.28559962, 2.1208072 , 1.3024254 , 3.4810693 , 0.09860361, 1.695624 , 1.3901931 , 1.6858819 , 3.8231227 , 4.5972557 ,-4.6835494 , 0.5753765 ,-2.2377403 , 0.13013013,-2.1165738 ,-0.26044115,-0.653468 , 1.1010929 
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt new file mode 100644 index 000000000..7e2d618f9 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt @@ -0,0 +1 @@ + 4.397323 ,-0.51448834, 2.5729322 ,-4.3229046 , 1.149113 ,-3.8652143 ,-1.7352968 ,-0.7575065 ,-0.41720778, 4.327346 ,-4.2363043 , 0.8653738 ,-1.7511971 ,-0.7874244 ,-4.0734816 , 2.5622475 ,-3.1229742 ,-1.1783633 , 0.4017013 ,-0.76175183,-1.058416 , 1.128772 ,-3.0143378 ,-2.6688366 ,-2.575279 ,-4.326955 , 4.175434 , 4.791393 ,-1.10654 ,-4.4417224 , 3.5057635 , 1.5339037 ,-4.0297494 ,-3.7187057 ,-0.6645762 , 4.215642 , 1.6742749 , 2.5468905 , 1.73195 ,-3.3100636 ,-4.4818826 ,-2.5627983 ,-1.4624406 , 1.2433167 ,-4.005364 ,-4.3450556 ,-1.0652863 ,-1.0240986 , 3.989825 ,-4.1690702 ,-4.595108 ,-1.1154945 , 0.65749156, 2.5127344 , 2.509761 ,-4.3936505 , 3.6513395 ,-2.3340352 ,-4.3615093 , 3.5973237 , 0.9316653 , 1.9391845 , 3.6356397 , 0.8133118 diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt new file mode 100644 index 000000000..2a6b09b27 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt @@ -0,0 +1 @@ +-4.629505 , 1.0121975 ,-0.13417433,-2.329806 ,-3.4927373 ,-0.7574039 ,-2.2674313 , 3.1983519 , 2.4298382 ,-0.23268977, 2.0218065 ,-1.5087285 ,-1.3953347 ,-3.8100643 ,-1.7438283 , 3.9852605 , 2.9817178 ,-4.0460877 , 0.09402129, 4.3802586 ,-1.0991771 , 0.4134776 , 2.8136911 ,-3.6254618 ,-3.925183 , 4.691824 , 4.381538 ,-3.235543 ,-2.6764185 , 2.659456 ,-3.2127233 , 0.0206281 , 3.4056723 ,-1.693684 , 1.1005328 ,-3.1486542 , 0.77198106, 1.4526777 ,-2.3614178 , 4.8214664 ,-3.1486242 , 
0.58941853,-4.1100698 , 4.1982718 , 1.7219902 ,-2.4375956 ,-1.7505955 , 1.7465224 ,-2.7494361 , 4.0679016 , 1.8936038 ,-4.523818 ,-3.4124248 ,-4.809946 ,-1.939553 , 4.9411273 , 1.6261404 ,-2.6846552 , 2.1339247 , 0.61396503,-1.6662381 , 2.4282491 , 2.662007 ,-0.40868336 diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt new file mode 100644 index 000000000..470da6c74 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt @@ -0,0 +1 @@ + 0.70593804, 3.253847 , 1.1094694 , 0.5295975 , 0.5944647 ,-2.4391694 , 4.7912955 , 4.4374456 ,-2.942428 ,-3.5038033 ,-3.180417 , 2.1914082 ,-4.5295396 ,-3.0037553 ,-2.265191 , 0.20113531, 2.3805366 ,-0.9111223 ,-4.3170924 , 4.08436 , 1.1006241 ,-1.286977 , 4.811279 , 0.9131829 , 3.2051497 ,-2.8660698 ,-3.188871 , 1.4163305 , 4.061829 , 2.7783196 ,-3.4975152 , 3.4888391 , 2.5789826 ,-1.5264264 ,-0.13952135,-1.280177 , 2.4716458 , 2.6200528 ,-2.515086 , 3.441416 , 2.4515297 ,-0.9845471 , 0.9481396 , 1.1518412 , 1.6088997 , 1.445077 , 2.2620194 ,-2.0843177 ,-0.7263964 , 1.8159748 ,-3.3673623 , 0.2554476 ,-4.3550563 ,-1.4280493 ,-2.2702312 ,-4.7424164 ,-0.57241255,-2.813357 , 2.9161859 ,-0.9036504 , 0.00511268, 0.60724795, 4.8010454 , 1.6000834 diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt new file mode 100644 index 000000000..d9e048b61 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt @@ -0,0 +1 @@ + 7.07888961e-01, 4.75798702e+00,-1.47843570e-01,-1.95845592e+00, 4.26537895e+00,-3.03711486e+00,-1.35137546e+00,-1.10638596e-01,-1.02415502e+00,-2.65345359e+00, 5.48920631e-01,-4.38003826e+00, 
3.61377740e+00,-2.91408587e+00,-3.22874010e-01,-4.74363208e-01, 3.45294738e+00, 1.02204478e+00,-1.44102740e+00, 6.80687547e-01,-2.44050741e+00, 3.71395111e+00,-2.14443612e+00, 3.70928717e+00, 1.35871637e+00, 9.73374963e-01, 1.57826161e+00,-2.91381836e-01, 1.46376801e+00, 2.96391749e+00, 1.08418810e+00,-3.50718546e+00, 4.68637037e+00, 1.04839933e+00, 2.24482760e-01, 2.38816309e+00, 3.18772525e-01,-3.90284014e+00,-3.32757282e+00,-1.61143410e+00,-1.26013708e+00, 2.24948835e+00, 7.63151050e-01, 4.18296242e+00,-8.69123042e-01, 3.19850564e-01, 3.52391124e-01, 3.30018830e+00,-4.64861393e+00,-4.64479780e+00,-2.68103647e+00,-1.13277221e+00, 2.02201343e+00,-4.05572534e-01, 3.06759548e+00,-3.55881310e+00,-1.14900565e+00,-3.00835490e+00, 1.31509733e+00, 2.50206441e-01, 2.47731134e-01, 4.98673916e+00,-1.74064383e-01,-4.43180744e-03 diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt new file mode 100644 index 000000000..cdbf98e8a --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt @@ -0,0 +1 @@ + 3.5591762 , 4.8821726 , 0.44271094, 4.786732 ,-2.4497197 , 2.4973536 , 2.034311 , 4.8329844 ,-3.9451184 , 4.9937835 , 2.0246332 ,-2.8319602 , 3.9617133 , 4.10946 ,-4.3191586 ,-2.8492777 ,-2.648121 ,-4.199404 ,-0.05163948,-4.7944984 , 2.8989205 , 1.4747709 ,-3.1194637 ,-2.877846 ,-0.39301065, 2.616311 , 2.6305614 , 1.7303206 , 3.6059175 ,-2.745988 , 2.5924454 , 3.0149276 , 4.0359216 ,-0.6135884 ,-2.5023808 ,-2.3395267 ,-3.0633461 ,-2.3836162 ,-4.4779797 ,-1.30866 , 1.9110863 , 0.654628 ,-4.559368 , 0.34231895,-0.8196542 , 4.7275734 , 3.2823656 ,-4.9644713 , 2.9191613 ,-3.4621727 ,-4.276584 ,-1.7153062 , 1.8820064 , 1.2659297 , 3.4141889 ,-4.905296 , 4.619848 ,-3.9501083 ,-1.5550466 , 3.6841137 , 1.7121594 , 1.9466268 , 1.5684807 , 4.5554323 diff --git 
a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt new file mode 100644 index 000000000..065d77df6 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt @@ -0,0 +1 @@ +-2.2269225 ,-1.2782103 ,-3.381931 ,-1.5229299 , 2.0681949 , 1.7630705 ,-0.81455594,-2.6558595 ,-3.4870632 ,-4.647749 , 2.4453654 ,-2.242679 ,-1.0272806 , 0.5656208 , 0.69442594,-4.4343104 ,-3.9649677 ,-3.8908577 ,-1.642287 , 3.0714357 , 1.0880747 ,-2.1665683 ,-4.0994506 , 2.004911 , 3.5922902 , 3.775 , 1.1580672 ,-1.4154137 ,-4.4964633 ,-1.696588 , 4.0220857 ,-1.2785947 ,-4.2075186 ,-4.515838 , 0.99715126, 3.0928102 ,-2.295537 ,-4.772882 ,-1.2936146 ,-2.6903791 , 0.10453273,-1.8041211 , 3.787591 , 0.9493053 ,-4.41586 , 3.4252715 ,-0.25001565, 4.655357 ,-1.8767506 , 0.00600041, 4.660605 , 2.550518 ,-3.830558 , 1.7777463 ,-0.7170577 ,-0.26554853,-3.5770113 ,-1.1354474 , 4.663121 , 3.100427 , 0.03313563,-1.7419808 ,-1.4426676 ,-3.912533 diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt new file mode 100644 index 000000000..9def1c2eb --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt @@ -0,0 +1 @@ +0.24671102,3.271825 ,3.979895 ,1.3334678 diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt new file mode 100644 index 000000000..eaec2409f --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt @@ -0,0 +1 @@ + 1.9181111, 2.2396102,-2.8641696,-1.9045062 diff --git 
a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt new file mode 100644 index 000000000..3e05181cc --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt @@ -0,0 +1 @@ +4.751434 ,2.8798263 ,0.15149078,2.9485583 diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt new file mode 100644 index 000000000..19d95b267 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt @@ -0,0 +1 @@ +-1.5743442 , 0.6716824 , 0.75737774,-0.27396253 diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt new file mode 100644 index 000000000..d302e07a9 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt @@ -0,0 +1 @@ +-1.0539489 , 1.9595883 , 0.19975437, 2.526178 diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt new file mode 100644 index 000000000..af1c2dff8 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt @@ -0,0 +1 @@ +-4.0575085 , 2.5941508 ,-2.550309 ,-0.03760919 diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt new file mode 100644 index 000000000..0ede613ac --- /dev/null +++ 
b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt @@ -0,0 +1 @@ + 0.4857123,-4.032874 ,-3.687589 ,-1.235227 diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt new file mode 100644 index 000000000..b0b0392ba --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt @@ -0,0 +1 @@ + 0.21878362, 3.9175916 ,-4.6141233 , 3.709655 diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt new file mode 100644 index 000000000..d8a8cad12 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt @@ -0,0 +1 @@ +-1.9645791,-1.4466153, 1.2543651,-1.0288917 diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt new file mode 100644 index 000000000..ca2a1c3b4 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt @@ -0,0 +1 @@ +-2.1611342, 2.4875243, 3.096089 ,-1.1327268 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt new file mode 100644 index 000000000..0614b5e83 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt @@ -0,0 +1 @@ +0.01090685,0.0581577 ,0.637094 ,0.64067715,0.26264507,0.13692169,0.9649414 ,0.5117181 ,0.18012471,0.07855253,0.6358017 ,0.62257963,0.41469443,0.93169045,0.20763828,0.7634293 
,0.75929826,0.72708374,0.23463063,0.58222896,0.6351517 ,0.68781173,0.5558012 ,0.7652179 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt new file mode 100644 index 000000000..b1c39382f --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt @@ -0,0 +1 @@ +0.57017624,0.08235867,0.03672464,0.40372616,0.7353964 ,0.59611887,0.7675548 ,0.21004233,0.09803218,0.20009473,0.8821493 ,0.17015271,0.14840214,0.99910176,0.37003204,0.22893582,0.43173164,0.3105084 ,0.41997132,0.43714985,0.08115962,0.71896386,0.7810953 ,0.00524598 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt new file mode 100644 index 000000000..7e562de75 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt @@ -0,0 +1 @@ +0.65292275,0.79842275,0.97853714,0.6711518 ,0.607567 ,0.40971732,0.74838483,0.95853555,0.32158023,0.911524 ,0.66938365,0.8573132 ,0.3047727 ,0.5561248 ,0.914098 ,0.07650814,0.37868017,0.29269257,0.19652605,0.63025194,0.61496884,0.32011527,0.8204132 ,0.21866946 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt new file mode 100644 index 000000000..2958a7f54 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt @@ -0,0 +1 @@ +0.4548901 ,0.56957537,0.0252368 ,0.4884317 ,0.7516498 ,0.02631272,0.22107519,0.95249426,0.34902394,0.11520014,0.808911 ,0.4148615 ,0.63615656,0.84020686,0.3633697 ,0.23993976,0.54176176,0.86938345,0.81628686,0.6380988 ,0.91891205,0.0406627 ,0.90289026,0.9429013 diff --git 
a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt new file mode 100644 index 000000000..fc969308e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt @@ -0,0 +1 @@ +0.9309136 ,0.02123719,0.64467335,0.6910113 ,0.47402772,0.54622203,0.31527275,0.81530565,0.98981965,0.36102158,0.03114039,0.1902339 ,0.45183742,0.60178596,0.4683102 ,0.59810966,0.40558222,0.5420302 ,0.72699505,0.9575108 ,0.46746576,0.08518691,0.40302262,0.69213694 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt new file mode 100644 index 000000000..f82ad6704 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt @@ -0,0 +1 @@ + 1.4040831 , 4.8621206 , 0.22880335,-0.3116556 , 0.260938 ,-0.61554366, 3.779648 ,-4.650609 , 3.886638 ,-0.25574106,-0.45002133, 4.9870906 ,-2.3277295 ,-4.9648423 ,-3.7695415 , 3.2857463 ,-4.5514555 ,-3.7705963 , 3.8458307 ,-4.797776 ,-3.4295716 ,-4.6026535 ,-1.4011091 , 2.8851774 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt new file mode 100644 index 000000000..722337286 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt @@ -0,0 +1 @@ +-4.171929 ,-2.2911541 , 2.8965824 , 0.27504483,-1.6088463 ,-0.6509234 ,-3.262618 , 0.9633116 , 2.4504175 , 0.97706884, 0.4212074 , 1.4083375 ,-2.9757218 ,-3.1010823 ,-1.7146534 , 4.105306 , 0.07195274, 3.0232217 ,-2.7568955 ,-4.8887763 ,-3.4171093 ,-0.91494775, 2.5260248 , 4.74184 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt 
b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt new file mode 100644 index 000000000..1283a8ad1 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt @@ -0,0 +1 @@ + 0.14139967, 1.9541235 ,-4.945228 ,-0.48999134, 3.7479703 , 0.29318067, 0.21036309, 4.357736 ,-4.3354783 ,-1.9236348 , 0.49615476,-1.8418436 ,-2.425741 , 4.817022 , 1.5093465 , 2.417444 ,-4.69463 , 0.3433745 ,-4.5979595 ,-3.9027495 ,-0.29977685, 4.9239326 ,-0.39175773, 1.277211 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt new file mode 100644 index 000000000..c931e1752 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt @@ -0,0 +1 @@ +-3.692852 ,-1.0075341 ,-2.4409268 , 0.92995465,-3.1325107 , 4.028981 , 0.8446181 ,-2.2990613 , 4.0820794 , 3.1633005 , 4.1527267 ,-3.9514909 , 2.6104712 , 4.660645 ,-1.7398617 , 0.15663597,-3.6861904 ,-2.9019265 , 3.8828175 ,-2.712909 , 4.3699546 ,-3.5953352 ,-3.0655813 , 0.59767616 diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt new file mode 100644 index 000000000..d33c2dbec --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt @@ -0,0 +1 @@ +-2.8695228 , 2.865197 , 0.6635586 , 0.22709726, 2.85572 ,-4.2051144 , 1.5833759 ,-4.4277377 , 4.0004573 , 2.4766827 , 3.0412688 ,-4.8891425 ,-4.489896 , 3.0812325 , 2.1947708 , 1.6387184 , 0.31932488,-0.41092923,-0.0730476 , 0.7265327 , 4.1333 , 3.157228 , 4.7395325 , 3.4576747 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt 
b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt new file mode 100644 index 000000000..f4fb503ea --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt @@ -0,0 +1 @@ +0.4383064 ,0.8700848 ,0.86010957,0.08396256,0.7963264 ,0.4156023 ,0.28146362,0.82196397,0.9921972 ,0.09969576,0.23987265,0.6734369 ,0.5469574 ,0.20805728,0.32639247,0.76773816 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt new file mode 100644 index 000000000..af4b01576 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt @@ -0,0 +1 @@ +0.4565062 ,0.92036587,0.47286046,0.18118097,0.5347498 ,0.91550153,0.300375 ,0.00581101,0.38686675,0.91085213,0.07278002,0.35556316,0.13014294,0.7274307 ,0.13867259,0.27517235 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt new file mode 100644 index 000000000..57716034e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt @@ -0,0 +1 @@ +0.6900174 ,0.28745306,0.30255774,0.5095008 ,0.6689176 ,0.4914624 ,0.92629427,0.504829 ,0.33514255,0.49005315,0.08569656,0.60965323,0.82193315,0.12380831,0.06971261,0.8822662 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt new file mode 100644 index 000000000..1e03d83b0 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt @@ -0,0 +1 @@ +0.4240734 ,0.5430392 
,0.7536325 ,0.46065134,0.00315792,0.02719985,0.7080977 ,0.24389206,0.8114604 ,0.13292362,0.346597 ,0.70247084,0.55753845,0.01969242,0.82950485,0.66249627 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt new file mode 100644 index 000000000..89ee30a6b --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt @@ -0,0 +1 @@ +0.31586212,0.19079527,0.9161567 ,0.8614566 ,0.9018915 ,0.34651542,0.62554437,0.05542602,0.8268219 ,0.38112178,0.9396123 ,0.49426383,0.8034765 ,0.72456217,0.5404088 ,0.8512237 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt new file mode 100644 index 000000000..cc434b0a8 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt @@ -0,0 +1 @@ +-4.0618963 ,-0.56899416,-2.6450877 , 2.4534085 , 1.98115 , 1.906561 ,-3.9617727 ,-0.6071247 , 3.1096997 , 4.4270124 ,-2.8755112 ,-1.8822336 ,-2.3567479 , 1.9797888 ,-3.5018713 , 3.429169 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt new file mode 100644 index 000000000..2c637a1d2 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt @@ -0,0 +1 @@ +-1.6089132 , 1.4328785 ,-3.2579598 ,-2.1328773 ,-2.6566415 , 2.541386 ,-4.3314023 , 0.48684084, 3.3134763 ,-2.69083 ,-0.45710313,-3.6763198 , 0.22075526,-3.159208 ,-2.1573126 , 4.1621423 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt 
b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt new file mode 100644 index 000000000..4b57fe8e0 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt @@ -0,0 +1 @@ +-4.061572 , 3.0518744 , 2.694435 ,-4.720131 , 1.3782452 , 4.083631 , 4.1221976 ,-1.2299284 , 3.096133 , 3.8382158 ,-1.9518853 , 4.350529 , 0.09219506, 2.6483617 , 0.74373996, 2.7447948 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt new file mode 100644 index 000000000..49c3022c2 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt @@ -0,0 +1 @@ + 4.68769 ,-3.2768764 , 3.1849844 , 4.497627 ,-1.2611016 ,-3.1152303 ,-0.8408633 , 0.4938034 , 4.0921655 ,-2.3150117 , 0.10100875,-3.8374226 , 4.08059 ,-0.74594986,-3.1000822 , 4.3654246 diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt new file mode 100644 index 000000000..e02c8ca16 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt @@ -0,0 +1 @@ +-3.6168842 , 4.1935644 , 0.73750836, 4.6044145 , 2.8967912 ,-1.8085694 , 4.539956 ,-0.37032878, 1.9738418 , 1.5388782 ,-2.945171 ,-3.3875864 ,-4.516983 ,-3.4998245 ,-4.676514 ,-2.2738194 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt new file mode 100644 index 000000000..233e5eae3 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt @@ -0,0 +1 @@ + 2.7731526 , 
2.451602 , 3.7535272 ,-1.2774152 , 1.5482912 , 1.3402948 , 4.4792123 ,-4.4954367 , 3.354679 ,-3.3615496 ,-4.619757 ,-3.3659618 , 4.7626247 ,-1.3596478 ,-4.835548 , 0.78964525 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt new file mode 100644 index 000000000..6a126081d --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt @@ -0,0 +1 @@ + 0.5400839 ,-3.2621996 ,-3.4817135 , 3.8183312 , 0.48498327, 2.9812584 , 4.111276 , 0.11223658, 4.7201405 , 2.4256718 , 1.4895477 , 4.7596602 ,-0.32709372, 1.3507305 ,-0.30043927,-1.8077502 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt new file mode 100644 index 000000000..eccd2c625 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt @@ -0,0 +1 @@ + 3.8758078 , 4.978636 ,-0.22925885,-2.6760504 ,-1.9160627 ,-4.609644 ,-0.9515802 , 3.558274 , 2.9096057 , 0.3340422 , 0.38608226,-0.32168412, 4.688853 ,-4.583811 ,-2.5113506 ,-4.6688786 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt new file mode 100644 index 000000000..0da05277c --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt @@ -0,0 +1 @@ +-2.9868221 , 2.4237797 , 1.0833962 ,-0.9231426 ,-2.1091506 ,-2.6163697 ,-0.23101932,-1.9252896 , 4.7034135 , 3.1088963 ,-2.345823 ,-2.7866168 ,-3.186763 ,-4.431844 , 3.3113294 , 0.9501982 diff --git 
a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt new file mode 100644 index 000000000..ace24f7c1 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt @@ -0,0 +1 @@ + 3.9716747 ,-2.254871 , 1.1943274 ,-2.212602 , 3.4311683 , 1.114989 , 4.0739036 , 0.47244295,-3.5793104 ,-3.359908 ,-4.7657595 , 2.0369127 ,-2.5619278 ,-3.4452975 ,-4.5852203 ,-1.137643 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt new file mode 100644 index 000000000..18b34c8b1 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt @@ -0,0 +1 @@ + 1.5887886e+00,-4.7446389e+00,-8.6568648e-01,-2.9789083e+00, 4.4470620e+00,-4.6563668e+00,-3.8466794e+00, 1.8815753e-03,-2.7699089e+00, 5.2776605e-01, 3.6518128e+00,-3.0939088e+00,-3.6008542e+00, 7.2454107e-01, 2.2568390e+00,-4.4835806e+00 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt new file mode 100644 index 000000000..d652da699 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt @@ -0,0 +1 @@ + 4.770412 ,-1.7520845 , 2.4057522 ,-0.74166125,-0.10780027, 4.5796657 ,-3.513094 ,-3.0285823 , 1.2001143 , 2.806742 ,-2.0503895 , 2.8160958 ,-1.5392824 ,-3.7772799 , 2.9158401 ,-1.0586692 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt new file mode 100644 index 000000000..e6d6e004f 
--- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt @@ -0,0 +1 @@ + 3.937408 ,-0.11191579, 2.2054992 , 2.847275 , 3.4895647 , 4.2361116 ,-3.2401278 ,-1.5813186 ,-4.558396 ,-0.89455926, 4.204445 , 3.5968838 , 2.773891 ,-2.9562843 ,-0.62606305,-0.03814701 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt new file mode 100644 index 000000000..8b472058e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt @@ -0,0 +1 @@ + 3.5032003 , 4.6036057 , 0.28915945, 4.671659 ,-1.978598 , 2.1773603 ,-0.54175234,-3.0131943 ,-2.7422159 ,-3.4361897 , 0.2850049 , 4.1412387 ,-4.86403 ,-0.67577606,-1.4206086 ,-2.357092 diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt new file mode 100644 index 000000000..bba80be5f --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt @@ -0,0 +1 @@ + 2.5063417 , 0.22874236, 2.2677753 ,-4.4159026 , 1.7464 , 4.6051064 ,-4.2867146 , 2.730521 , 1.6372519 , 0.70292765, 3.459053 ,-4.162376 , 0.36788836, 2.213299 , 4.110952 , 1.6797827 diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt new file mode 100644 index 000000000..31a2db03e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt @@ -0,0 +1 @@ +-4.1984134 , 3.7565446 , 1.3521377 ,-4.0263743 ,-1.929471 ,-3.7523155 , 1.3858393 , 4.1565247 ,-2.4681342 , 0.3598748 ,-2.0044599 , 3.7168603 , 3.6330557 , 3.0176272 ,-4.4643235 
,-0.1893698 , 3.8839848 ,-4.5703125 , 3.365731 , 4.5556674 , 4.954971 , 1.7591819 ,-0.9497736 ,-0.8527185 ,-1.1863561 ,-4.522639 ,-4.3187394 ,-3.702939 , 0.15341021, 0.8564923 , 1.9076811 , 4.2765 ,-3.7695112 ,-1.6033245 , 2.3159432 ,-1.6656336 , 1.4186145 , 4.334284 , 4.0654674 ,-4.518256 , 0.72815216, 2.5133176 ,-4.238172 , 1.0198449 ,-0.9638457 , 2.5847483 , 4.0381308 , 4.472872 , 0.11794223, 1.3358012 , 1.7975981 , 2.168553 ,-3.5131238 , 3.8412008 , 3.851232 ,-2.130775 , 3.556102 , 0.69062364,-4.668594 ,-4.619906 ,-2.87768 ,-1.0679495 ,-4.523185 , 4.184176 diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt new file mode 100644 index 000000000..2bdd62b24 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt @@ -0,0 +1 @@ + 2.9193265 , 4.315574 ,-3.7834768 , 3.4352486 , 4.1452866 ,-4.0322523 , 1.8039155 ,-4.080042 ,-1.1999705 , 4.9018297 ,-0.27180746, 1.709373 , 4.3322196 , 4.9179945 ,-3.977508 , 2.3486571 ,-0.11026379,-0.24730131, 2.3269305 , 2.1862001 , 0.92486495, 3.5822759 , 2.8370361 , 3.915398 ,-0.6385275 ,-0.02720119,-1.408676 ,-4.4472733 , 1.2901759 ,-4.60209 ,-2.9502335 ,-2.650517 ,-1.4038593 ,-2.967456 ,-2.0060933 ,-1.9603083 ,-0.4727794 ,-1.7877682 ,-3.9565926 , 1.4452418 , 2.5925353 ,-4.5134907 ,-4.195412 , 2.4681656 , 0.7140492 , 3.0753498 , 0.269442 ,-4.768041 ,-3.5370746 , 1.0272335 ,-0.7654047 ,-1.977087 , 3.1920779 , 0.37378865, 4.016262 ,-3.3201067 ,-4.7767315 ,-3.5074112 ,-4.094166 , 1.6035818 , 1.6506963 ,-3.2142932 , 4.7714067 ,-1.7164946 diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt new file mode 100644 index 000000000..8c770f61d --- /dev/null +++ 
b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt @@ -0,0 +1 @@ +-1.8028042 , 1.7280815 ,-3.0464594 ,-2.810487 , 0.582805 ,-1.786865 ,-1.7263526 ,-0.36871073, 3.3955328 ,-3.9523299 ,-1.880003 , 4.9068613 , 4.6292953 , 3.9778202 ,-1.859954 , 2.8149757 , 4.5020967 ,-4.160163 , 1.9295161 ,-1.2508658 , 0.5669804 , 0.99246883,-2.4829247 , 0.88920474,-3.7942843 , 2.4626305 , 4.3087935 , 3.0680852 , 3.0893688 , 3.1640174 ,-0.41890725, 0.5377459 ,-4.0344224 ,-4.5812287 , 0.5720303 , 1.802316 ,-0.31413126, 2.9586952 , 1.1723012 ,-4.696369 ,-3.7047153 ,-1.8109767 ,-3.6122723 , 1.2727392 , 4.4057164 , 3.8347735 ,-4.739083 , 2.4655118 , 0.45258832, 4.0693913 ,-3.3486447 ,-0.64714307, 1.4990507 , 2.771129 ,-0.6109979 ,-1.0617865 , 2.0837703 ,-1.633663 , 1.8431798 ,-4.3942385 , 4.8523426 , 1.1941985 , 3.0366988 , 4.7991366 diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt new file mode 100644 index 000000000..8a4c9ebb5 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt @@ -0,0 +1 @@ +-2.2375767 ,-1.1274278 , 0.18025301,-4.598087 , 1.1042122 , 3.1241179 , 1.9084688 ,-1.214722 , 4.596646 , 4.1969523 , 4.658112 , 3.143779 ,-2.6940444 ,-1.5482163 , 1.542811 ,-1.1338089 , 3.721594 , 0.24673286, 4.71102 , 2.7811737 , 1.171089 , 4.145586 ,-2.6335135 , 1.1190183 ,-3.7932637 ,-4.6548123 ,-3.10302 ,-3.392706 ,-3.856141 , 0.6618614 , 0.9668614 , 4.4293485 , 1.3193 , 4.983464 , 1.659716 ,-3.185926 , 4.8983006 , 1.6323217 , 0.18800464,-1.9328839 , 4.6031475 , 3.459718 , 4.128766 ,-3.4701612 ,-2.3796144 , 1.6752707 ,-3.6569223 , 2.922704 , 3.642789 ,-1.6817225 , 3.151759 ,-1.5401909 ,-3.8259532 , 2.4556105 ,-4.4989905 , 1.2779988 ,-0.62634754, 3.5827441 ,-0.82541114, 2.1539748 , 4.583461 , 1.2231985 ,-1.4457659 ,-2.9194565 diff --git 
a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt new file mode 100644 index 000000000..5110f86aa --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt @@ -0,0 +1 @@ +-4.011289 , 0.9077414 ,-2.8109396 ,-4.33598 ,-2.6516347 ,-3.917852 , 3.2461808 , 1.7588768 ,-1.9439132 , 2.190185 , 1.5180751 , 0.3587409 ,-4.3434815 ,-4.1376143 , 3.750847 , 1.5820616 , 0.03843357, 4.71235 , 1.0592757 ,-1.7640393 , 0.44547582, 2.8698466 , 4.5816092 , 4.6638517 , 1.4207541 , 1.863644 , 3.6007912 , 0.6800818 ,-2.4884489 , 3.0707197 , 3.3961668 ,-4.331953 , 2.7828538 ,-0.16146964,-4.9070745 ,-2.9787786 , 0.3337284 ,-3.935533 ,-3.303555 , 2.376896 ,-4.7058997 ,-2.2409894 , 0.07352693,-2.6024988 , 4.9593167 ,-4.7717366 , 1.6590588 , 4.063875 ,-3.8855767 , 2.6274624 , 4.901856 , 4.157007 ,-3.292969 , 3.579326 , 3.9860668 ,-3.0936542 ,-4.7793274 , 0.71697485,-2.0354068 ,-2.1414943 , 3.6339438 , 0.10732502,-0.86129206, 4.4152017 diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt new file mode 100644 index 000000000..1a4fc3ed0 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt @@ -0,0 +1 @@ + 2.2145607 , 0.88045335, 0.45151594, 2.852104 , 3.191637 ,-0.4578638 , 1.4858874 ,-2.1207588 ,-0.77495986,-4.1637363 , 0.83028954,-3.9974387 ,-3.3348315 , 3.7137656 ,-2.9883633 , 3.4332464 , 3.7178712 , 3.5850213 , 0.9240786 ,-0.07091421,-4.516931 , 3.965739 ,-4.828566 , 3.860382 , 0.3243482 , 1.6835089 ,-1.4710085 ,-2.6625636 , 1.942659 , 0.12808529, 1.3640044 ,-3.0124736 ,-3.646485 , 1.6046281 , 1.1087954 ,-2.4648561 ,-2.3274968 , 1.2196178 , 3.0752547 , 1.8316921 ,-2.926682 ,-2.247648 , 4.1264873 , 4.700915 
,-0.6861696 , 3.5246365 ,-2.5577545 , 1.832533 ,-4.3125343 ,-2.8579648 , 3.5299218 ,-0.67911506, 0.86782926,-2.918562 ,-3.3644724 ,-2.0097935 , 0.3721956 ,-1.3528451 , 3.8267515 , 4.916677 , 3.2055025 ,-0.64435905, 3.877367 ,-1.830818 diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt new file mode 100644 index 000000000..09c06c74c --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt @@ -0,0 +1 @@ + 4.5410523 , 4.4007382 , 3.3252192 , 0.40420002,-4.7642856 , 2.0282986 , 2.32176 , 3.160375 ,-4.3348713 ,-2.324847 , 4.327631 , 3.253995 , 0.53624976,-4.4896946 , 4.0600896 , 2.697662 ,-3.0693228 ,-4.7954664 , 2.010163 , 4.5790668 , 0.00921074,-4.638007 ,-2.612561 , 4.338762 ,-1.3632652 ,-0.55081725, 4.273717 , 3.1074166 , 3.1386747 ,-4.033469 ,-0.7298752 ,-3.4973295 , 4.454913 ,-0.5148646 ,-2.4100194 , 2.7154703 , 4.1507893 , 2.3424785 ,-1.7028755 ,-2.6013496 ,-1.831555 ,-4.07971 ,-1.039077 ,-1.8733021 ,-3.885844 , 3.5691998 ,-3.8779395 ,-4.7566814 ,-3.570575 ,-3.0510366 ,-4.6841617 ,-4.751285 ,-2.9700782 , 3.4774506 ,-1.3150035 ,-3.6287053 , 2.2280993 , 4.502896 , 3.9448938 , 3.3926914 , 1.560589 , 3.3307595 , 2.6545596 , 2.0503757 diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt new file mode 100644 index 000000000..24b7a248f --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt @@ -0,0 +1 @@ + 4.5630627e+00,-4.5077333e+00, 6.8117022e-03,-1.1568142e-02, 2.3568916e+00,-2.9918964e+00,-4.8542055e-01, 4.7381549e+00, 3.1183126e+00,-2.6462586e+00, 3.0083582e+00, 
1.4518642e-01,-2.4764729e+00,-4.8520207e+00,-4.8022575e+00,-1.8167463e-01,-3.1106927e+00,-2.4183941e+00,-4.1466684e+00,-3.6997426e+00,-3.9788694e+00,-3.0889416e+00,-2.2332447e+00, 1.8608164e+00, 2.8619974e+00,-3.6986623e+00,-1.3749057e+00,-9.2409855e-01, 2.7646086e+00,-3.3385031e+00, 7.6255083e-01, 1.0236104e+00,-1.7077237e+00,-4.4339476e+00,-1.1930060e+00,-1.7226344e+00,-3.1680160e+00,-1.8338548e+00,-2.6412952e+00,-8.2973856e-01, 4.2303777e+00, 3.4531716e-03,-3.3162324e+00, 8.4682000e-01, 2.5807633e+00, 2.7543969e+00, 6.8153429e-01, 4.7182851e+00, 4.2617507e+00,-1.4446728e+00,-4.3752551e+00, 3.5699592e+00, 9.6946698e-01,-2.0700858e+00, 2.0899124e+00, 1.6371955e+00,-9.5873147e-01, 3.1151581e+00, 2.9369416e+00, 4.4568644e+00,-9.4711387e-01,-4.1349549e+00, 3.3031983e+00, 4.1091359e-01 diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt new file mode 100644 index 000000000..088eb62cd --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt @@ -0,0 +1 @@ + 2.5168443 , 3.7492614 ,-3.7076504 , 0.49709523,-4.642194 , 1.8201847 ,-1.396746 ,-1.0660223 , 3.3333528 ,-1.7719259 ,-2.3515563 ,-2.0570705 ,-4.7125244 ,-1.593302 ,-2.1072757 ,-4.4396334 , 4.3185077 ,-2.7568438 ,-0.59535027,-3.9871383 ,-2.6216223 , 0.39957425,-1.3687986 ,-3.1157744 , 1.2557942 , 2.3428473 ,-4.906711 , 3.5663006 ,-0.46128616,-4.7818427 ,-0.8876555 , 2.5066485 ,-1.3254607 ,-3.6097736 , 1.2686944 ,-1.37061 , 4.762917 ,-3.489012 ,-2.7905307 ,-0.2612837 ,-3.3236315 , 0.8347171 , 2.5582032 , 0.42744452, 1.7428764 , 2.4122005 ,-3.6781132 , 2.8811646 ,-2.7060914 ,-0.4752588 , 0.44432116, 0.5011615 , 3.2550313 , 0.02670379, 2.6197197 ,-4.319786 ,-1.4056181 ,-3.3794782 , 0.66822946,-1.4262298 ,-0.2465175 ,-4.6432767 ,-3.580772 , 2.960096 diff --git 
a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt new file mode 100644 index 000000000..bb8129473 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt @@ -0,0 +1 @@ +-4.9356976 , 3.9426446 ,-4.746647 , 2.3674695 , 0.54803735, 3.1911538 , 0.28858757, 0.4800329 , 2.0652595 ,-4.5046906 , 0.21695825,-0.17217463, 2.4329293 ,-1.2274694 ,-0.11534467,-2.096684 , 2.6882868 ,-2.5291932 , 0.56199783,-2.0743406 , 0.95846254, 4.004705 , 0.89853394, 2.9610496 , 2.9799032 , 1.5339601 ,-1.7136513 , 2.1797504 ,-4.2055335 , 1.5059681 , 3.0828342 ,-1.7946475 ,-2.7096524 , 3.1037905 , 0.75922704,-1.1446673 ,-2.084073 ,-1.2888353 ,-1.6958839 ,-0.8388285 ,-1.0279479 , 1.1291095 , 4.080411 , 3.6791847 , 0.9237894 ,-4.70821 , 0.5730598 ,-1.3565379 ,-2.7533107 ,-0.4583869 ,-1.4416862 ,-3.6039822 ,-1.1611387 ,-2.6919081 ,-0.6557734 ,-2.9248757 , 1.4998456 , 3.2239568 , 0.23668556,-3.4410136 ,-2.3170567 , 3.66808 , 1.9004405 , 4.3537745 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt new file mode 100644 index 000000000..182eb5290 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt @@ -0,0 +1 @@ + 3.4251418 , 1.8884782 ,-4.061519 ,-2.1329548 , 3.851976 , 3.668601 ,-0.7418167 , 2.379966 , 0.87259316,-3.96981 ,-4.627804 ,-3.3958297 , 3.025158 ,-1.299777 ,-4.322816 , 3.9173064 ,-0.55214256, 1.9224825 ,-4.8571157 ,-4.778045 , 3.3015614 , 0.56785774, 4.7985554 ,-0.4355816 , 4.9478025 , 1.7909397 ,-0.7620663 ,-0.09947702,-3.0230513 , 1.3817457 ,-4.5706887 ,-3.4097836 ,-4.7086477 ,-3.4651487 , 1.4401027 , 4.7513933 ,-1.0788624 ,-3.4946275 , 4.607974 ,-3.1215246 ,-1.4637078 ,-3.5266285 , 2.1268125 , 0.19458893, 4.058288 , 
2.2452407 , 0.7575343 , 0.12213306, 4.885321 ,-1.2482406 ,-1.1034219 ,-4.054173 ,-3.6471267 , 4.774012 , 0.9450243 ,-2.5827825 ,-2.3991685 ,-2.8482654 , 0.9294943 ,-3.1165063 ,-1.6113516 , 0.04260086, 2.0987031 , 2.1601508 , 4.9740996 , 3.7719023 , 2.6817482 , 0.42131838,-1.4525859 ,-0.5124655 , 2.6313434 , 4.5606523 ,-4.6180778 , 4.788594 ,-0.8446551 ,-1.5460813 , 1.4288356 ,-1.9648911 ,-4.9766145 ,-2.405665 ,-0.30327383, 3.5204673 ,-3.848158 ,-2.6913974 ,-2.76141 , 4.336643 , 1.4205143 , 4.5898 ,-0.93183124, 4.2199287 ,-4.216924 ,-1.0979122 ,-2.3032405 ,-3.4457245 , 2.944412 , 2.137278 , 1.0326933 , 2.3116126 , 4.2138443 , 1.8283377 , 0.28901085,-1.8877143 , 0.50673705, 1.4360197 ,-2.924691 , 0.9819095 , 3.4656513 ,-2.541582 ,-1.9102442 , 3.3629627 ,-0.9675056 , 0.5937253 ,-2.4236617 ,-1.4193813 ,-0.7552614 ,-1.7121441 , 4.39647 ,-2.2712908 ,-4.3387337 , 1.5912663 , 0.8397044 , 0.17277755, 1.5272428 , 3.571715 ,-1.4471695 , 1.8623346 ,-4.3603377 , 1.2116091 , 4.960487 , 2.3681397 , 1.2925869 ,-4.3249073 , 2.4402251 ,-1.4506928 , 3.023616 ,-3.232099 ,-4.0106025 , 3.5774167 ,-0.6024932 , 1.0183483 ,-2.8215308 , 3.7395437 , 1.9100485 , 3.892712 , 4.6569633 ,-3.251774 ,-3.6923678 ,-4.8891983 ,-3.8605282 ,-4.0293036 ,-2.8199108 , 4.1668954 , 2.1569817 ,-2.9700332 ,-0.7035824 ,-0.5176811 ,-3.1826456 ,-3.334556 , 4.9103675 , 3.8513231 , 2.8609774 , 1.1845547 ,-1.4094447 ,-2.0445833 , 0.9833705 , 4.481276 , 3.83006 , 4.6240997 ,-4.268881 ,-0.85518706,-2.2650888 , 4.032545 , 0.9495817 , 1.1353155 ,-4.6551876 ,-2.2839146 , 2.6291692 ,-3.0398533 , 0.52652216,-1.8323399 ,-0.12300313, 0.46178594, 1.120684 , 1.4657134 ,-1.9794375 , 0.08941289,-4.4573083 , 2.7112565 , 4.9227715 , 2.4938288 ,-0.37153494,-4.1604757 , 4.7694197 ,-1.3021677 , 2.454714 ,-2.4902875 ,-2.760436 , 0.05183195,-2.6723208 ,-1.1471758 ,-2.2565122 , 0.20876396,-0.7288584 , 0.4386669 , 0.7846054 , 2.7294593 ,-3.836883 , 2.7501638 ,-4.775067 ,-3.2403855 ,-2.0307286 ,-1.6403166 , 4.9471517 , 1.0428456 , 
2.5126355 , 3.0090203 ,-2.3476288 ,-2.9215205 , 3.8079188 , 0.83959275, 4.2670302 , 1.2338712 , 2.7329903 , 2.2549257 , 4.882931 , 0.12783106,-2.4392028 ,-2.4590807 , 4.2874207 ,-0.08333418,-3.4244132 ,-0.2235516 ,-4.23632 ,-1.3970895 , 2.1245553 ,-2.513883 ,-2.8092728 ,-1.9194845 ,-4.1932216 ,-3.7431748 ,-1.1063433 ,-3.714845 , 1.7230242 ,-0.19162221, 1.1123114 , 3.937181 , 2.6165597 ,-0.61531806, 0.44309503,-2.9260228 ,-3.1617007 , 0.0663496 , 2.4541974 ,-2.714474 , 4.2564497 , 1.2300675 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt new file mode 100644 index 000000000..dd8037244 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt @@ -0,0 +1 @@ +-4.8834 ,-4.6238756 , 2.020674 ,-2.3068821 , 3.7487323 ,-0.36079448, 0.08661745, 3.423143 , 3.3073757 ,-2.709357 , 4.4810205 , 3.4159606 , 4.1597505 ,-4.249789 , 2.3782206 ,-2.02848 , 0.90137833,-0.6249625 ,-3.5300052 ,-4.1113796 ,-3.768913 ,-3.59854 , 2.0896666 , 1.7677166 ,-2.3101497 ,-1.0116942 ,-3.7846713 , 2.4777756 , 3.413987 ,-2.1964507 , 0.08637846, 0.02552292,-1.9918599 , 0.7785565 ,-4.065995 , 0.8808776 ,-2.0446506 ,-1.8421272 , 0.42566776, 3.8834689 , 4.900111 ,-3.0617309 , 4.0613194 ,-3.3601153 , 3.678536 ,-4.1136184 ,-4.2903633 ,-2.6918027 , 3.4335177 ,-3.9272869 ,-1.6882807 ,-1.9629028 , 4.2125826 , 1.6536059 ,-1.1801353 , 4.8443203 , 2.9393198 , 0.4306524 , 4.390743 ,-4.6322317 , 2.932263 , 4.140538 , 2.7385068 , 2.620753 , 2.0725663 ,-1.3642436 ,-0.48539641,-4.2409816 ,-1.5950899 ,-1.688442 , 4.4769464 ,-1.25038 , 3.462903 , 0.5011836 , 0.981037 , 0.63532305,-3.4727957 , 4.6721544 ,-3.481392 , 2.8904114 ,-1.7057139 , 1.0501702 , 3.0799537 , 1.6698593 ,-1.3895478 , 4.487443 , 2.5352533 ,-0.19357985, 0.78166926, 3.5892236 ,-4.3259463 , 2.8381345 , 1.3652785 ,-0.40142608,-0.62102544,-3.088937 ,-4.0266094 , 4.7095647 , 
2.0513067 ,-1.8115149 , 0.11062156,-4.5980725 , 2.809295 , 4.2042894 ,-3.4689455 ,-1.3418434 , 2.9026117 ,-1.6125411 , 2.153075 ,-3.4445221 , 3.4869678 , 1.8746428 , 0.8482056 , 3.0525062 , 1.715966 , 1.7684505 ,-2.0022326 ,-4.3427444 ,-3.1659825 , 1.6855526 , 3.1612136 , 2.0646648 ,-3.972224 ,-2.91726 ,-3.5450957 ,-2.7226381 ,-0.3273488 ,-2.5905557 , 3.6621993 ,-4.3285728 ,-0.6200474 , 0.08522832,-2.1981175 ,-3.4179437 , 2.5989106 ,-0.8503352 ,-3.3723786 , 3.9595454 ,-0.5431398 ,-2.6962373 , 1.9689399 ,-2.8925 ,-1.2064192 , 1.606632 , 2.2728612 ,-0.1403075 ,-4.8031726 , 0.1549256 ,-1.3698703 , 0.78889227,-2.286554 , 0.96417916,-0.10438658,-3.8131578 , 2.9322996 , 2.4103441 , 4.4864798 , 0.02176606,-1.1966147 ,-3.6921146 , 4.943659 ,-1.0050472 ,-1.2238564 ,-4.5758605 ,-2.6865735 , 1.7294792 , 4.180183 , 3.157911 ,-3.581904 ,-2.9112866 , 4.1674094 , 3.2326035 ,-2.7883985 ,-0.09154221, 0.8667318 ,-4.532571 , 0.816668 , 3.1307516 ,-4.1993947 ,-1.0503744 , 0.123965 , 0.17691068,-3.1465137 ,-1.4964765 , 3.4077635 ,-0.35415363, 1.9092371 ,-4.709203 , 1.148622 , 4.4766874 ,-2.193539 ,-3.7959206 , 1.4420112 ,-2.5300896 , 4.107192 , 3.4666913 ,-2.1158516 ,-3.182484 ,-2.8406513 ,-1.9396024 ,-2.3695247 , 3.8301885 ,-1.5032169 ,-0.48879272, 0.41695955,-1.1829228 , 4.822825 ,-2.9244933 ,-3.8178608 , 2.7742817 , 2.6998327 ,-3.1187122 , 2.508593 , 1.2989064 , 2.3436947 ,-0.39074868,-3.034766 ,-1.8690065 , 4.850296 ,-2.4549792 , 4.839528 , 2.2758777 , 2.6689568 , 3.2014422 , 3.6975234 ,-3.2566156 , 3.546554 , 1.9570364 ,-2.753807 , 2.3366053 ,-4.357898 , 4.9184504 ,-1.0057111 ,-3.8582199 , 1.2416974 , 4.355522 ,-2.7863925 , 0.4679685 , 2.6850772 , 2.9984746 , 2.434312 , 2.9931593 , 2.2637212 ,-0.18371914,-4.07688 ,-2.0402577 , 0.5173147 , 0.19596666, 4.71653 , 4.291663 ,-3.3575501 ,-1.0857964 ,-0.16504912, 3.6683955 , 2.9581416 ,-1.354989 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt 
b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt new file mode 100644 index 000000000..1295bfdba --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt @@ -0,0 +1 @@ + 1.2340723 ,-1.7371651 , 4.271641 ,-2.3332376 , 0.82301813,-3.4199295 ,-0.75806665,-2.2647665 , 2.613749 , 2.2658496 ,-2.1277714 ,-0.465433 ,-0.1323059 ,-1.9658507 ,-4.7780223 ,-4.392719 ,-0.81063855,-3.639001 ,-3.6398284 , 4.6309023 ,-0.17483327, 1.7921627 ,-1.1493484 ,-3.8145075 , 2.2367268 ,-0.40209827,-1.4159911 , 2.3032134 ,-4.154446 , 1.6760192 , 2.3430173 ,-1.386683 , 3.3363335 ,-2.976934 , 3.3983 ,-0.0069695 , 3.7025425 ,-1.8683758 , 0.72029626, 2.7558882 ,-4.4060984 , 2.553126 ,-3.5888321 , 1.8549582 ,-0.52258795, 4.6549897 , 0.8886988 ,-3.0400214 ,-3.6890693 , 3.6663766 ,-4.8026586 , 1.0636287 ,-2.9774907 , 0.39021772,-4.2414255 , 2.914968 ,-0.24334456,-4.0344954 ,-1.1011956 ,-3.8205252 , 0.05693521,-4.1379023 , 1.0584197 ,-4.0404034 , 4.841462 ,-1.2727845 , 2.6974225 ,-4.2507453 ,-2.7101111 ,-2.9800036 , 0.3082796 , 3.6763537 , 2.3277721 ,-4.9667864 ,-2.4498677 , 0.2704629 , 3.006634 ,-1.1129389 , 4.373073 ,-1.2066779 ,-3.1575904 ,-2.721046 ,-0.861226 , 1.7315729 , 2.255666 , 2.5448847 , 3.1268334 , 1.5189171 ,-3.1992466 , 0.607633 , 4.0749955 , 1.2546133 ,-1.5335796 ,-1.6200712 ,-3.9392874 , 1.053699 ,-0.87970537,-3.9218261 ,-2.2724128 , 0.82235074,-2.3400521 , 3.6467028 , 1.6891364 ,-1.6333519 , 2.2639709 ,-0.08272895,-3.076964 , 3.731091 , 3.7932968 , 2.496441 ,-4.12142 ,-2.0908666 ,-4.994248 ,-0.0429902 ,-4.6083336 ,-4.522535 , 4.717733 , 1.6715643 ,-4.779822 , 1.2919815 ,-4.6121325 ,-0.6206874 ,-2.6633883 ,-1.9632595 ,-3.2203329 ,-0.6556523 , 1.3083993 , 0.13287744, 4.599294 ,-1.1777852 ,-2.9159715 ,-0.25669238, 0.48217958,-3.9736347 ,-0.774503 ,-0.7264863 ,-3.0058725 ,-2.1682055 , 2.6579158 ,-4.4020653 , 3.0450368 , 1.3798735 ,-4.9858127 ,-4.5812607 ,-3.7349749 ,-4.4158583 , 1.631093 ,-3.0769646 
,-3.8406906 , 1.6544044 , 0.36895755,-1.8196682 ,-2.0880237 ,-3.708266 ,-2.0277069 , 1.0536597 ,-3.6726243 , 1.1704421 , 2.3201573 , 1.4994124 , 4.0197086 , 2.1001272 ,-0.39845964, 4.879206 ,-4.6042013 , 4.367211 , 2.2712052 , 2.7754369 ,-3.156667 , 4.349216 ,-4.111492 , 1.0267047 ,-2.3381946 , 4.8876834 , 4.876814 ,-0.28538027, 4.8861 ,-0.95963717, 0.46279734,-4.5789995 , 0.26168647,-0.8879058 , 2.4468584 , 1.3030591 , 3.7261188 , 3.9933589 , 2.4964094 ,-1.3851117 , 0.7147012 ,-3.8367457 , 0.79737735,-0.5907085 , 4.317288 , 0.7659837 ,-4.821792 ,-1.466433 ,-1.147227 ,-1.8638811 , 2.5115767 , 1.9449657 ,-2.4122007 ,-2.4968379 , 0.7738737 ,-1.4761454 , 4.131583 , 0.4211128 ,-2.4312468 ,-1.9722428 , 2.2810268 , 4.950381 ,-0.0406047 , 4.67312 , 0.66613483,-0.28880936, 3.2917845 , 1.6225572 , 4.809879 , 0.48241946,-3.654634 , 0.68542016, 1.3973923 , 3.479005 ,-1.4296091 , 0.64391786,-4.0887494 ,-2.186845 ,-4.5834355 ,-0.67726034, 2.4158256 ,-2.4787726 , 0.4353257 , 2.9205139 , 0.10488439, 2.0790074 ,-4.5518365 ,-3.3856661 , 3.940736 ,-1.7141095 ,-4.8946457 , 1.1085542 , 3.785141 ,-2.4175835 , 3.7720537 , 4.623048 , 2.2239215 , 0.11616404, 0.09229392,-3.637964 ,-2.334849 ,-0.95000714,-2.1338253 , 3.2281857 ,-4.0220475 , 4.7304025 ,-1.8075961 , 0.2428817 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt new file mode 100644 index 000000000..378b5fea5 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt @@ -0,0 +1 @@ + 2.4605505 ,-2.7001262 ,-4.3874917 ,-2.9867616 ,-3.4332 , 0.76675916, 3.4377892 ,-0.6712793 , 1.8018581 , 1.8148962 , 2.0353577 ,-4.766427 , 3.2487285 , 3.886249 ,-2.8867183 ,-0.7906634 ,-4.376028 ,-4.2085958 ,-0.36025277, 0.6360799 ,-4.687723 , 4.8313313 , 3.3582768 , 2.1117954 , 0.9821817 , 3.3697798 ,-1.1784939 ,-3.1590316 ,-0.24019621, 0.20640443, 1.2808957 , 
2.3346424 , 2.13951 , 0.61864626, 2.4020443 ,-1.9671458 ,-1.6852348 , 0.32225233,-2.3928862 ,-4.173372 ,-2.282281 ,-1.271318 , 3.0839682 ,-4.4726086 ,-0.635177 , 3.2710915 , 3.08071 ,-0.7311931 , 2.1444874 , 0.4102332 ,-3.332888 ,-4.8965516 , 3.903695 , 1.4920163 ,-4.041926 ,-0.3941788 , 3.6352818 ,-2.098405 ,-0.9248165 , 2.6277795 , 3.225142 ,-1.4461963 ,-4.2050753 ,-0.2213572 , 1.9704323 , 3.298732 ,-4.710403 , 3.6876736 , 2.0771818 , 1.3559113 , 1.328373 ,-4.4079022 ,-3.28067 , 3.8852313 , 2.322237 , 2.3243637 ,-1.9126451 , 4.6277676 , 1.7031307 , 0.74861574,-4.688967 , 3.9351206 ,-1.8054084 , 1.5824287 , 3.5381088 , 2.4798677 ,-3.3099444 ,-3.8518245 , 1.5562242 ,-1.9466928 , 0.08375791,-0.16754703, 2.9265418 ,-1.6599798 , 2.766202 ,-2.8269696 ,-0.19389874, 2.0869334 ,-1.5073173 ,-3.2024453 ,-3.6522708 ,-4.588111 ,-2.3425827 , 4.8709297 ,-1.4231887 , 1.0590451 ,-1.6406479 , 0.37192422, 0.7313186 , 0.3865313 ,-4.2832613 , 3.9712496 , 0.07653506, 0.2593589 ,-2.6036396 ,-0.45185068, 3.6537335 ,-0.6341783 ,-0.6381408 ,-1.0992868 , 2.766365 , 4.666631 , 4.416099 ,-3.6654727 ,-4.0626607 ,-3.4928396 ,-0.6944366 , 4.869798 , 4.2240977 , 0.9655519 ,-2.5654511 , 1.3396966 ,-3.7639391 ,-1.2369057 ,-3.7242758 ,-0.5189227 , 1.6548159 ,-2.6197302 , 4.2732763 , 2.239486 ,-4.316255 , 3.2419755 ,-1.9283817 , 0.22489135, 2.6034477 , 0.15818155, 2.0811818 , 0.836994 , 2.7832468 ,-0.68581384, 0.89475006,-3.1455147 ,-4.818614 ,-4.1738377 , 0.4281551 ,-2.935886 ,-3.7582467 , 0.58168256, 0.2854076 , 1.0492616 , 2.2415884 ,-4.4923434 ,-3.2479804 , 3.8439462 , 3.9802108 ,-0.9027783 , 1.7783072 ,-2.2782066 , 4.4638705 , 4.28735 , 4.291463 , 1.1685107 , 1.2765578 ,-3.7954235 ,-3.494621 , 4.4340134 ,-3.5995178 ,-4.3025713 , 3.3037348 ,-3.6675146 ,-1.7871013 ,-1.2922373 , 0.72924066,-4.7065907 , 2.1388702 , 2.3570008 , 3.9203117 , 0.07483537,-2.8389792 ,-1.795164 ,-4.380931 , 1.3189598 , 2.4404252 , 4.4774084 ,-1.2798066 ,-4.95842 , 1.8095461 , 4.2692375 ,-2.0918155 , 0.33083543,-3.794544 , 
1.4940621 ,-3.9446015 ,-0.38208306, 0.30863285,-0.6832849 ,-2.5675633 ,-4.948772 , 1.5904989 , 3.0415509 ,-4.899339 , 0.9415345 ,-0.91124976, 4.4849253 ,-3.4605968 , 1.6737833 , 1.9091597 , 1.3111106 , 2.0829957 ,-2.1308084 ,-2.912219 , 1.1306196 , 2.231948 , 4.7522073 ,-2.1438766 ,-2.1000512 ,-0.2984778 ,-1.2093959 , 2.6259391 , 1.8113437 ,-4.137133 , 2.716111 , 3.4318748 ,-0.89123845,-3.70718 , 2.453927 ,-0.22418758,-3.098459 ,-4.4986243 , 0.85048616, 2.8023102 , 3.743153 , 0.9931644 , 3.8588202 , 1.7585737 ,-4.2855363 ,-2.5475764 ,-0.83141845,-1.9358089 , 3.1711586 , 2.4221613 ,-1.881327 ,-3.7230873 ,-4.55259 ,-0.42294836, 4.64625 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt new file mode 100644 index 000000000..339435425 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt @@ -0,0 +1 @@ +-3.37344313e+00, 2.78325319e+00,-7.30300546e-01, 1.33456266e+00, 3.96648932e+00, 4.33421373e+00,-3.11558557e+00,-3.64659280e-02,-1.73589993e+00, 4.81018400e+00,-8.32905114e-01, 2.33330703e+00, 1.85830116e+00,-4.60395622e+00, 5.26070774e-01,-4.71355534e+00,-2.97202754e+00, 3.57638383e+00, 4.50985909e+00, 2.08423686e+00,-1.85349309e+00,-2.18306184e+00,-4.65403509e+00, 4.31280661e+00, 1.16069472e+00,-4.85344124e+00, 8.40563923e-02,-1.98723459e+00,-4.29561710e+00,-2.57372570e+00,-4.22641230e+00,-4.00811911e+00,-9.61861551e-01,-2.14665198e+00, 4.18120289e+00,-3.87826174e-01,-2.86187083e-01,-4.84979200e+00,-1.34733701e+00, 1.27489030e+00, 1.98844969e+00,-4.11230135e+00,-1.61191213e+00, 2.63515592e+00, 4.35539484e+00,-1.56582773e+00,-2.45283508e+00, 1.44556177e+00,-8.56053472e-01, 3.25111747e+00, 3.58699083e+00,-2.47732449e+00, 3.64130282e+00,-4.91288567e+00, 8.97059917e-01,-2.26010180e+00, 4.91831064e+00, 4.45047706e-01, 1.88655663e+00, 3.20642543e+00, 1.38243341e+00, 9.06112790e-01, 
1.15262544e+00,-2.39862514e+00,-2.87477684e+00, 7.36831248e-01, 3.18799114e+00, 1.22698748e+00, 5.63625395e-01, 1.29130912e+00,-4.89572334e+00, 2.11258578e+00,-4.55420208e+00, 4.94569272e-01,-7.08617330e-01,-1.84863120e-01,-4.81965256e+00,-1.06512284e+00, 4.79633398e-02, 2.70429182e+00, 4.78289175e+00,-2.11806059e+00, 4.23046875e+00, 3.18022132e+00,-8.39496255e-01, 3.13150501e+00,-3.24103773e-01,-7.48505890e-01,-2.45754886e+00, 4.16639376e+00, 3.25864077e+00, 3.40006447e+00,-3.77217412e+00, 2.93266010e+00, 3.33685803e+00, 1.02347994e+00,-2.22839618e+00,-1.90375733e+00, 3.24283957e+00,-4.01684284e-01,-4.45417643e+00, 3.74440104e-01, 3.33520865e+00, 6.64106190e-01, 3.84395885e+00, 2.38586918e-01,-1.51634857e-01,-2.64977455e+00,-3.45786500e+00, 4.89002228e+00,-1.07323432e+00,-2.92749858e+00,-1.76510501e+00,-3.44604325e+00,-1.89681911e+00, 4.20239258e+00,-1.75864971e+00, 2.13181686e+00, 3.90355319e-01,-4.11911535e+00, 6.61891177e-02,-4.32988214e+00,-1.42876351e+00, 3.12163901e+00,-4.56227779e+00, 4.17938662e+00, 9.63881195e-01, 4.35952139e+00, 1.61931109e+00, 4.11196423e+00, 2.25612569e+00,-4.77538586e+00,-1.72600198e+00,-4.39411783e+00,-8.98730099e-01,-1.04562032e+00,-2.81517529e+00, 3.57167959e+00, 1.90318239e+00, 2.17302442e+00,-3.79942179e+00, 2.19838643e+00,-4.16209459e+00, 4.45025682e+00, 1.68786839e-01,-2.56879544e+00, 3.60925221e+00, 1.06542781e-01,-3.48755455e+00,-6.77028894e-01,-3.51582170e+00, 3.90697241e+00, 4.49116230e+00,-1.56180394e+00, 4.96249914e+00, 9.63374436e-01, 2.72304177e+00, 8.38046610e-01,-2.91993833e+00,-9.41783428e-01, 8.00800502e-01, 3.89176035e+00, 6.70560122e-01, 2.76782703e+00,-1.37075472e+00,-3.25303817e+00,-4.41226482e+00,-8.38777184e-01, 1.73568249e+00,-1.09438455e+00,-1.08815920e+00, 1.06787062e+00, 2.04415274e+00,-2.93027782e+00,-6.86941504e-01, 3.83109421e-01,-3.49270535e+00,-2.13225913e+00,-3.61786675e+00, 1.32213378e+00,-2.89654016e+00, 4.23944092e+00, 4.53665400e+00, 4.26081800e+00,-1.95718706e+00, 4.72295076e-01,-3.08592963e+00, 
2.53354859e+00, 3.80069661e+00,-1.14408419e-01, 2.39438844e+00,-4.73618507e+00, 2.35079074e+00,-1.43686843e+00, 1.32946157e+00, 1.10381134e-01,-3.49878430e+00, 2.83181930e+00, 4.57872486e+00, 2.29953095e-01, 7.19881415e-01,-2.97208834e+00, 4.11286211e+00,-3.89149117e+00, 3.83631349e+00, 4.14627981e+00,-1.14082299e-01,-6.89825296e-01,-2.55468488e+00,-4.04466152e+00, 9.95541453e-01,-2.59181118e+00,-4.60567427e+00,-4.77339029e+00,-7.36041367e-02, 1.85957468e+00,-3.42530179e+00, 4.55782986e+00,-3.29603004e+00, 3.55632234e+00, 2.40858841e+00,-2.07399082e+00,-3.96705031e+00, 4.41718817e+00, 3.19581985e+00,-3.72379017e+00,-3.76826024e+00, 6.79764748e-01,-4.43838930e+00, 2.29627752e+00, 2.34923697e+00,-4.23308420e+00, 3.80186272e+00, 8.65862250e-01, 8.44927967e-01,-1.05974531e+00, 4.70531940e+00, 1.25060010e+00, 4.82314730e+00,-4.53093815e+00, 4.51410580e+00, 4.95166332e-01,-3.45584202e+00, 1.82002666e-03,-3.27616286e+00,-2.68104935e+00, 2.39554620e+00, 2.99364328e+00,-2.57998848e+00,-4.35891914e+00, 4.64737415e+00,-5.74958742e-01, 6.47293210e-01, 1.85961032e+00, 4.49567413e+00,-4.36166048e+00 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt new file mode 100644 index 000000000..e0e52c398 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt @@ -0,0 +1 @@ + 4.5734663 , 3.96675 ,-2.7826853 , 4.377681 , 1.8424977 ,-2.8312624 , 0.65628445,-3.7023883 ,-1.8941027 , 0.53154576,-3.9718776 ,-3.3961854 ,-2.7500536 , 2.6793208 , 3.3515985 , 2.0939343 ,-4.3965416 ,-1.7462187 , 0.5660886 , 4.497879 ,-2.2529721 ,-4.8996797 ,-0.00740948,-2.941367 , 1.9482567 ,-2.462802 ,-0.7897884 , 3.1501546 , 3.1216884 ,-3.506249 , 2.871302 ,-3.964653 ,-0.40679944, 2.8930066 ,-4.783338 ,-1.8733944 , 2.2654383 ,-0.41361305,-3.7790897 ,-1.9458629 ,-2.274427 ,-2.9192872 ,-0.73215395, 2.8135974 , 2.1402152 , 4.516366 , 
1.58816 ,-4.607831 ,-3.5409598 , 1.9784997 , 3.11111 , 1.0872442 ,-3.6907403 ,-4.774325 ,-4.9267297 , 1.2962086 , 2.4646177 , 2.2726526 , 4.8766675 ,-2.9272413 ,-0.06221364,-0.80498594,-2.319938 ,-3.8261194 ,-2.3452706 , 2.5408983 ,-0.80628425,-1.4547366 ,-4.4171157 , 3.1584027 , 4.2213454 , 3.0342784 , 2.0285478 , 3.4517126 , 1.870827 , 2.812075 , 1.0776864 ,-4.524331 , 3.1467574 ,-2.366355 ,-4.7368546 , 1.940347 , 4.282059 , 1.2666475 ,-4.9559174 , 2.8177614 , 1.1941892 ,-0.25412267,-2.833778 , 1.1770393 , 4.9503546 , 4.582686 ,-1.0778978 ,-2.9030416 , 3.2517505 , 1.556093 ,-3.7605543 , 0.5915735 ,-2.6323159 , 4.596147 ,-0.90292877, 2.8230112 , 4.9295835 , 3.523853 , 1.7742149 ,-2.6014073 , 2.162894 , 1.9364033 , 4.0920115 , 0.81613404, 2.4198878 ,-0.907447 ,-4.79113 ,-3.4193892 ,-0.3334577 ,-1.0439668 , 4.2233415 , 1.4482704 , 1.3646252 ,-0.9206041 , 4.4994802 ,-4.2411633 , 0.6763335 ,-1.3827848 , 1.8579848 , 1.6426222 , 0.904467 , 3.876264 ,-4.6476808 , 4.576801 ,-1.4680524 , 2.441134 , 3.2343059 , 0.23119794, 2.5640545 ,-0.7293438 , 3.7184558 ,-1.6056752 , 3.1490617 , 4.6837263 , 4.7100887 ,-2.785927 ,-0.1520597 ,-1.9914767 ,-4.00598 ,-2.7502792 , 3.7857378 , 2.8444788 , 4.9911737 , 0.29277426,-4.779576 , 3.223367 , 1.3517398 , 4.8757277 , 3.8083189 , 1.7660266 ,-2.1543872 , 4.822371 , 2.089687 ,-4.7373757 ,-2.4061642 , 2.0387447 ,-4.067881 ,-3.1757388 , 0.24974413,-0.24441184,-0.1168329 ,-0.35149318, 2.0035832 ,-4.248678 ,-1.4723817 , 3.8218668 ,-2.8085105 , 4.6995482 ,-3.0093114 ,-3.648268 ,-1.0374364 , 0.04459473, 2.3945484 ,-0.63439727, 3.3920286 , 2.403765 , 1.303556 , 3.232244 ,-0.44932058, 0.9601637 ,-3.3821623 ,-4.257736 ,-4.095783 , 0.42818338,-4.925627 ,-1.8419602 , 4.9393196 , 0.8049334 , 4.431875 , 2.8487725 , 2.1205912 , 1.7367444 ,-4.337498 ,-3.574642 ,-3.8927085 ,-0.35219863, 2.8415039 ,-0.2887568 ,-0.89806557, 2.669602 , 4.8017626 , 4.278042 ,-1.2604581 , 3.152027 , 2.1625066 , 1.5039738 ,-3.7209976 ,-0.72354925, 4.006067 ,-3.7651584 , 
0.7198826 , 3.9594896 , 0.6228397 , 2.8464649 ,-0.18740664,-2.0530953 , 3.5185826 , 2.5037062 , 0.3990585 ,-4.423475 , 4.6931167 ,-1.0078553 , 0.74727917,-4.289701 , 1.697721 , 3.4963684 , 1.5796075 , 2.296678 ,-2.9379995 , 4.4748416 , 0.25155628, 4.1183267 , 0.9506131 , 1.2903908 ,-4.6828184 ,-2.309908 ,-4.2793307 ,-2.2069294 ,-4.038367 , 4.641971 ,-2.3178709 ,-2.2683682 ,-0.96986157, 2.6649144 , 2.3106637 ,-1.8052462 ,-4.9433284 , 1.7941002 , 4.80127 ,-0.06690114 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt new file mode 100644 index 000000000..9a8f222e7 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt @@ -0,0 +1 @@ + 2.2282960e+00, 1.0135865e+00,-4.1930809e+00, 5.3674412e-01,-3.2516165e+00, 1.2745492e+00, 4.2867136e+00, 1.9524460e+00,-3.6757104e+00,-3.6086998e+00,-9.4525421e-01,-3.4005399e+00, 3.3607626e+00, 4.2363039e-01,-2.5177178e+00,-3.0130227e+00,-4.1442380e+00, 4.4951862e-01,-6.4387190e-01, 4.3701029e+00,-3.6790867e+00, 3.2749624e+00,-2.2554400e+00, 1.8269253e+00, 1.8358005e+00,-6.0994375e-01, 3.5964453e+00, 4.8953295e+00,-2.6134133e+00,-3.9301482e-01, 4.0286818e+00,-8.9392501e-01, 2.6430035e+00,-1.0339550e+00,-4.2311502e+00, 5.1657695e-01,-3.0095081e+00,-3.2156844e+00, 3.0075660e+00,-2.4905038e+00, 2.2380588e+00, 4.6933036e+00,-2.7880669e+00,-3.3672907e+00, 2.5187421e+00, 2.1843061e+00,-3.9957666e+00,-4.5409918e+00,-1.7282218e+00,-4.6849327e+00, 3.1863580e+00, 2.4342964e+00,-4.5180349e+00,-2.4310455e+00,-2.6789901e+00,-1.6438740e+00, 4.9613748e+00,-3.7800386e+00,-4.4277740e+00, 1.0571244e+00,-3.3765689e-02,-6.2219787e-01, 2.1075857e+00,-2.0555353e+00, 2.6996508e+00,-3.0303302e+00,-3.8262250e+00,-4.5048919e-01, 2.6760142e+00, 3.2696848e+00, 2.8136756e+00,-2.7064829e+00, 8.5861349e-01,-1.8871003e+00,-9.5355767e-01, 2.3704410e+00, 4.8897211e-02,-4.6371531e+00, 
1.5693765e+00, 3.7866819e+00,-2.9738419e+00, 1.2106347e+00,-5.8760280e-03,-6.4124316e-01, 4.2396611e-01, 4.8550687e+00,-3.0650468e+00,-1.2087260e+00,-2.4833875e+00, 2.1272743e+00,-1.8991195e-01,-3.5372739e+00,-2.3402226e+00,-1.0234243e+00, 2.8981063e+00, 8.7964945e-02, 3.2136328e+00,-3.4051507e+00,-4.5538807e+00,-4.0228786e+00,-1.8993270e-01,-4.5704255e+00, 1.8850164e+00, 9.9910229e-01,-4.8424377e+00,-3.1492932e+00, 2.3922281e+00, 4.8503261e+00,-2.1037047e+00, 3.3602579e+00, 1.3546667e+00, 1.3481154e+00,-2.3604252e+00,-1.3253393e+00,-3.5330158e-01,-2.1313765e+00, 3.1442962e+00,-1.1570807e+00,-4.5890884e+00,-4.1608801e+00, 1.8554245e+00, 2.4646142e+00,-1.8453486e+00, 3.3489871e+00,-1.1248070e+00, 3.1451607e+00,-1.4458319e+00,-2.2727523e+00,-2.0378258e+00, 2.4566815e+00, 3.8839689e-01, 4.2570353e+00, 2.3613093e+00, 1.2956337e+00,-7.5734973e-01,-1.4549307e+00, 9.3240172e-01, 4.3444591e+00,-6.4935732e-01, 2.5328317e+00,-2.3545196e+00,-4.7553263e+00, 2.6134777e+00,-2.5526178e+00,-1.7996631e+00,-2.0215256e+00,-4.6141486e+00,-1.7283168e+00, 2.5297335e-01, 3.7009020e+00,-1.9858284e+00,-3.4631619e+00,-1.5858738e+00,-2.5620985e+00, 3.2822473e+00,-3.2632313e+00,-9.0714562e-01,-2.3562717e+00, 4.4088845e+00,-3.6630182e+00, 5.5761892e-01, 1.6045070e+00,-3.6806375e-01, 4.3184443e+00,-1.3219705e+00, 1.5496376e+00,-1.5801797e+00, 2.1545045e+00,-4.0106788e+00, 3.4172714e+00,-4.2495294e+00,-6.1115064e-03,-7.2607052e-01,-7.3130745e-01,-4.4462271e+00, 4.8119636e+00,-4.7460346e+00,-3.0464313e+00,-2.8801811e+00,-1.4347218e-03, 4.4133449e+00,-3.3173063e-01, 4.3802023e+00, 2.6040417e-01,-2.5531218e+00, 3.7436140e+00,-4.1636271e+00,-3.3907690e+00,-1.4418361e+00,-3.6933661e+00,-2.6342602e+00,-3.1492887e+00,-5.5590755e-01,-1.6814464e-01,-1.0868104e+00, 4.9451909e+00, 3.4104226e+00, 1.0342516e+00, 4.7993002e+00, 1.2480364e-01, 1.6109833e-01, 2.6366503e+00, 1.6535910e+00, 4.3810592e+00, 4.4755011e+00, 4.3265424e+00,-3.1934264e-01, 9.8549920e-01, 1.9962710e-01, 
2.8525822e+00,-3.7352023e+00,-1.3402178e+00, 2.5931063e+00,-2.6708813e+00,-7.6831090e-01, 3.0769660e+00, 1.4107993e+00,-1.8936746e+00,-4.7568636e+00,-1.9222193e+00, 4.7693071e+00, 2.8644614e+00, 4.1877995e+00,-3.6974251e+00, 4.5314616e-01,-7.1986055e-01, 4.8653622e+00, 1.4722897e+00,-8.6220115e-01,-4.1846976e+00, 3.7767217e+00, 3.7630556e+00,-4.5851058e-01,-4.9183292e+00,-1.8750135e+00, 1.0773923e+00,-5.2709883e-01,-9.2767686e-01,-1.3984675e+00,-2.0892789e+00,-4.3801632e+00, 4.0080590e+00, 4.2269025e+00,-1.2195336e+00,-2.2649438e+00, 4.6874623e+00,-3.8354571e+00, 5.9588730e-01,-2.8315885e+00, 3.0605823e-01, 2.1416895e+00, 1.6045133e+00,-3.3075256e+00, 4.9898911e+00, 1.7708080e-02, 3.5305614e+00 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt new file mode 100644 index 000000000..1b2e33401 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt @@ -0,0 +1 @@ + 1.9229428 , 2.1045275 , 2.0514195 , 1.7149676 ,-4.1647053 , 4.3958654 , 2.1192055 ,-2.4357705 , 2.249189 , 4.7986865 ,-1.0146881 , 2.5108647 , 0.7262246 ,-2.3110187 ,-0.434008 , 2.6220334 , 1.3261455 ,-2.0402927 , 0.6362597 , 0.12827367, 0.94167644, 1.6396433 , 2.802215 , 0.92637545,-2.8669958 , 2.1684341 , 4.7197456 ,-3.0393784 ,-1.5588902 ,-1.5589788 ,-1.2792847 ,-4.301159 , 3.6853306 , 3.5522077 ,-3.5120559 , 3.6523628 , 0.52381915,-4.3210206 , 3.1021209 ,-4.4059095 , 4.574733 ,-3.708168 ,-3.4609973 , 0.04494883, 4.6041393 , 4.6209555 ,-2.184693 , 3.3114836 , 4.0440845 ,-4.362543 ,-3.0185041 ,-3.4911432 ,-1.0443465 ,-3.1546419 ,-3.0831194 ,-1.8959469 ,-3.7653599 ,-1.8753844 , 3.969308 , 4.0960746 , 0.256032 ,-0.11065102, 4.753394 , 4.8433857 , 0.17249103, 0.44612473, 3.5996687 ,-3.7071083 , 4.15448 , 2.7609568 , 0.7979912 , 2.6985793 , 0.24981445,-0.7343978 ,-3.8946455 ,-3.4738345 ,-2.0124238 , 4.6603985 , 0.9002829 ,-2.2128618 
,-0.8752893 ,-3.0990481 , 2.770291 ,-1.4642559 , 0.4561498 , 0.5808671 , 2.4227936 ,-2.400878 , 0.6494001 , 1.0195295 ,-3.2693145 , 1.9889433 , 3.5208216 , 3.6280289 , 4.322899 ,-2.805155 , 3.7704606 , 0.6797415 , 4.442675 ,-0.5069875 , 1.3373847 , 4.6953626 ,-0.7946793 ,-2.7352958 ,-1.9969261 , 0.43059692, 2.50853 , 1.9314603 , 1.3780333 , 2.0536468 ,-1.572231 ,-4.5323825 ,-1.3175989 ,-1.5515776 ,-0.05870355, 0.32408538,-4.2935586 ,-1.561555 ,-1.7551405 ,-0.93950266, 3.2540953 ,-4.623753 ,-3.4944966 ,-0.7603045 , 0.76591074,-4.9114766 ,-2.679303 , 0.12950227, 4.094419 , 4.781908 ,-3.6946337 , 2.766349 ,-0.45678583,-2.275264 , 2.0858452 , 3.1182098 ,-1.2942638 , 4.4418044 , 2.2264028 ,-3.3838644 , 1.4427853 , 3.7365992 ,-1.1815038 , 1.4555137 , 0.22728541,-0.18817298, 3.454521 , 3.1835914 , 4.0786743 ,-1.5111316 , 1.1560454 ,-0.04693017, 0.44183066,-0.7420173 ,-1.2243766 , 3.4453049 ,-2.969513 ,-0.82397145, 4.870895 , 3.0178127 , 1.7217305 , 4.482936 , 1.9468685 , 3.9970267 , 4.7294793 , 2.9921744 , 4.470473 , 4.7626653 , 0.13104612,-4.651569 , 2.7991815 ,-4.734433 ,-2.4499187 , 1.0739365 ,-1.5583646 , 3.6531756 , 2.7731194 ,-4.72427 ,-4.5801177 ,-4.035709 , 2.5767221 ,-2.8133557 ,-1.8342617 , 3.5808434 ,-2.1022995 ,-3.5421894 ,-3.0776916 , 3.168665 ,-0.07246887,-1.2413273 , 4.7964606 ,-1.0624843 , 0.75939703, 2.5336463 ,-4.8622346 ,-4.9744167 , 2.1007512 , 1.5271608 , 0.37077245, 1.7765028 , 2.2724373 , 2.1864665 ,-0.37378153, 1.3559381 ,-1.4220421 ,-1.4756224 , 3.6143627 , 2.7846546 ,-2.5194893 , 3.005039 ,-3.6451447 ,-1.9118739 , 0.04718782,-3.0775185 ,-1.4801219 ,-2.35909 ,-0.4728799 , 4.610093 ,-4.472677 ,-4.530808 , 0.12514372, 0.05973044, 4.457302 , 3.1129916 , 3.6036162 , 4.5086145 ,-3.548999 , 0.4976606 ,-3.6525648 ,-2.1937015 ,-1.3205789 ,-2.6594079 , 4.415343 , 3.219482 ,-3.7286756 , 3.4116418 , 0.82889384,-3.0168123 , 4.382766 , 2.7633846 , 3.6949344 , 3.9806223 ,-0.6415279 ,-0.3193684 ,-1.3176754 ,-1.4990829 , 4.694691 ,-1.0581211 , 1.2103747 
,-0.26690048,-1.157015 ,-1.8951306 ,-0.8580171 ,-4.3080263 , 4.0737123 ,-1.2607352 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt new file mode 100644 index 000000000..50ed09011 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt @@ -0,0 +1 @@ + 4.9386005 , 3.7248888 , 3.3261378 , 4.8302746 ,-3.9337704 ,-4.2943096 , 0.16059242, 0.17785172,-2.4971933 ,-2.933359 ,-4.598231 , 4.7816315 ,-0.6563864 , 4.452592 , 1.8066075 , 3.1572745 , 4.500678 ,-1.1609873 ,-1.6962403 , 1.567031 ,-3.3120036 , 1.8150452 ,-2.7486987 ,-1.6800771 , 1.4895486 , 1.120401 , 1.4983965 , 4.7132416 , 0.39645562,-3.12486 ,-0.5966056 , 4.618641 , 1.225812 , 0.99017185, 3.9918585 , 1.299415 ,-1.2995726 , 4.202907 , 3.8657827 ,-4.0268126 ,-0.90370494, 0.5030568 ,-2.9651542 ,-4.1249614 ,-2.8990393 ,-4.1228724 ,-1.2640246 ,-0.72640723,-1.7128279 , 2.7710931 , 2.8189523 ,-0.8384207 , 0.71266395, 3.8393862 ,-1.7801509 ,-3.1485069 , 3.2076547 , 2.267659 ,-3.745656 ,-4.373508 , 0.86005193,-4.9145784 , 0.9253047 , 1.1243923 , 0.46507052, 1.9978004 ,-4.642887 ,-2.1898057 , 0.88199854,-2.1837327 , 1.1112527 ,-1.4548608 ,-3.5766103 ,-1.5607064 ,-3.630397 ,-1.9193211 ,-0.8931484 ,-0.2812017 ,-1.2881653 ,-2.5051243 ,-3.5648384 ,-0.5431733 ,-0.47036746,-2.8132265 ,-0.4302025 ,-4.003176 , 0.31743896,-3.074693 ,-3.3994603 , 0.62276137, 0.12920536,-2.5154057 ,-0.22098878,-2.711012 ,-0.303956 , 4.6025276 , 3.1887815 ,-0.50345755,-2.6543994 ,-0.8452558 ,-1.4075644 , 3.6716504 , 2.7388885 ,-4.9426928 , 3.5494354 , 4.777085 ,-3.3904083 ,-2.4746811 ,-2.943489 , 1.3607427 , 1.313449 ,-2.7959676 , 4.5932074 , 0.2460288 ,-1.1802251 , 0.6807028 ,-3.7335384 ,-0.30950046, 0.0558207 ,-4.7604976 ,-4.5745177 ,-3.3872643 ,-1.102581 ,-1.5612804 ,-1.2933319 , 4.5290637 ,-2.5096595 , 0.8673844 , 0.6069363 , 0.8294639 ,-0.05487671,-2.5923786 , 
3.2974155 , 2.252853 ,-2.4157743 , 1.6614583 , 1.975577 ,-2.7390766 ,-0.26459846, 0.8946814 ,-3.257953 , 4.0526175 ,-1.5219783 , 4.6063023 ,-0.09599628, 3.2825923 , 2.0063279 ,-3.597641 ,-0.41604096,-2.5593333 , 1.8169669 ,-3.6998532 ,-2.3723404 , 0.4008657 , 2.1002467 , 4.9284163 , 4.6011457 ,-4.8977246 , 4.7852945 , 1.2170111 ,-1.055987 , 2.27575 , 1.0601226 ,-4.176826 , 0.08197393, 4.0421042 , 3.6263971 , 2.6941037 ,-2.644993 , 0.10439859,-4.512112 , 3.7939842 ,-4.8532767 , 0.391317 , 3.6432517 ,-3.9992728 , 0.29700363, 1.2722415 ,-2.3793647 ,-3.377246 , 2.0930648 , 2.574604 ,-1.2509564 , 0.4457573 ,-0.46469867, 2.6793416 , 0.02566718,-0.11948132,-3.1046712 ,-0.6204446 ,-4.615342 , 4.057695 , 1.1312845 ,-3.0446556 ,-1.9381613 ,-0.92255247,-3.5459394 ,-1.1972907 , 0.5879403 ,-1.2265042 ,-2.6279037 , 3.7533212 ,-0.2950134 ,-1.6104454 , 4.7811155 , 3.9216835 ,-2.2905827 ,-3.9489107 ,-4.078132 , 4.878544 ,-2.1483154 ,-3.1480436 ,-1.8742744 , 0.38310575,-4.0457416 ,-1.5423136 , 4.9426446 , 2.80434 ,-2.758338 , 1.6596367 ,-4.559686 ,-1.2686385 ,-1.2173673 , 0.49475643,-2.4956207 ,-1.5008336 ,-1.7967415 ,-1.1574938 , 2.2852411 , 1.7171949 ,-3.328038 ,-3.1454384 ,-0.41883984, 3.822312 , 1.1161699 ,-1.5137968 , 3.1651397 , 3.2411747 , 1.2685378 , 2.7408757 ,-3.078621 , 3.3460293 ,-0.34918678,-1.0433053 , 0.9397743 ,-3.9071774 , 0.68924445, 4.896269 ,-4.234603 ,-4.8659916 , 1.472339 , 4.5464644 , 0.35857418, 3.4065645 ,-1.514736 , 4.2301235 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt new file mode 100644 index 000000000..163c037cf --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt @@ -0,0 +1 @@ +-0.91463715,-2.9258113 , 4.4465976 ,-0.84762925,-3.3510911 ,-0.15094744, 2.2284694 , 3.9705405 ,-1.6315348 , 4.698665 , 2.8595035 ,-2.4719086 , 4.2091336 ,-3.7003224 , 0.06198901, 4.24617 
,-3.7041452 , 1.4280707 , 0.61925036, 3.873551 , 0.3554166 , 3.0535998 ,-1.403015 , 2.5769274 , 4.0060935 ,-2.134697 , 0.61366636,-2.2069314 , 3.5629356 ,-4.94381 , 3.3054771 ,-0.42945656, 4.4868546 , 4.124087 ,-4.039486 , 0.75716823,-4.530404 ,-0.8464823 , 2.7817092 ,-4.954212 , 4.790015 , 2.5307322 , 0.635834 ,-3.393037 ,-3.7000508 ,-1.1439751 ,-2.4422479 , 3.9414582 ,-4.0586324 ,-3.5872777 , 2.2529798 , 0.50453144,-2.9947112 ,-0.76174486, 0.8427806 ,-0.90798455,-0.5518859 ,-1.1810572 , 1.2787138 ,-1.7791113 ,-4.661412 ,-3.7413049 , 0.03910514, 3.970302 ,-3.0697417 ,-4.107844 ,-1.985001 ,-2.434408 ,-3.0120797 , 0.34467867, 0.09826441, 3.1933572 , 0.09855966, 1.7976784 ,-3.3814316 ,-2.8423817 ,-4.787137 , 0.21746217,-1.8560363 ,-0.7145455 , 3.911294 , 4.6970305 ,-4.0105987 , 3.3843613 , 2.3087065 , 1.8619018 , 1.6607213 ,-4.1276345 ,-0.15251912, 3.1198032 , 1.8143575 , 2.178214 ,-4.6250186 , 4.4006424 ,-3.378407 , 3.6481302 , 4.4439235 , 4.5322957 , 2.7754776 , 1.9026359 ,-2.9371052 , 0.32501587, 4.980984 ,-3.2300677 , 4.190388 , 4.441369 , 0.8116277 ,-4.7056756 , 1.1501676 ,-0.9759702 ,-0.1920487 ,-3.2009268 , 4.654679 , 4.043145 , 4.579935 , 4.917842 ,-3.2166183 , 2.381046 , 2.3470554 , 0.04456256,-2.6785278 ,-2.1683002 ,-0.2686819 , 0.6097173 , 1.5071467 , 3.9692068 ,-3.4313831 ,-0.87708473, 3.9917011 , 0.7843428 ,-4.6622047 , 0.774621 ,-4.6538844 , 3.6392822 , 4.962988 , 1.4132729 ,-0.40482154,-1.8656421 ,-1.6113061 ,-1.3454957 , 0.40846685,-4.5410986 , 2.7158992 ,-1.8403106 ,-3.803351 , 4.406537 ,-1.5868717 , 2.7034876 ,-3.3383765 , 4.6084027 ,-1.691095 ,-0.52188784, 2.9010768 , 0.08786624, 2.7466853 ,-1.7457972 , 0.59371734,-0.1716976 ,-2.6220891 , 4.9432936 , 2.3500183 , 1.6905144 ,-2.7329378 , 4.003541 ,-1.1137847 , 3.9017355 , 0.9116626 , 4.233729 ,-2.6706429 , 3.4342804 ,-0.42729262, 1.174779 ,-4.944099 , 1.2316282 , 4.9237943 ,-2.2999635 ,-4.9210916 ,-1.9033331 , 0.43241265, 3.2149148 , 4.1269703 , 0.8590868 , 2.734273 , 1.658618 ,-2.1702065 ,-2.0058317 
, 4.0706363 , 4.003833 ,-0.35835287, 2.5514262 , 1.2571276 ,-4.655018 , 3.6468434 , 0.06320113,-4.662375 , 1.0745742 ,-1.117399 , 4.167245 , 4.59434 ,-1.686359 ,-0.17328739, 0.3083307 , 3.3926466 , 2.2254786 ,-0.45468137, 2.4956248 ,-3.492782 ,-2.9805465 ,-1.0610795 ,-0.2784433 , 0.7163735 ,-3.0048254 ,-1.8024784 ,-3.3139167 ,-1.8410577 , 4.5702477 ,-3.4454951 ,-1.4504164 ,-1.7432297 ,-4.998418 ,-2.5524495 , 3.028534 , 4.075326 ,-2.2187853 ,-0.6484594 , 3.00815 ,-2.8010397 ,-4.5529976 , 1.7830837 , 0.3373458 , 0.19151935,-1.0437245 ,-3.6349878 , 1.1947471 ,-1.9664146 , 0.27316815,-0.20781417, 2.419226 , 0.02246885, 4.5222287 , 3.1069999 , 3.940458 , 4.2710595 , 3.4216619 , 2.8447206 , 2.7136886 ,-0.60954016, 2.9277234 , 3.995615 ,-0.30593097, 1.7800944 , 1.0608315 , 3.8786283 ,-2.7564247 , 1.8526665 ,-3.8638606 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt new file mode 100644 index 000000000..e580d6f85 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt @@ -0,0 +1 @@ +-4.024665 , 3.0544488,-4.5645285,-3.2134292,-2.1543078, 4.039755 ,-4.613908 , 4.2014904, 3.8222141,-4.4992657,-4.02681 ,-3.2933445 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt new file mode 100644 index 000000000..c593dfbb6 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt @@ -0,0 +1 @@ +-2.669042 , 2.479217 , 4.691815 , 1.8187722 ,-3.7656548 ,-2.0555806 ,-2.4494352 ,-3.2394514 ,-0.38215363,-1.543695 ,-0.6927158 , 2.3534324 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt new file mode 
100644 index 000000000..14520a177 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt @@ -0,0 +1 @@ + 4.036224 ,-1.2903051 , 1.2116423 , 3.92255 ,-0.48049024,-1.0290806 ,-0.9644837 , 1.3379688 ,-1.0027533 ,-1.9611529 , 3.7190473 , 0.45794436 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt new file mode 100644 index 000000000..2238d5e9e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt @@ -0,0 +1 @@ + 4.560488 ,-1.2475324, 1.8892838,-2.0155866,-4.968927 , 0.3717404,-0.6095849, 3.2483344,-1.2499679, 1.4237018,-3.1225715, 3.0611598 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt new file mode 100644 index 000000000..14a91ccc9 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt @@ -0,0 +1 @@ +-1.7167594, 2.116633 ,-1.3816848,-1.7106141,-3.273076 ,-4.148302 ,-2.1654181, 0.4368236, 3.4279666, 1.2954224, 1.3004405,-4.3022 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt new file mode 100644 index 000000000..3b2a3c258 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt @@ -0,0 +1 @@ + 4.9167333 , 0.9170983 ,-2.4031715 , 0.4819133 , 0.21536288,-2.0262568 , 4.364642 , 1.7851653 , 2.0982797 , 0.5736603 , 2.5769486 , 3.68285 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt new file mode 100644 index 000000000..dff8a3b09 --- /dev/null +++ 
b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt @@ -0,0 +1 @@ + 3.8708763 , 3.263454 ,-4.796817 , 0.6411522 ,-3.0385532 , 0.49334133,-0.20283684,-0.88814104, 4.826072 ,-4.8037696 , 4.757636 ,-3.036691 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt new file mode 100644 index 000000000..93e747284 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt @@ -0,0 +1 @@ +-3.8694625 ,-3.5254061 ,-0.23680535, 4.1042504 , 3.2534697 ,-1.8511593 ,-1.9182487 , 2.6457057 , 0.12923336, 2.618141 , 1.2465005 ,-4.4625525 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt new file mode 100644 index 000000000..c924e03d9 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt @@ -0,0 +1 @@ +-2.5559328 , 1.768443 ,-1.4850446 ,-1.2771453 ,-2.7216687 , 2.80077 , 0.21637216,-0.6145739 ,-0.37175298, 3.8750615 ,-1.9910356 ,-1.657059 diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt new file mode 100644 index 000000000..1153c85ed --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt @@ -0,0 +1 @@ +-1.6168976 ,-3.816399 ,-0.55625045, 4.961818 , 0.19316113,-2.6601286 ,-1.6928803 , 4.1208386 ,-1.4012221 , 2.7742999 , 0.75798005,-2.5877 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt new file mode 100644 index 000000000..1f2993269 --- /dev/null +++ 
b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt @@ -0,0 +1 @@ +-3.3436873 ,-0.79453826, 2.2211137 , 2.6420908 ,-1.3191302 , 1.2973647 ,-4.506594 , 4.867371 ,-4.318404 , 1.6957753 ,-4.3091793 ,-3.2230556 , 4.9175825 ,-3.1527104 ,-2.6669753 ,-2.1135337 ,-3.7701926 ,-3.358504 ,-4.419803 , 3.2045574 ,-0.5828494 ,-3.5796826 ,-4.0088696 ,-4.7178082 , 2.2726505 , 2.1860175 , 3.7198956 ,-0.5788681 ,-3.7766652 ,-0.65016747, 3.707159 ,-2.240267 , 4.5772953 ,-0.54754776, 4.7143884 ,-3.196982 ,-3.6356654 , 3.7157805 , 3.1312432 , 0.58816016, 2.1710336 ,-1.600533 ,-3.689763 , 4.322089 , 0.4816874 , 2.2769346 ,-3.9072733 ,-0.58615017 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt new file mode 100644 index 000000000..a19ea6696 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt @@ -0,0 +1 @@ +-1.275483 ,-3.6622071 ,-0.87433696, 0.60946655, 1.4415421 , 3.3705983 , 2.2635043 , 3.3926573 ,-0.2936643 ,-0.5169573 , 3.2535644 , 2.1269164 ,-3.4180303 , 1.0427854 ,-1.3514856 , 3.6084783 , 4.569944 ,-0.79272085, 2.9771423 ,-1.6668562 , 4.8700657 , 0.3355385 , 0.76509756, 3.5142152 ,-1.6743544 , 4.794434 ,-2.958765 ,-0.23857778, 2.4555902 , 2.459867 , 3.3922994 ,-4.350212 , 0.6286153 , 0.8139546 , 4.1676807 ,-3.3461437 , 0.69633776,-4.6548877 , 0.98267466,-4.508397 ,-1.4581255 ,-1.2289628 , 3.8701873 , 3.334336 ,-3.5611253 , 2.6133575 ,-1.0554558 ,-3.3291767 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt new file mode 100644 index 000000000..7113eb52e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt @@ -0,0 +1 @@ +-0.6250365 ,-4.798417 ,-4.214081 ,-3.625409 , 
2.4391694 , 4.1856265 , 3.2472587 ,-3.20996 ,-2.3537548 , 1.3749354 , 2.5947835 ,-1.8891864 ,-3.612735 , 2.246563 , 1.2701501 ,-2.8927476 ,-0.71078295,-3.6037376 ,-4.5916877 , 2.0044398 , 3.4437728 ,-1.0695096 , 4.3483944 ,-3.3387017 ,-0.9384242 , 1.4229002 ,-0.6568144 , 1.1164346 , 1.7145283 ,-2.596518 , 4.6728883 , 3.4737296 , 1.7935314 , 3.1263895 , 1.3614839 ,-3.824968 ,-3.0405738 , 3.1729462 ,-4.1985774 ,-2.9489865 ,-4.2080064 , 2.0368521 ,-2.858539 ,-0.03206728,-1.1123812 , 0.2994737 , 1.6906137 ,-0.8665008 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt new file mode 100644 index 000000000..afeb2c0e6 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt @@ -0,0 +1 @@ +-4.5279946 ,-3.4497826 ,-2.058617 ,-0.39549035,-0.26672208, 3.0173857 , 3.2430282 , 1.9996022 , 1.3895315 , 1.7620904 ,-4.9040093 ,-3.2858686 ,-2.2823575 ,-1.4176623 ,-0.537347 , 0.68219584,-3.193989 ,-3.1675165 , 0.47214374,-4.390378 ,-1.8730192 , 1.4416525 ,-3.0460286 ,-0.73547626, 1.8686327 ,-0.8146671 ,-2.0906649 , 0.01226121,-0.06992937, 0.9302521 ,-2.1858516 , 4.8370657 ,-4.1847024 , 4.4963436 ,-1.3834711 ,-1.1244944 , 0.4290957 ,-4.2681174 , 1.2978764 , 3.4149706 ,-2.7011304 ,-3.1285405 ,-3.8857136 ,-0.18625297,-0.13618916, 2.427405 ,-1.7979074 ,-1.4174187 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt new file mode 100644 index 000000000..99c6284d6 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt @@ -0,0 +1 @@ +-0.40635094,-2.485209 ,-2.9641154 , 4.09174 ,-1.9137962 ,-2.0860991 , 1.6594787 , 0.53744185, 1.7737653 ,-1.7054961 , 2.5611186 ,-1.1456238 , 2.741241 ,-2.283051 ,-4.2111306 ,-0.8722772 , 
1.6465468 ,-0.61518955, 0.08495517, 3.6847656 , 3.7826371 , 2.0023444 ,-3.5326133 , 2.3723035 , 3.7383325 ,-3.3514297 , 2.031452 ,-0.7364658 ,-4.3347225 ,-2.8146286 ,-1.37377 ,-3.518721 ,-0.19657679,-1.6831368 , 1.2457223 , 0.25099897,-4.4722757 ,-4.135197 ,-0.6378818 , 3.8833187 , 1.9291897 , 2.5969315 , 2.146067 ,-2.846719 ,-2.2562532 ,-2.6856182 , 2.824374 , 2.3662992 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt new file mode 100644 index 000000000..081a1e6ee --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt @@ -0,0 +1 @@ +-1.9927613e+00,-1.7386111e+00, 4.0895696e+00, 3.7818990e+00, 1.9420158e+00, 2.8482721e+00, 1.9165717e+00, 3.0059583e+00, 1.8346788e+00,-1.9055414e-03, 4.9277787e+00,-2.2794118e+00, 4.4005270e+00, 4.9703922e+00,-4.5275192e+00,-4.0446317e-01,-4.9363256e+00, 4.9506269e+00, 5.5874938e-01, 3.9949589e+00,-3.8152415e-01,-4.1024357e-01,-3.8472393e+00, 4.2956004e+00, 4.8097472e+00, 1.7960385e+00, 1.6767026e+00,-2.2773645e+00, 2.6808765e+00,-3.7214172e+00, 4.0978761e+00, 3.6202488e+00,-3.3211513e+00, 3.6200387e+00,-3.6106458e+00,-3.9778764e+00, 3.8779631e+00,-4.8502750e+00,-2.1901150e+00, 3.1800017e+00, 4.6261444e+00, 3.5151103e+00, 2.8659137e-02, 4.5340648e+00, 1.9836371e+00,-2.1751235e+00,-4.6762753e+00,-3.6951694e+00 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt new file mode 100644 index 000000000..f6b31db38 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt @@ -0,0 +1 @@ +-4.7488093 , 4.805902 ,-0.29828382, 0.57486725,-4.864297 , 1.1832287 ,-1.7611881 ,-2.7058024 , 2.707353 ,-3.9832466 , 3.1243927 ,-4.795229 , 1.9835415 , 3.2291937 , 2.4303932 ,-3.556881 , 4.316894 
,-0.6444627 ,-3.8289468 , 4.012964 , 0.7878584 ,-1.8921386 , 2.779619 ,-3.762597 , 3.4239094 ,-0.9103423 ,-3.9791772 ,-2.5613685 ,-4.4910364 , 0.19411987, 4.6296096 ,-0.6827259 , 3.7645729 , 1.5309091 , 3.5163064 , 3.4726381 , 3.5372822 , 1.7671971 , 1.4374614 , 3.5783768 ,-2.4927518 , 3.9427729 , 2.431568 , 2.6959393 , 3.8100271 ,-2.099064 , 3.3663592 ,-2.0818436 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt new file mode 100644 index 000000000..acc01cb55 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt @@ -0,0 +1 @@ + 4.279912 ,-2.2746763 , 4.0609813 , 4.5353827 , 3.624241 ,-3.9593613 , 4.189409 ,-3.9370356 ,-2.7063863 ,-1.9987059 , 4.172294 ,-4.5454354 , 4.362368 , 2.2204642 ,-4.9866576 , 3.31571 , 0.12623785, 4.7834573 ,-1.3521448 ,-1.5408021 ,-4.6578984 ,-2.93307 ,-1.5684534 ,-1.6875995 ,-0.4278419 , 1.1314197 ,-2.9655704 ,-0.48032767,-1.9200082 , 1.3321692 , 0.87586147,-0.1761448 , 3.939337 ,-1.0270193 ,-4.807054 , 2.8373904 ,-1.1184337 ,-0.8979197 , 2.1442132 ,-2.8509672 ,-3.3741531 , 3.6592414 , 0.7632272 ,-4.11465 , 4.892313 , 4.715815 ,-4.6481915 , 0.24676175 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt new file mode 100644 index 000000000..0f0b7a939 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt @@ -0,0 +1 @@ +-2.0949495 ,-1.1370499 , 4.6457314 ,-2.243915 ,-1.7996464 , 1.2268789 ,-4.938172 ,-3.2802615 , 1.8788282 , 4.4162655 ,-4.8805113 , 3.1269526 , 3.2644348 , 0.89842725,-1.4484432 ,-0.28381723, 3.046261 ,-1.0718596 ,-3.996107 ,-4.9575796 ,-2.2279077 , 1.5326967 , 4.4588428 ,-2.042381 , 4.6604958 , 4.6422915 ,-1.097833 , 3.666126 , 0.4735639 ,-4.480704 ,-4.831033 
,-0.27288163, 4.588138 , 4.5297036 , 4.3675694 ,-1.6098841 ,-3.4147859 , 2.1168516 ,-1.9529305 ,-0.12548867, 3.4388335 ,-1.4071734 , 0.9507897 , 4.8206787 , 1.676873 ,-1.7102181 , 1.7746873 , 0.02711739 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt new file mode 100644 index 000000000..d23450db6 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt @@ -0,0 +1 @@ +-4.707647 ,-4.0921726 , 3.5813692 ,-4.71081 , 3.157816 ,-3.0034213 ,-0.21858999,-1.1736552 ,-1.6042249 ,-3.93102 ,-4.0407577 , 3.7350774 ,-4.9545655 ,-1.5413756 , 0.34996858, 2.0339615 , 0.99290746,-3.9916334 ,-4.149016 ,-3.2332835 , 3.6728513 , 2.4537466 ,-3.103485 ,-0.4829316 , 4.8046784 ,-1.753812 , 4.878712 ,-1.4039769 , 1.6640003 ,-1.2041731 , 0.8046477 , 0.9196048 ,-0.6475092 , 1.1409346 , 2.0324717 ,-0.04227797,-0.5379897 , 3.205104 , 3.3556423 , 4.8447986 ,-1.9695646 ,-2.6304977 ,-3.7261262 ,-4.725599 , 2.1162436 ,-0.5631174 ,-0.5820323 , 0.8398242 diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt new file mode 100644 index 000000000..eb058a1c3 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt @@ -0,0 +1 @@ +-0.55411166,-4.1992335 , 1.4317423 ,-3.7261302 , 1.151971 ,-2.117022 ,-0.7386241 , 4.654951 , 1.4869142 ,-4.6252975 ,-3.305923 , 3.632628 ,-2.6403873 ,-4.862389 , 3.477561 ,-4.9842925 ,-3.6267536 , 4.9950438 diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt new file mode 100644 index 000000000..ff15f032d --- /dev/null +++ 
b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt @@ -0,0 +1 @@ + 0.18094282,-0.58095986, 1.2765085 ,-0.534363 , 4.5564513 ,-0.28305855, 0.80606604,-3.3217795 ,-0.08041744,-3.7558215 ,-0.5370528 , 1.8984528 ,-0.09462419,-0.28595117, 4.6817894 ,-4.6653147 ,-4.127137 ,-2.3407753 diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt new file mode 100644 index 000000000..e564168bf --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt @@ -0,0 +1 @@ +-0.62747055, 1.4133646 ,-0.9954612 ,-4.687624 ,-2.5390003 ,-4.534569 ,-1.1943612 ,-4.830596 , 4.3214984 ,-2.4795794 , 4.166298 ,-1.4772589 ,-4.074577 , 3.2332711 ,-1.5221404 ,-1.7308865 , 0.06814837, 2.944668 diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt new file mode 100644 index 000000000..c763b6311 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt @@ -0,0 +1 @@ +-3.2136867 , 0.6229863 , 0.02772082,-0.00820862,-2.4893622 ,-0.6757174 ,-2.2024722 ,-2.0893583 , 0.33953062,-3.5438979 , 0.7000838 , 1.3219849 ,-0.02302017, 2.3125873 ,-1.5376673 ,-4.0330076 , 4.755884 , 2.729685 diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt new file mode 100644 index 000000000..12e13272d --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt @@ -0,0 +1 @@ + 0.82922786, 4.762074 ,-3.5043278 , 2.4521468 , 2.6450796 ,-2.8606322 , 0.8321993 ,-1.4020495 ,-0.25749585, 1.0287803 ,-3.911455 ,-1.8311876 , 2.763438 , 3.8604703 ,-3.5478592 ,-4.2335987 
,-3.6402035 ,-1.8485361 diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt new file mode 100644 index 000000000..42ce6be36 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt @@ -0,0 +1 @@ + 1.1826919 , 0.07451724, 3.48515 , 3.4905832 , 1.8009655 , 4.155749 , 3.3155255 , 2.6834202 ,-1.7111781 ,-2.2254407 ,-4.578932 ,-2.1239302 ,-0.1269101 ,-2.6022012 ,-4.8320093 , 0.2983099 ,-0.43314072,-0.66332716 diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt new file mode 100644 index 000000000..f677cc836 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt @@ -0,0 +1 @@ +-1.2971772 ,-3.6082 ,-2.2253058 ,-4.4367466 ,-1.7221912 , 0.02547262,-3.641017 , 0.2953748 , 0.7217547 , 4.663728 , 4.262444 ,-3.196005 ,-1.6792587 ,-1.7463406 , 2.030074 , 0.67998594,-0.92862725,-1.7960806 diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt new file mode 100644 index 000000000..841ea9f8f --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt @@ -0,0 +1 @@ + 2.2390285 ,-1.9557759 ,-1.2331479 ,-2.4810686 ,-0.5112022 , 1.741153 , 0.13645513,-2.3543327 ,-3.2610211 , 2.5739572 ,-0.50510126, 2.3544457 , 1.884411 ,-3.7153857 ,-1.7037194 ,-0.36849263,-4.819704 , 3.047652 diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt new file mode 100644 index 000000000..08ec9fe8f --- /dev/null +++ 
b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt @@ -0,0 +1 @@ +-0.9080747 ,-1.5609599 ,-0.40923035,-2.0569193 , 4.5904484 ,-0.02348744, 0.35939455, 2.2017193 , 2.2766497 ,-2.2080436 ,-2.6453862 ,-3.6456985 , 4.160244 , 1.7283534 , 4.5547447 ,-1.8674839 , 3.019465 , 1.1584582 diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt new file mode 100644 index 000000000..a4f2d97d1 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt @@ -0,0 +1 @@ + 4.5920744 , 3.827386 ,-2.1228654 , 3.7227573 ,-3.4464717 , 0.31313375, 0.5531476 ,-0.30391756,-0.21601346, 3.8968146 , 0.23224053,-0.6208954 ,-0.76323295,-1.1700501 ,-1.6203161 , 2.1780837 , 2.3581395 , 2.6519518 diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt new file mode 100644 index 000000000..0e8d687b1 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt @@ -0,0 +1 @@ +-2.327701 , 1.9312059 ,-2.0069487 ,-1.2584914 ,-0.08435626, 0.47685367,-2.7456024 , 2.1275337 ,-4.9685698 , 1.8143541 , 0.52829266,-2.770121 diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt new file mode 100644 index 000000000..67732e8f5 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt @@ -0,0 +1 @@ + 0.01133719,-3.3741624 , 3.556686 ,-4.21059 , 0.49977505, 1.768375 , 3.867543 , 2.270572 ,-3.9507272 ,-4.595618 ,-4.7460327 , 0.5856542 diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt 
b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt new file mode 100644 index 000000000..7bc7124d6 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt @@ -0,0 +1 @@ +-2.7181 , 4.6819983 , 2.9022477 ,-0.10716935, 3.6687856 ,-2.5403244 ,-4.477037 , 2.5499978 ,-3.9294813 , 0.08725335,-2.243345 ,-1.4018577 diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt new file mode 100644 index 000000000..0fac9fb70 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt @@ -0,0 +1 @@ +-3.920553 , 0.87464577,-1.0319884 , 2.1885726 , 2.755115 ,-1.6436632 ,-4.4507327 , 4.915525 , 2.9331517 , 4.7712016 , 4.676084 ,-1.7715888 diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt new file mode 100644 index 000000000..df79104c2 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt @@ -0,0 +1 @@ +-2.181168 ,-1.6011912 ,-4.359466 ,-1.3662407 ,-0.06876431,-2.9213328 ,-0.5463467 ,-3.7916536 ,-3.751455 ,-2.822578 , 0.8914152 ,-3.0267959 diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt new file mode 100644 index 000000000..4b999a028 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt @@ -0,0 +1 @@ + 3.241328 , 2.7033713 ,-2.5329788 ,-4.078369 ,-3.6711028 , 2.8912613 , 0.6188993 , 3.3729403 , 2.9906578 , 0.69040877, 0.6443222 , 1.1676162 diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt 
b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt new file mode 100644 index 000000000..7061063b9 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt @@ -0,0 +1 @@ + 1.572614 , 3.6147017 , 1.4378501 ,-0.81497866, 1.5987366 , 3.7698908 ,-3.8637109 , 4.5728784 ,-0.8706349 , 0.7389268 , 4.64117 ,-0.96047217 diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt new file mode 100644 index 000000000..c048a8a9f --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt @@ -0,0 +1 @@ + 0.00864919,-3.1653113 ,-2.125551 , 2.9225516 ,-1.1439148 , 4.6509814 ,-2.097259 , 2.5843353 ,-2.067207 ,-2.5034845 ,-4.9441104 ,-3.9062042 diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt new file mode 100644 index 000000000..55be3b464 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt @@ -0,0 +1 @@ + 1.0920542 , 0.5510192 , 1.3465579 ,-2.3510268 , 4.016736 , 4.7848744 ,-0.42403316, 0.00571597, 1.6412207 , 1.7787368 , 2.4728034 ,-3.5900247 diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt new file mode 100644 index 000000000..04c7a1a8a --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt @@ -0,0 +1 @@ +-2.9799085,-3.9477375, 0.6402844, 3.304766 , 3.8880465,-3.5069442,-2.3702915, 4.126247 ,-3.1614416, 2.9909244,-2.8755414, 0.2627986 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt 
b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt new file mode 100644 index 000000000..e9db48f9e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt @@ -0,0 +1 @@ +-1.4124781 , 0.42694193, 1.1734594 ,-3.5111153 ,-2.9756174 , 1.3682148 ,-2.318465 , 2.198896 ,-4.5043235 , 3.1775594 ,-0.42802384,-1.4872279 , 1.3821319 ,-4.771963 ,-0.12837897, 4.132799 , 3.697655 , 2.0807178 ,-3.621293 , 2.121878 ,-0.25654107, 0.42100102,-1.4009671 ,-2.9733627 ,-0.7058871 ,-2.831215 , 3.5669627 , 2.1420689 ,-1.8789555 , 0.8104939 ,-2.0503597 , 1.7788508 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt new file mode 100644 index 000000000..479d062f1 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt @@ -0,0 +1 @@ + 3.4726453 , 3.0497985 ,-4.234619 ,-1.0526706 , 1.7278554 ,-3.341614 , 4.54768 , 3.0954597 ,-3.735109 , 2.8810751 ,-2.5381427 ,-3.2360535 ,-1.5378917 , 2.3052745 ,-3.170938 ,-3.327242 , 2.0654576 ,-2.2294598 ,-1.881382 , 0.13216451,-4.2825613 , 0.26616526, 4.6196365 ,-0.88623226, 1.7103885 ,-1.5865034 ,-3.9114466 ,-3.2227128 , 4.909618 , 2.3318915 , 0.84300846, 0.760918 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt new file mode 100644 index 000000000..ae28234bd --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt @@ -0,0 +1 @@ +-4.6097918,-4.21991 ,-3.9955974, 3.6492047, 2.9191775, 2.8082933, 1.6189331, 0.2730309,-1.5029653,-1.9471445, 4.8758197, 3.3177438, 3.1338058,-2.1281245,-1.7526287,-2.5518703,-1.7746793, 
4.0455256,-0.5839861,-4.408046 ,-4.0034447, 1.5858272,-4.5896654, 4.7211285,-4.677515 ,-2.6027086,-4.7896166,-3.5512326,-1.9068764,-2.9705904,-4.854087 ,-4.892111 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt new file mode 100644 index 000000000..fd40f84f4 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt @@ -0,0 +1 @@ + 2.1514777e-02, 2.6526773e+00,-3.0477784e+00, 1.3287724e+00,-4.1414630e-01,-1.7295350e-01, 7.6649576e-01,-1.8028022e+00,-7.0781744e-01,-2.5262204e-01,-3.0970418e+00,-1.3165286e+00,-4.6649928e+00, 2.0809033e+00,-1.5739973e+00,-4.0531826e-01,-2.1718202e+00, 2.0146034e+00, 2.5044403e+00,-1.1256610e+00, 1.3536702e+00, 1.0283234e-03,-1.8823910e+00, 4.7122188e+00, 9.4781297e-01, 3.2012525e+00,-5.5164534e-01,-2.6158772e+00,-1.8771547e+00,-3.1689723e+00, 4.9054880e+00,-3.4560370e+00 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt new file mode 100644 index 000000000..e81c3b8e5 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt @@ -0,0 +1 @@ +-2.0927553 ,-2.107511 ,-1.6963564 , 1.7006218 , 1.4575784 , 0.06095728, 1.2659966 , 4.1905265 , 1.3035946 , 4.9793477 ,-4.3388166 ,-0.23496658, 1.9831208 , 2.6154642 ,-0.2790228 ,-3.1774354 ,-3.178935 ,-1.1564373 ,-0.8199472 ,-2.245698 ,-4.8605046 ,-3.569018 ,-1.4226891 ,-4.1067843 , 2.6078918 ,-3.5830674 , 1.9065963 , 2.435578 ,-3.3216476 , 4.5930347 , 2.9191844 , 1.7885648 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt new file 
mode 100644 index 000000000..a8874bc5f --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt @@ -0,0 +1 @@ + 3.9384239 ,-3.7377489 , 0.97284186, 3.8309984 , 2.4125865 , 1.7141674 , 3.9459977 ,-0.304659 ,-3.4623327 , 4.4569106 , 4.209985 ,-0.6677348 , 3.4578135 , 1.6779743 , 2.502791 ,-1.324285 , 1.3139176 , 3.4334664 ,-2.2695086 ,-4.001059 ,-0.91164917, 4.4447775 ,-3.0275404 ,-2.0852396 , 3.6677403 ,-2.9595146 , 2.0921555 , 1.7570637 , 3.717391 ,-0.3216191 ,-0.8410847 , 2.662336 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt new file mode 100644 index 000000000..715e680be --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt @@ -0,0 +1 @@ + 0.6663157 ,-0.04146723,-0.8193995 , 4.804576 ,-2.1357434 , 4.0829 ,-1.6380692 , 1.8043218 , 2.3431025 , 0.30111 , 1.2928191 ,-1.8559257 ,-0.68305963,-1.1502715 , 1.9492546 ,-2.7240746 , 2.9279857 ,-3.3329778 ,-4.8343406 ,-0.02708206, 1.1840513 , 3.6476028 , 4.75276 ,-4.9085226 ,-1.1922491 , 0.54225117, 3.17247 ,-2.7856457 ,-3.0866194 ,-2.2077718 , 1.6263398 , 3.7066603 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt new file mode 100644 index 000000000..3ca893e61 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt @@ -0,0 +1 @@ +-4.8507566 ,-1.267258 , 0.5099198 , 1.650726 , 3.4329638 ,-2.2652836 , 1.2157568 , 0.18305123, 3.6754217 ,-4.6185255 ,-1.0646905 ,-0.46092424, 2.046326 ,-2.8830478 , 4.156068 ,-2.0503244 , 0.0755459 ,-4.6472006 ,-0.50128895, 3.1129324 ,-4.4048553 , 0.47983927, 1.4510479 , 3.9226127 ,-4.767221 ,-2.795826 ,-4.816457 ,-3.6127663 
,-2.2712553 , 4.586938 , 1.1028811 , 1.5028698 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt new file mode 100644 index 000000000..3fba8ecec --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt @@ -0,0 +1 @@ + 4.9431224 ,-3.4878132 ,-2.4831018 , 2.2395666 ,-2.3317611 ,-1.6786547 ,-2.4702384 , 3.2167027 , 1.7300137 , 2.8848834 ,-4.6395254 , 0.5527259 ,-2.915835 ,-1.0066313 ,-0.278253 , 4.6136203 ,-3.4183645 ,-1.5189631 ,-4.599058 , 3.3198457 ,-3.9464161 ,-0.6357558 , 0.32550323, 3.2147424 , 4.921844 ,-0.30067012, 3.9456701 , 0.5943688 ,-4.7229166 ,-3.6803844 ,-3.3813965 , 3.283583 diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt new file mode 100644 index 000000000..16cc23b79 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt @@ -0,0 +1 @@ + 2.232644 , 4.465217 , 1.926956 ,-4.007337 ,-2.7392106 ,-2.4579394 , 2.913538 ,-1.7261469 , 3.8706868 , 0.06259949,-2.018361 , 1.2728635 ,-3.133289 ,-4.943454 ,-1.5415367 ,-4.8183494 , 4.348317 ,-2.4929109 ,-0.9018388 ,-4.776565 , 4.634248 , 3.0753953 , 2.3412373 ,-2.7086196 , 3.4485948 , 0.3561932 , 0.03650501,-2.8704169 , 1.0514414 , 3.3964615 , 1.2783849 , 4.974951 diff --git a/compiler/pota-quantization-value-test/test_quantization_with_config.sh b/compiler/pota-quantization-value-test/test_quantization_with_config.sh new file mode 100755 index 000000000..1364dfb90 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_quantization_with_config.sh @@ -0,0 +1,109 @@ +#!/bin/bash + +# This script tests quantize_with_minmax option of circle-quantizer with config file +# +# HOW TO USE +# +# 
./test_quantization_with_config.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ... +# test.config : set ${RECORD_MINMAX_PATH} and ${CIRCLE_QUANTIZER_PATH} +# work_dir : build directory of quantization-value-test (ex: build/compiler/quantization-value-test) + +SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py" +CONFIG_PATH="$1"; shift +BIN_PATH=$(dirname "${CONFIG_PATH}") +TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs" +GEN_SCRIPT_PATH="${BIN_PATH}/gen_h5_explicit_inputs.py" +WORKDIR="$1"; shift + +source "${CONFIG_PATH}" + +echo "-- Found CIRCLE_QUANTIZER: ${CIRCLE_QUANTIZER_PATH}" +echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}" +echo "-- Found workdir: ${WORKDIR}" + +TESTED=() +PASSED=() +FAILED=() + +pushd "${WORKDIR}" +while [ "$1" != "" ]; do + MODELNAME=$1; shift + GRANULARITY=$1; shift + DTYPE=$1; shift + TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}" + + TESTED+=("${TESTCASE}") + + TESTCASE_FILE="${WORKDIR}/${TESTCASE}" + TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}" + + PASSED_TAG="${TEST_RESULT_FILE}.quantization.mixed.passed" + rm -f "${PASSED_TAG}" + + cat > "${TEST_RESULT_FILE}_quantization_with_config.log" <( + exec 2>&1 + set -ex + + # Generate h5 input data + source "${VIRTUALENV}/bin/activate" + "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \ + --model "${WORKDIR}/${MODELNAME}.circle" \ + --input "${TEST_INPUT_PATH}/${MODELNAME}_config/${GRANULARITY}/${DTYPE}" \ + --output "${TESTCASE_FILE}.mixed.input.h5" + + if [[ $? -ne 0 ]]; then + echo "FAILED TO GENERATE INPUT" + continue + fi + + # Run record-minmax + # NOTE There is no '_with_config' test for record-minmax, because it does not + # use quantization config file. 
+ "${RECORD_MINMAX_PATH}" \ + --input_model "${TEST_RESULT_FILE}.fake_quantized.mixed.circle" \ + --input_data "${TESTCASE_FILE}.mixed.input.h5" \ + --output_model "${TEST_RESULT_FILE}.minmax_recorded.mixed.circle" + + # Run circle-quantizer with --quantize_with_minmax + "${CIRCLE_QUANTIZER_PATH}" \ + --quantize_with_minmax float32 "${DTYPE}" "${GRANULARITY}" \ + --config "${SOURCE_PATH}/config_files/${MODELNAME}/${GRANULARITY}/${DTYPE}/qconf.json" \ + "${TEST_RESULT_FILE}.minmax_recorded.mixed.circle" \ + "${TEST_RESULT_FILE}.quantized.mixed.circle" + + # Dump scale, zp, weights values (circle-tensordump) + "${CIRCLE_TENSORDUMP_PATH}" \ + "${TEST_RESULT_FILE}.quantized.mixed.circle" \ + --tensors_to_hdf5 "${TEST_RESULT_FILE}.quantized.mixed.circle.h5" + + # Compare result + "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \ + --input_h5 "${TEST_RESULT_FILE}.quantized.mixed.circle.h5" \ + --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}_config/${GRANULARITY}/${DTYPE}/quantization" \ + --mode quantization + + if [[ $? 
-eq 0 ]]; then + touch "${PASSED_TAG}" + fi + ) + + if [[ -f "${PASSED_TAG}" ]]; then + PASSED+=("$TESTCASE") + else + FAILED+=("$TESTCASE") + fi +done +popd + +if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then + echo "FAILED" + for TEST in "${FAILED[@]}" + do + echo "- ${TEST}" + done + exit 255 +fi + +echo "PASSED" +exit 0 diff --git a/compiler/pp/CMakeLists.txt b/compiler/pp/CMakeLists.txt index 6d58458ca..1db09cb88 100644 --- a/compiler/pp/CMakeLists.txt +++ b/compiler/pp/CMakeLists.txt @@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) add_library(pp STATIC ${SOURCES}) -set_target_properties(pp PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(pp PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) target_include_directories(pp PUBLIC include) target_link_libraries(pp PRIVATE nncc_common) target_link_libraries(pp PUBLIC nncc_coverage) diff --git a/compiler/record-minmax-conversion-test/CMakeLists.txt b/compiler/record-minmax-conversion-test/CMakeLists.txt index 2221e1702..31b906142 100644 --- a/compiler/record-minmax-conversion-test/CMakeLists.txt +++ b/compiler/record-minmax-conversion-test/CMakeLists.txt @@ -37,6 +37,6 @@ add_test( COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh" "${TEST_CONFIG}" "${ARTIFACTS_BIN_PATH}" - "${NNCC_OVERLAY_DIR}/venv_1_13_2" + "${NNCC_OVERLAY_DIR}/venv_2_8_0" ${RECORD_MINMAX_CONVERSION_TEST} ) diff --git a/compiler/record-minmax/CMakeLists.txt b/compiler/record-minmax/CMakeLists.txt index da63bbf5f..b9c08f472 100644 --- a/compiler/record-minmax/CMakeLists.txt +++ b/compiler/record-minmax/CMakeLists.txt @@ -1,25 +1,17 @@ -nnas_find_package(HDF5 COMPONENTS STATIC QUIET) - -if(NOT HDF5_FOUND) - message(STATUS "Build record-minmax: FAILED (missing HDF5)") - return() -endif(NOT HDF5_FOUND) - set(DRIVER "driver/Driver.cpp") file(GLOB_RECURSE SOURCES "src/*.cpp") add_executable(record-minmax ${DRIVER} ${SOURCES}) 
target_include_directories(record-minmax PRIVATE include) -target_include_directories(record-minmax PRIVATE ${HDF5_INCLUDE_DIRS}) -target_link_libraries(record-minmax ${HDF5_CXX_LIBRARIES}) target_link_libraries(record-minmax arser) target_link_libraries(record-minmax safemain) target_link_libraries(record-minmax luci_import) target_link_libraries(record-minmax luci_env) target_link_libraries(record-minmax luci_export) target_link_libraries(record-minmax luci_interpreter) +target_link_libraries(record-minmax dio_hdf5) target_link_libraries(record-minmax vconone) target_link_libraries(record-minmax nncc_coverage) diff --git a/compiler/record-minmax/requires.cmake b/compiler/record-minmax/requires.cmake index 9cf12591e..69373e76f 100644 --- a/compiler/record-minmax/requires.cmake +++ b/compiler/record-minmax/requires.cmake @@ -2,4 +2,5 @@ require("luci") require("luci-interpreter") require("safemain") require("arser") +require("dio-hdf5") require("vconone") diff --git a/compiler/record-minmax/src/HDF5Importer.h b/compiler/record-minmax/src/HDF5Importer.h deleted file mode 100644 index 9e98c7752..000000000 --- a/compiler/record-minmax/src/HDF5Importer.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __RECORD_MINMAX_HDF5IMPORTER_H__ -#define __RECORD_MINMAX_HDF5IMPORTER_H__ - -#include <luci_interpreter/core/Tensor.h> - -#include <H5Cpp.h> - -#include <stdexcept> - -using Shape = luci_interpreter::Shape; -using DataType = luci_interpreter::DataType; - -namespace record_minmax -{ - -// HDF5Importer reads an input data saved in the hdf5 file in the given path -// The hierarchy of the hdf5 file is as follows. -// Group "/" -// > Group "value" -// > Group <record_idx> -// > Dataset <input_idx> -// record_idx : index of the record (dataset file can contain multiple records) -// input_idx : index of the input (DNN model can have multiple inputs) -// Ex: the j'th input of the i'th record can be accessed by "/value/i/j" -class HDF5Importer -{ -public: - explicit HDF5Importer(const std::string &path) - { - if (_file.isHdf5(path) == false) - throw std::runtime_error("Given data file is not HDF5"); - - _file = H5::H5File(path, H5F_ACC_RDONLY); - } - -public: - /** - * @brief importGroup has to be called before readTensor is called - * Otherwise, readTensor will throw an exception - */ - void importGroup() { _value_grp = _file.openGroup("value"); } - - /** - * @brief Read tensor data from file and store it into buffer - * @details A tensor in the file can be retrieved with (record_idx, input_idx) - * @param record_idx : index of the record - * @param input_idx : index of the input - * @param dtype : pointer to write the tensor's data type - * @param shape : pointer to write the tensor's shape - * @param buffer : pointer to write the tensor's data - */ - void readTensor(int32_t record_idx, int32_t input_idx, DataType *dtype, Shape *shape, - void *buffer); - - // Read a raw tensor (no type/shape is specified) - void readTensor(int32_t record_idx, int32_t input_idx, void *buffer); - - bool isRawData() { return _value_grp.attrExists("rawData"); } - - int32_t numRecords() { return _value_grp.getNumObjs(); } - - int32_t numInputs(int32_t record_idx); - -private: 
- H5::H5File _file; - H5::Group _value_grp; -}; - -} // namespace record_minmax - -#endif // __RECORD_MINMAX_HDF5IMPORTER_H__ diff --git a/compiler/record-minmax/src/MinMaxObserver.cpp b/compiler/record-minmax/src/MinMaxObserver.cpp index 28ae2b33b..8288d3e5e 100644 --- a/compiler/record-minmax/src/MinMaxObserver.cpp +++ b/compiler/record-minmax/src/MinMaxObserver.cpp @@ -51,6 +51,16 @@ void MinMaxObserver::postTensorWrite(const luci::CircleNode *node, // Bool type tensor is not quantized return; } + if (node->dtype() == DataType::S32) + { + // Integer type tensor is not quantized + return; + } + if (node->dtype() == DataType::S64) + { + // Integer type tensor is not quantized + return; + } // Only support recording of float32 values if (tensor->element_type() != DataType::FLOAT32) @@ -58,9 +68,6 @@ void MinMaxObserver::postTensorWrite(const luci::CircleNode *node, // Exceptions that should be processed in backends switch (node->opcode()) { - case luci::CircleOpcode::ARG_MAX: - // Output of arg_max is the index of the largest value across axes of a tensor. - // It always has integer type. case luci::CircleOpcode::CAST: // Cast is quantized only if it converts <type> -> float. // Other cases should be processed in backends. 
diff --git a/compiler/record-minmax/src/RecordMinMax.cpp b/compiler/record-minmax/src/RecordMinMax.cpp index c249960f8..10a14516f 100644 --- a/compiler/record-minmax/src/RecordMinMax.cpp +++ b/compiler/record-minmax/src/RecordMinMax.cpp @@ -17,12 +17,12 @@ #include "RecordMinMax.h" #include "RecordFunction.h" #include "MinMaxObserver.h" -#include "HDF5Importer.h" #include <luci/Importer.h> #include <luci/CircleExporter.h> #include <luci/CircleFileExpContract.h> #include <luci/IR/CircleQuantParam.h> +#include <dio_hdf5/HDF5Importer.h> #include <dirent.h> #include <algorithm> @@ -33,12 +33,34 @@ #include <iostream> #include <random> -using Shape = luci_interpreter::Shape; -using DataType = luci_interpreter::DataType; +using Shape = std::vector<loco::Dimension>; +using DataType = loco::DataType; namespace { +uint32_t numElements(const luci::CircleNode *node) +{ + uint32_t num_elements = 1; + for (uint32_t i = 0; i < node->rank(); i++) + num_elements *= node->dim(i).value(); + + return num_elements; +} + +// Throw exception if input has one of the following conditions. +// 1. Have unknown dimension +// 2. 
Number of elements is 0 +void checkInputDimension(const luci::CircleInput *input) +{ + for (uint32_t i = 0; i < input->rank(); i++) + if (!input->dim(i).known()) + throw std::runtime_error(input->name() + " has unknown dimension"); + + if (numElements(input) == 0) + throw std::runtime_error(input->name() + " is a zero-sized input"); +} + void readDataFromFile(const std::string &filename, std::vector<char> &data, size_t data_size) { assert(data.size() == data_size); // FIX_CALLER_UNLESS @@ -62,6 +84,21 @@ std::vector<uint8_t> genRandomBoolData(std::mt19937 &gen, uint32_t num_elements) return input_data; } +template <typename T> +std::vector<T> genRandomIntData(std::mt19937 &gen, uint32_t num_elements, T min, T max) +{ + std::uniform_int_distribution<T> dist(min, max); + std::vector<T> input_data(num_elements); + + // Write random data + { + auto const generator = [&gen, &dist]() { return dist(gen); }; + std::generate(begin(input_data), end(input_data), generator); + } + + return input_data; +} + /** * @brief getTensorSize will return size in bytes */ @@ -83,12 +120,12 @@ void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, if (dtype != input_node->dtype()) throw std::runtime_error("Wrong input type."); - if (shape.num_dims() != input_node->rank()) + if (shape.size() != input_node->rank()) throw std::runtime_error("Input rank mismatch."); - for (uint32_t i = 0; i < shape.num_dims(); i++) + for (uint32_t i = 0; i < shape.size(); i++) { - if (shape.dim(i) != input_node->dim(i).value()) + if (not(shape.at(i) == input_node->dim(i))) throw std::runtime_error("Input shape mismatch."); } } @@ -188,6 +225,7 @@ void RecordMinMax::profileRawDataDirectory(const std::string &mode, for (auto input : input_nodes) { const auto *input_node = loco::must_cast<const luci::CircleInput *>(input); + checkInputDimension(input_node); total_input_size += getTensorSize(input_node); } @@ -254,6 +292,7 @@ void RecordMinMax::profileRawData(const std::string &mode, 
const std::string &in for (auto input : input_nodes) { const auto *input_node = loco::must_cast<const luci::CircleInput *>(input); + checkInputDimension(input_node); total_input_size += getTensorSize(input_node); } @@ -296,12 +335,12 @@ void RecordMinMax::profileData(const std::string &mode, const std::string &input { try { - HDF5Importer importer(input_data_path); - importer.importGroup(); + dio::hdf5::HDF5Importer importer(input_data_path); + importer.importGroup("value"); bool is_raw_data = importer.isRawData(); - const auto num_records = importer.numRecords(); + const auto num_records = importer.numData(); if (num_records == 0) throw std::runtime_error("The input data file does not contain any record."); @@ -319,12 +358,13 @@ void RecordMinMax::profileData(const std::string &mode, const std::string &input { const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]); assert(input_node->index() == input_idx); + checkInputDimension(input_node); std::vector<char> input_data(getTensorSize(input_node)); if (!is_raw_data) { DataType dtype; - Shape shape(input_node->rank()); + Shape shape; importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data()); // Check the type and the shape of the input data is valid @@ -376,43 +416,47 @@ void RecordMinMax::profileDataWithRandomInputs(const std::string &mode, float mi { const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]); assert(input_node->index() == input_idx); - uint32_t num_elements = 1; - for (uint32_t i = 0; i < input_node->rank(); i++) - { - if (!input_node->dim(i).known()) - throw std::runtime_error("Input dimension must be known"); + checkInputDimension(input_node); - num_elements *= input_node->dim(i).value(); - } - - if (num_elements == 0) - throw std::runtime_error("Only support non-zero sized inputs"); + const auto num_elements = numElements(input_node); // TODO Support more input data types assert(input_node->dtype() == 
loco::DataType::FLOAT32 || - input_node->dtype() == loco::DataType::BOOL); + input_node->dtype() == loco::DataType::BOOL || + input_node->dtype() == loco::DataType::S32 || + input_node->dtype() == loco::DataType::S64); if (input_node->dtype() == DataType::FLOAT32) - // clang-format off { - std::vector<float> input_data(num_elements); + std::vector<float> input_data(num_elements); - // Write random data - for (auto &iter : input_data) - iter = static_cast<float>(dist(gen)); + // Write random data + for (auto &iter : input_data) + iter = static_cast<float>(dist(gen)); - // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs) - // We can redcue the copy by directly writing data from file to interpreter inputs - _interpreter->writeInputTensor(input_node, input_data.data(), - input_data.size() * sizeof(float)); + // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs) + // We can redcue the copy by directly writing data from file to interpreter inputs + _interpreter->writeInputTensor(input_node, input_data.data(), + input_data.size() * sizeof(float)); } - // clang-format on else if (input_node->dtype() == DataType::BOOL) { auto input_data = genRandomBoolData(gen, num_elements); _interpreter->writeInputTensor(input_node, input_data.data(), input_data.size() * sizeof(uint8_t)); } + else if (input_node->dtype() == DataType::S32) + { + auto input_data = genRandomIntData<int32_t>(gen, num_elements, 0, 100); + _interpreter->writeInputTensor(input_node, input_data.data(), + input_data.size() * sizeof(int32_t)); + } + else if (input_node->dtype() == DataType::S64) + { + auto input_data = genRandomIntData<int64_t>(gen, num_elements, 0, 100); + _interpreter->writeInputTensor(input_node, input_data.data(), + input_data.size() * sizeof(int64_t)); + } } _interpreter->interpret(); diff --git a/compiler/souschef/CMakeLists.txt b/compiler/souschef/CMakeLists.txt index ca7eddc6f..f57102f1f 100644 --- 
a/compiler/souschef/CMakeLists.txt +++ b/compiler/souschef/CMakeLists.txt @@ -1,7 +1,7 @@ nnas_find_package(Protobuf QUIET) if(NOT Protobuf_FOUND) - message(STATUS "Build souschef: FAILED (missing Protobuf") + message(STATUS "Build souschef: FAILED (missing Protobuf)") return() endif(NOT Protobuf_FOUND) diff --git a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt index 3e7e57747..0b4739374 100644 --- a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt +++ b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt @@ -72,7 +72,7 @@ list(APPEND TEST_DEPS "${TEST_RUNNER}") get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR) -set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_1_13_2") +set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0") ### ### Generate test.config diff --git a/compiler/tfl-inspect/CMakeLists.txt b/compiler/tfl-inspect/CMakeLists.txt index 6ba55c357..9e1cb720f 100644 --- a/compiler/tfl-inspect/CMakeLists.txt +++ b/compiler/tfl-inspect/CMakeLists.txt @@ -10,5 +10,6 @@ add_executable(tfl-inspect ${DRIVER} ${SOURCES}) target_include_directories(tfl-inspect PRIVATE src) target_link_libraries(tfl-inspect arser) target_link_libraries(tfl-inspect foder) -target_link_libraries(tfl-inspect mio_tflite260) +target_link_libraries(tfl-inspect mio_tflite280) +target_link_libraries(tfl-inspect mio_tflite280_helper) target_link_libraries(tfl-inspect safemain) diff --git a/compiler/tfl-inspect/requires.cmake b/compiler/tfl-inspect/requires.cmake index 9a7477b81..a11f6b200 100644 --- a/compiler/tfl-inspect/requires.cmake +++ b/compiler/tfl-inspect/requires.cmake @@ -1,4 +1,4 @@ require("arser") require("foder") -require("mio-tflite260") +require("mio-tflite280") require("safemain") diff --git a/compiler/tfl-inspect/src/Reader.cpp b/compiler/tfl-inspect/src/Reader.cpp index 41a8396bb..6c4529516 100644 --- a/compiler/tfl-inspect/src/Reader.cpp +++ b/compiler/tfl-inspect/src/Reader.cpp @@ -16,6 +16,8 @@ #include 
"Reader.h" +#include <mio_tflite280/Helper.h> + #include <cassert> #include <sstream> #include <string> @@ -23,72 +25,6 @@ namespace tflinspect { -// This will provide v3/v3a format neutral BuiltinOperator -tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode) -{ - assert(opcode != nullptr); - int8_t dp_code = opcode->deprecated_builtin_code(); - // 127 is max of int8_t which is upper bound of v3 builtin_code - // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127 - if (dp_code < 127 && dp_code >= 0) - return tflite::BuiltinOperator(dp_code); - return opcode->builtin_code(); -} - -bool is_valid(const tflite::OperatorCode *opcode) -{ - tflite::BuiltinOperator code = builtin_code_neutral(opcode); - return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX); -} - -bool is_custom(const tflite::OperatorCode *opcode) -{ - tflite::BuiltinOperator code = builtin_code_neutral(opcode); - return (code == tflite::BuiltinOperator_CUSTOM); -} - -std::string opcode_name(const tflite::OperatorCode *opcode) -{ - assert(opcode); - - if (!is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid)"; - return oss.str(); - } - - if (is_custom(opcode)) - { - if (!opcode->custom_code()) - return "(invalid custom)"; - - std::string custom_op = "CUSTOM("; - custom_op += opcode->custom_code()->c_str(); - custom_op += ")"; - return custom_op; - } - - tflite::BuiltinOperator code = builtin_code_neutral(opcode); - return tflite::EnumNameBuiltinOperator(code); -} - -const char *tensor_type(const tflite::Tensor *tensor) -{ - return tflite::EnumNameTensorType(tensor->type()); -} - -const char *tensor_name(const tflite::Tensor *tensor) -{ - static const char *kEmptyTensorName = "(noname)"; - - auto name = tensor->name(); - if (name) - return name->c_str(); - - return kEmptyTensorName; -} - Reader::Reader(const tflite::Model *model) { _subgraphs = model->subgraphs(); @@ -135,7 +71,7 @@ tflite::BuiltinOperator 
Reader::builtin_code(const tflite::Operator *op) const assert(index < _op_codes.size()); const tflite::OperatorCode *opcode = _op_codes.at(index); - return tflinspect::builtin_code_neutral(opcode); + return mio::tflite::builtin_code_neutral(opcode); } std::string Reader::opcode_name(const tflite::Operator *op) const @@ -144,14 +80,14 @@ std::string Reader::opcode_name(const tflite::Operator *op) const assert(index < _op_codes.size()); const tflite::OperatorCode *opcode = _op_codes.at(index); - if (!is_valid(opcode)) + if (!mio::tflite::is_valid(opcode)) { std::ostringstream oss; oss << "(invalid: " << index << ")"; return oss.str(); } - return tflinspect::opcode_name(opcode); + return mio::tflite::opcode_name(opcode); } bool Reader::select_subgraph(uint32_t sgindex) diff --git a/compiler/tfl-inspect/src/Reader.h b/compiler/tfl-inspect/src/Reader.h index 91b7bb940..98554cf85 100644 --- a/compiler/tfl-inspect/src/Reader.h +++ b/compiler/tfl-inspect/src/Reader.h @@ -36,13 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T return ret; } -tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode); -bool is_valid(const tflite::OperatorCode *opcode); -bool is_custom(const tflite::OperatorCode *opcode); -std::string opcode_name(const tflite::OperatorCode *opcode); -const char *tensor_type(const tflite::Tensor *tensor); -const char *tensor_name(const tflite::Tensor *tensor); - /** * @brief Loads TF lite file and provides helpers to access attributes */ diff --git a/compiler/tfl-verify/CMakeLists.txt b/compiler/tfl-verify/CMakeLists.txt index a87d30c5e..2fba335ea 100644 --- a/compiler/tfl-verify/CMakeLists.txt +++ b/compiler/tfl-verify/CMakeLists.txt @@ -8,6 +8,6 @@ add_executable(tfl-verify ${SOURCES}) target_include_directories(tfl-verify PRIVATE src) target_link_libraries(tfl-verify arser) target_link_libraries(tfl-verify foder) -target_link_libraries(tfl-verify mio_tflite260) +target_link_libraries(tfl-verify 
mio_tflite280) target_link_libraries(tfl-verify safemain) target_link_libraries(tfl-verify cwrap) diff --git a/compiler/tfl-verify/requires.cmake b/compiler/tfl-verify/requires.cmake index 72803d890..b107bdfe7 100644 --- a/compiler/tfl-verify/requires.cmake +++ b/compiler/tfl-verify/requires.cmake @@ -1,5 +1,5 @@ require("arser") require("foder") -require("mio-tflite260") +require("mio-tflite280") require("safemain") require("cwrap") diff --git a/compiler/tflchef/CMakeLists.txt b/compiler/tflchef/CMakeLists.txt index ac7fe4b7c..948b1cecd 100644 --- a/compiler/tflchef/CMakeLists.txt +++ b/compiler/tflchef/CMakeLists.txt @@ -5,10 +5,10 @@ if(NOT Protobuf_FOUND) return() endif(NOT Protobuf_FOUND) -if(NOT TARGET mio_tflite260) - message(STATUS "Build tflchef: FAILED (missing mio_tflite260)") +if(NOT TARGET mio_tflite280) + message(STATUS "Build tflchef: FAILED (missing mio_tflite280)") return() -endif(NOT TARGET mio_tflite260) +endif(NOT TARGET mio_tflite280) # Recipe Parser add_subdirectory(proto) diff --git a/compiler/tflchef/core/CMakeLists.txt b/compiler/tflchef/core/CMakeLists.txt index 413b78b15..6b6fed57b 100644 --- a/compiler/tflchef/core/CMakeLists.txt +++ b/compiler/tflchef/core/CMakeLists.txt @@ -5,5 +5,5 @@ target_include_directories(tflchef_core PUBLIC include) target_include_directories(tflchef_core PRIVATE src) target_link_libraries(tflchef_core tflchef_proto) target_link_libraries(tflchef_core tflchef_log) -target_link_libraries(tflchef_core mio_tflite260) +target_link_libraries(tflchef_core mio_tflite280) target_link_libraries(tflchef_core souschef) diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp index ada5ff5d1..93b9334a6 100644 --- a/compiler/tflchef/core/src/ModelChef.cpp +++ b/compiler/tflchef/core/src/ModelChef.cpp @@ -722,15 +722,13 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe) auto inputs = flatbuffer_builder->CreateVector(tensormap_inputs); auto outputs = 
flatbuffer_builder->CreateVector(tensormap_outputs); - auto method_name = flatbuffer_builder->CreateString(rec_signature_def.method_name()); - auto key = flatbuffer_builder->CreateString(rec_signature_def.key()); - // TODO add validation for method_name and key + auto signature_key = flatbuffer_builder->CreateString(rec_signature_def.signature_key()); + // TODO add validation for signature_key ::tflite::SignatureDefBuilder signature_def_builder{*flatbuffer_builder}; signature_def_builder.add_inputs(inputs); signature_def_builder.add_outputs(outputs); - signature_def_builder.add_method_name(method_name); - signature_def_builder.add_key(key); + signature_def_builder.add_signature_key(signature_key); signature_def_builder.add_subgraph_index(rec_signature_def.subgraph_index()); signdef_vec.emplace_back(signature_def_builder.Finish()); diff --git a/compiler/tflchef/core/src/Op/FullyConnected.cpp b/compiler/tflchef/core/src/Op/FullyConnected.cpp index 45269916c..7173a67ba 100644 --- a/compiler/tflchef/core/src/Op/FullyConnected.cpp +++ b/compiler/tflchef/core/src/Op/FullyConnected.cpp @@ -29,6 +29,7 @@ flatbuffers::Offset<void> FullyConnectedChef::value(flatbuffers::FlatBufferBuild tflite::FullyConnectedOptionsBuilder fc_options_builder{fbb}; fc_options_builder.add_fused_activation_function(tflite_activation); + fc_options_builder.add_keep_num_dims(operation.fullyconnected_options().keep_num_dims()); return fc_options_builder.Finish().Union(); } diff --git a/compiler/tflchef/core/src/Op/SVDF.cpp b/compiler/tflchef/core/src/Op/SVDF.cpp new file mode 100644 index 000000000..690896cf1 --- /dev/null +++ b/compiler/tflchef/core/src/Op/SVDF.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SVDF.h" +#include "Convert.h" + +#include <cassert> + +flatbuffers::Offset<void> SVDFChef::value(flatbuffers::FlatBufferBuilder &fbb) const +{ + assert(_operation->has_svdf_options()); + + const auto &svdf_options = _operation->svdf_options(); + + const auto tflite_activation = as_tflite_activation(svdf_options.activation()); + + tflite::SVDFOptionsBuilder svdf_options_builder{fbb}; + svdf_options_builder.add_fused_activation_function(tflite_activation); + svdf_options_builder.add_asymmetric_quantize_inputs(svdf_options.asymmetric_quantize_inputs()); + svdf_options_builder.add_rank(svdf_options.rank()); + + return svdf_options_builder.Finish().Union(); +} + +std::unique_ptr<OpChef> SVDFChefFactory::create(const tflchef::Operation *operation) const +{ + return std::unique_ptr<OpChef>{new SVDFChef{operation}}; +} diff --git a/compiler/tflchef/core/src/Op/SVDF.h b/compiler/tflchef/core/src/Op/SVDF.h new file mode 100644 index 000000000..9bf0b6efb --- /dev/null +++ b/compiler/tflchef/core/src/Op/SVDF.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OP_SVDF_H__ +#define __OP_SVDF_H__ + +#include "OpChef.h" + +class SVDFChef final : public OpChef +{ +public: + explicit SVDFChef(const tflchef::Operation *operation) : _operation{operation} + { + // DO NOTHING + } + +public: + tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_SVDF; } + + tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_SVDFOptions; } + + flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override; + +private: + const tflchef::Operation *_operation; +}; + +struct SVDFChefFactory final : public OpChefFactory +{ + std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override; +}; + +#endif // __OP_SVDF_H__ diff --git a/compiler/tflchef/core/src/OpChef.def b/compiler/tflchef/core/src/OpChef.def index b1e8a3829..beebd359f 100644 --- a/compiler/tflchef/core/src/OpChef.def +++ b/compiler/tflchef/core/src/OpChef.def @@ -104,6 +104,7 @@ OP_CHEF(Squeeze, SqueezeChefFactory) OP_CHEF(StridedSlice, StridedSliceChefFactory) OP_CHEF(Sub, SubChefFactory) OP_CHEF(Sum, SumChefFactory) +OP_CHEF(SVDF, SVDFChefFactory) OP_CHEF(Tanh, TanhChefFactory) OP_CHEF(Tile, TileChefFactory) OP_CHEF(TopKV2, TopKV2ChefFactory) diff --git a/compiler/tflchef/core/src/OpChefs.h b/compiler/tflchef/core/src/OpChefs.h index 35688ba95..159019abf 100644 --- a/compiler/tflchef/core/src/OpChefs.h +++ b/compiler/tflchef/core/src/OpChefs.h @@ -117,6 +117,7 @@ #include "Op/StridedSlice.h" #include "Op/Sub.h" #include "Op/Sum.h" +#include "Op/SVDF.h" 
#include "Op/Tanh.h" #include "Op/Tile.h" #include "Op/TopKV2.h" diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto index 4162cb123..1abefafe1 100644 --- a/compiler/tflchef/proto/tflchef.proto +++ b/compiler/tflchef/proto/tflchef.proto @@ -182,6 +182,7 @@ message FloorModOptions { message FullyConnectedOptions { optional Activation activation = 1 [default = NONE]; + optional bool keep_num_dims = 2 [ default = false ]; } message AddOptions { @@ -366,6 +367,12 @@ message SquaredDifferenceOptions { // None } +message SVDFOptions { + optional int32 rank = 1 [default = 0]; + optional Activation activation = 2 [default = NONE]; + optional bool asymmetric_quantize_inputs = 3 [default = false]; +} + message FillOptions { // None } @@ -589,7 +596,7 @@ message Operation { optional ZerosLikeOptions zeros_like_options = 153; // ConcatEmbeddingsOptions 154 // LSHProjectionOptions 155 - // SVDFOptions 156 + optional SVDFOptions svdf_options = 156; // RNNOptions 157 optional L2NormOptions l2norm_options = 158; optional LocalResponseNormalizationOptions local_response_normalization_options = 159; @@ -658,8 +665,8 @@ message TensorMap { message SignatureDef { repeated TensorMap inputs = 4; repeated TensorMap outputs = 5; - optional string method_name = 6; - optional string key = 10; + optional string signature_key = 6; + // optional string key = 10; obsolete in TF2.8.0 optional uint32 subgraph_index = 12; } diff --git a/compiler/tflchef/requires.cmake b/compiler/tflchef/requires.cmake index 78bfa2d07..a01da4258 100644 --- a/compiler/tflchef/requires.cmake +++ b/compiler/tflchef/requires.cmake @@ -1,7 +1,7 @@ require("arser") require("nnkit") require("cwrap") -require("mio-tflite260") +require("mio-tflite280") require("safemain") require("hermes") require("hermes-std") diff --git a/compiler/tflchef/tests/CMakeLists.txt b/compiler/tflchef/tests/CMakeLists.txt index 5c4dff012..26cf67f4f 100644 --- a/compiler/tflchef/tests/CMakeLists.txt +++ 
b/compiler/tflchef/tests/CMakeLists.txt @@ -1,10 +1,11 @@ -if(NOT TARGET nnkit-run) - return() -endif(NOT TARGET nnkit-run) - -if(NOT TARGET nnkit_tflite_backend) - return() -endif(NOT TARGET nnkit_tflite_backend) +set(TFLCHEF_FILE_PATH $<TARGET_FILE:tflchef-file>) +set(TFLCHEF_REVERSE_PATH $<TARGET_FILE:tflchef-reverse>) +if(DEFINED ENV{BUILD_HOST_EXEC}) + # TODO use better way to represent path for host executable + set(TFLCHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/file/tflchef-file) + set(TFLCHEF_REVERSE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/reverse/tflchef-reverse) + message(STATUS "TFLCHEF_FILE_PATH = ${TFLCHEF_FILE_PATH}") +endif(DEFINED ENV{BUILD_HOST_EXEC}) nncc_find_resource(TensorFlowLiteRecipes) set(TENSORFLOWLITERECIPES_DIR "${TensorFlowLiteRecipes_DIR}") @@ -26,8 +27,8 @@ foreach(RECIPE IN ITEMS ${RECIPES}) # Generate .tflite add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE} - COMMAND tflchef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE} - DEPENDS tflchef-file ${RECIPE_SOURCE_FILE} + COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE} + DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} COMMENT "Generating ${RECIPE_OUTPUT_FILE}") list(APPEND TESTS ${RECIPE_PREFIX}) @@ -52,8 +53,8 @@ foreach(RECIPE IN ITEMS ${RECIPES}) # Generate .tflite add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE} - COMMAND tflchef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE} - DEPENDS tflchef-file ${RECIPE_SOURCE_FILE} + COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE} + DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} COMMENT "Generating ${RECIPE_OUTPUT_FILE}") list(APPEND TESTS ${RECIPE_PREFIX}) @@ -76,16 +77,16 @@ foreach(TFLITEFILE IN ITEMS ${GEN_TFLITEFILES}) # Generate .gen.recipe from generated .tflite add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE} - COMMAND tflchef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE} - DEPENDS tflchef-reverse ${RECIPE_OUTPUT_FILE} + COMMAND 
${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE} + DEPENDS ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}") # now we are going to generate .gen.tflite from .gen.recipe # to check generated .gen.recipe file is correct by using it. # as weight values may be different, binary comparision is not acceptable. add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2} - COMMAND tflchef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2} - DEPENDS tflchef-file ${RECIPE_GEN_OUTPUT_FILE} + COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2} + DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}") list(APPEND TESTS ${TFLITE_PREFIX}.gen) @@ -104,13 +105,13 @@ foreach(TFLITEFILE IN ITEMS ${GEN_TFLITEFILES}) # Generate .gen.recipe from generated .tflite add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE} - COMMAND tflchef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE} - DEPENDS tflchef-reverse ${RECIPE_OUTPUT_FILE} + COMMAND ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE} + DEPENDS ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}") add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2} - COMMAND tflchef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2} - DEPENDS tflchef-file ${RECIPE_GEN_OUTPUT_FILE} + COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2} + DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}") list(APPEND TESTS ${TFLITE_PREFIX}.gen) @@ -123,7 +124,9 @@ add_custom_target(tflchef_testfiles ALL DEPENDS ${TESTFILES}) # Using mio_tflite_validate for temporary as it only calls flatbuffer validate # TODO do testing with running the model with runtime/interpreter +# NOTE for ARM32 cross build, $<TARGET_FILE:mio_tflite280_validate> is used as-is +# as test should run in 
ARM32 device add_test(NAME tflchef_test COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/runvalidate.sh" - $<TARGET_FILE:mio_tflite_validate> + $<TARGET_FILE:mio_tflite280_validate> ${TESTS}) diff --git a/compiler/tflchef/tests/signature_def_index/test.recipe b/compiler/tflchef/tests/signature_def_index/test.recipe index 4481752ef..9e95edf00 100644 --- a/compiler/tflchef/tests/signature_def_index/test.recipe +++ b/compiler/tflchef/tests/signature_def_index/test.recipe @@ -50,8 +50,7 @@ signature_def { name: "ofm1" tensor_index: 1 } - method_name: "serving_default" - key: "serv" + signature_key: "serving_default" subgraph_index: 0 } input: "ifm" diff --git a/compiler/tflchef/tests/signature_def_name/test.recipe b/compiler/tflchef/tests/signature_def_name/test.recipe index 79be25138..4847f7dd8 100644 --- a/compiler/tflchef/tests/signature_def_name/test.recipe +++ b/compiler/tflchef/tests/signature_def_name/test.recipe @@ -50,8 +50,7 @@ signature_def { name: "out1" tensor: "ofm1" } - method_name: "serving_default" - key: "serv" + signature_key: "serving_default" subgraph_index: 0 } input: "ifm" diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt index 3c4c3fff6..3c3352b0a 100644 --- a/compiler/tflchef/tflite/CMakeLists.txt +++ b/compiler/tflchef/tflite/CMakeLists.txt @@ -4,6 +4,7 @@ add_library(tflchef_tflite STATIC ${SOURCES}) target_include_directories(tflchef_tflite PUBLIC include) target_include_directories(tflchef_tflite PRIVATE src) target_link_libraries(tflchef_tflite tflchef_proto) -target_link_libraries(tflchef_tflite mio_tflite260) +target_link_libraries(tflchef_tflite mio_tflite280) +target_link_libraries(tflchef_tflite mio_tflite280_helper) target_link_libraries(tflchef_tflite cwrap) target_link_libraries(tflchef_tflite souschef) diff --git a/compiler/tflchef/tflite/src/Op/FullyConnected.cpp b/compiler/tflchef/tflite/src/Op/FullyConnected.cpp index 1f6e73aa6..bbc749fe4 100644 --- 
a/compiler/tflchef/tflite/src/Op/FullyConnected.cpp +++ b/compiler/tflchef/tflite/src/Op/FullyConnected.cpp @@ -48,6 +48,7 @@ tflchef::Operation *TFliteOpFullyConnected::build(const tflite::Operator *op, TF auto op_options = operation->mutable_fullyconnected_options(); op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function())); + op_options->set_keep_num_dims(op_params->keep_num_dims()); return operation; } diff --git a/compiler/tflchef/tflite/src/Op/SVDF.cpp b/compiler/tflchef/tflite/src/Op/SVDF.cpp new file mode 100644 index 000000000..015f968a8 --- /dev/null +++ b/compiler/tflchef/tflite/src/Op/SVDF.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SVDF.h" + +#include "Convert.h" + +namespace tflchef +{ + +void TFliteOpSVDF::filler(const tflite::Operator *op, TFliteImport *import, + tflchef::ModelRecipe *model_recipe) const +{ + const std::vector<int32_t> &inputs = as_index_vector(op->inputs()); + assert(inputs.size() == 5); + + // optional input tensor idx has minus value. 
+ const bool hasBias = (inputs.at(3) >= 0); + + // Note: last input is variable tensor without data + import->set_tensor_filler(inputs.at(1)); + import->set_tensor_filler(inputs.at(2)); + if (hasBias) + import->set_tensor_filler(inputs.at(3)); +} + +tflchef::Operation *TFliteOpSVDF::build(const tflite::Operator *op, TFliteImport *import, + tflchef::ModelRecipe *model_recipe) const +{ + const auto op_params = op->builtin_options_as_SVDFOptions(); + assert(op_params != nullptr); + + auto operation = model_recipe->add_operation(); + + operation->set_type("SVDF"); + + auto op_options = operation->mutable_svdf_options(); + + op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function())); + op_options->set_asymmetric_quantize_inputs(op_params->asymmetric_quantize_inputs()); + op_options->set_rank(op_params->rank()); + + return operation; +} + +} // namespace tflchef diff --git a/compiler/tflchef/tflite/src/Op/SVDF.h b/compiler/tflchef/tflite/src/Op/SVDF.h new file mode 100644 index 000000000..a59ca54a2 --- /dev/null +++ b/compiler/tflchef/tflite/src/Op/SVDF.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __TFLITE_OP_SVDF_H__ +#define __TFLITE_OP_SVDF_H__ + +#include "TFliteOpChef.h" + +namespace tflchef +{ + +/** + * @brief tflchef operator builder for SVDF + */ +class TFliteOpSVDF : public TFliteOpChef +{ +public: + void filler(const tflite::Operator *op, TFliteImport *import, + tflchef::ModelRecipe *model_recipe) const override; + tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import, + tflchef::ModelRecipe *model_recipe) const override; +}; + +} // namespace tflchef + +#endif // __TFLITE_OP_SVDF_H__ diff --git a/compiler/tflchef/tflite/src/RecipeChef.cpp b/compiler/tflchef/tflite/src/RecipeChef.cpp index d9215a4c4..0701707c1 100644 --- a/compiler/tflchef/tflite/src/RecipeChef.cpp +++ b/compiler/tflchef/tflite/src/RecipeChef.cpp @@ -15,6 +15,7 @@ */ #include <tflchef/RecipeChef.h> +#include <mio_tflite280/Helper.h> #include "Convert.h" #include "TFliteImport.h" @@ -42,7 +43,7 @@ void set_inputs(TFliteImport *import, tflchef::Operation *operation, const tflit else { auto tensor = tensors->Get(input); - std::string name = tensor_name(tensor); + std::string name = mio::tflite::tensor_name(tensor); operation->add_input(name); } } @@ -56,7 +57,7 @@ void set_outputs(TFliteImport *import, tflchef::Operation *operation, const tfli for (auto output : outputs) { auto tensor = tensors->Get(output); - std::string name = tensor_name(tensor); + std::string name = mio::tflite::tensor_name(tensor); operation->add_output(name); } } @@ -108,7 +109,7 @@ std::unique_ptr<ModelRecipe> generate_recipe(const tflite::Model *model) ::tflchef::Operand *operand = model_recipe->add_operand(); - operand->set_name(tensor_name(tensor)); + operand->set_name(mio::tflite::tensor_name(tensor)); operand->set_type(as_tflchef_type(tensor->type())); operand->set_is_variable(tensor->is_variable()); @@ -311,14 +312,14 @@ std::unique_ptr<ModelRecipe> generate_recipe(const tflite::Model *model) for (const auto input : inputs) { auto tensor = tensors->Get(input); - 
std::string name = tensor_name(tensor); + std::string name = mio::tflite::tensor_name(tensor); model_recipe->add_input(name); } for (const auto output : outputs) { auto tensor = tensors->Get(output); - std::string name = tensor_name(tensor); + std::string name = mio::tflite::tensor_name(tensor); model_recipe->add_output(name); } diff --git a/compiler/tflchef/tflite/src/TFliteImport.cpp b/compiler/tflchef/tflite/src/TFliteImport.cpp index 1462ee7f4..7114ab019 100644 --- a/compiler/tflchef/tflite/src/TFliteImport.cpp +++ b/compiler/tflchef/tflite/src/TFliteImport.cpp @@ -18,50 +18,13 @@ #include "Convert.h" +#include <mio_tflite280/Helper.h> + #include <sstream> namespace tflchef { -const char *kEmptyTensorName = "(noname)"; - -const char *tensor_type(const tflite::Tensor *tensor) -{ - return tflite::EnumNameTensorType(tensor->type()); -} - -const char *tensor_name(const tflite::Tensor *tensor) -{ - auto name = tensor->name(); - if (name) - return name->c_str(); - return kEmptyTensorName; -} - -// This will provide v3/v3a format neutral BuiltinOperator -tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode) -{ - assert(opcode != nullptr); - int8_t dp_code = opcode->deprecated_builtin_code(); - // 127 is max of int8_t which is upper bound of v3 builtin_code - // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127 - if (dp_code < 127 && dp_code >= 0) - return tflite::BuiltinOperator(dp_code); - return opcode->builtin_code(); -} - -bool is_valid(const tflite::OperatorCode *opcode) -{ - tflite::BuiltinOperator code = builtin_code_neutral(opcode); - return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX); -} - -bool is_custom(const tflite::OperatorCode *opcode) -{ - tflite::BuiltinOperator code = builtin_code_neutral(opcode); - return (code == tflite::BuiltinOperator_CUSTOM); -} - TFliteImport::TFliteImport(const tflite::Model *model) { _subgraphs = model->subgraphs(); @@ -104,7 +67,7 @@ 
tflite::BuiltinOperator TFliteImport::builtin_code(const tflite::Operator *op) c assert(index < _op_codes.size()); const tflite::OperatorCode *opcode = _op_codes.at(index); - return builtin_code_neutral(opcode); + return mio::tflite::builtin_code_neutral(opcode); } std::string TFliteImport::opcode_name(const tflite::Operator *op) const @@ -113,14 +76,14 @@ std::string TFliteImport::opcode_name(const tflite::Operator *op) const assert(index < _op_codes.size()); const tflite::OperatorCode *opcode = _op_codes.at(index); - if (!is_valid(opcode)) + if (!mio::tflite::is_valid(opcode)) { std::ostringstream oss; oss << "(invalid: " << index << ")"; return oss.str(); } - if (is_custom(opcode)) + if (mio::tflite::is_custom(opcode)) { if (!opcode->custom_code()) return "(invalid custom)"; @@ -128,7 +91,7 @@ std::string TFliteImport::opcode_name(const tflite::Operator *op) const return opcode->custom_code()->c_str(); } - tflite::BuiltinOperator code = builtin_code_neutral(opcode); + tflite::BuiltinOperator code = mio::tflite::builtin_code_neutral(opcode); return EnumNameBuiltinOperator(code); } diff --git a/compiler/tflchef/tflite/src/TFliteImport.h b/compiler/tflchef/tflite/src/TFliteImport.h index 43b5bbaff..e6722e455 100644 --- a/compiler/tflchef/tflite/src/TFliteImport.h +++ b/compiler/tflchef/tflite/src/TFliteImport.h @@ -34,12 +34,6 @@ using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>> using TFliteBuffers_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>; using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>; -const char *tensor_type(const tflite::Tensor *tensor); -const char *tensor_name(const tflite::Tensor *tensor); -tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode); -bool is_valid(const tflite::OperatorCode *opcode); -bool is_custom(const tflite::OperatorCode *opcode); - /** * @brief Loads TF lite file and provides helpers to access attributes */ diff --git 
a/compiler/tflchef/tflite/src/TFliteOpChefs.h b/compiler/tflchef/tflite/src/TFliteOpChefs.h index 26ada7d0a..b38b35a61 100644 --- a/compiler/tflchef/tflite/src/TFliteOpChefs.h +++ b/compiler/tflchef/tflite/src/TFliteOpChefs.h @@ -117,6 +117,7 @@ #include "Op/StridedSlice.h" #include "Op/Sub.h" #include "Op/Sum.h" +#include "Op/SVDF.h" #include "Op/Tanh.h" #include "Op/Tile.h" #include "Op/TopKV2.h" diff --git a/compiler/tflchef/tflite/src/TFliteOpRegistry.h b/compiler/tflchef/tflite/src/TFliteOpRegistry.h index 06394ddfa..4cbe7cfcb 100644 --- a/compiler/tflchef/tflite/src/TFliteOpRegistry.h +++ b/compiler/tflchef/tflite/src/TFliteOpRegistry.h @@ -154,6 +154,7 @@ private: REG_TFL_OP(STRIDED_SLICE, TFliteOpStridedSlice); REG_TFL_OP(SUB, TFliteOpSub); REG_TFL_OP(SUM, TFliteOpSum); + REG_TFL_OP(SVDF, TFliteOpSVDF); REG_TFL_OP(TANH, TFliteOpTanh); REG_TFL_OP(TILE, TFliteOpTile); REG_TFL_OP(TOPK_V2, TFliteOpTopKV2); diff --git a/compiler/tfldump/CMakeLists.txt b/compiler/tfldump/CMakeLists.txt index 83f7febad..fac0be6bf 100644 --- a/compiler/tfldump/CMakeLists.txt +++ b/compiler/tfldump/CMakeLists.txt @@ -1,7 +1,7 @@ -if(NOT TARGET mio_tflite260) - message(STATUS "Build tfldump: FAILED (missing mio_tflite260)") +if(NOT TARGET mio_tflite280) + message(STATUS "Build tfldump: FAILED (missing mio_tflite280)") return() -endif(NOT TARGET mio_tflite260) +endif(NOT TARGET mio_tflite280) set(DRIVER "driver/Driver.cpp") @@ -10,6 +10,6 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") add_executable(tfldump ${DRIVER} ${SOURCES}) target_include_directories(tfldump PRIVATE include) target_link_libraries(tfldump arser) -target_link_libraries(tfldump mio_tflite260) +target_link_libraries(tfldump mio_tflite280) +target_link_libraries(tfldump mio_tflite280_helper) target_link_libraries(tfldump safemain) -target_link_libraries(tfldump flatbuffers-1.12) diff --git a/compiler/tfldump/requires.cmake b/compiler/tfldump/requires.cmake index d0f9cccba..b1abf9486 100644 --- 
a/compiler/tfldump/requires.cmake +++ b/compiler/tfldump/requires.cmake @@ -1,3 +1,3 @@ require("arser") -require("mio-tflite260") +require("mio-tflite280") require("safemain") diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp index 2351e4c3d..2a87e47d7 100644 --- a/compiler/tfldump/src/Dump.cpp +++ b/compiler/tfldump/src/Dump.cpp @@ -15,6 +15,7 @@ */ #include <tfldump/Dump.h> +#include <mio_tflite280/Helper.h> #include "Read.h" #include "OpPrinter.h" @@ -127,7 +128,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader) // dump operands(tensors) os << "Operands: T(subgraph index : tensor index) TYPE (shape) (shape_signature) " - << "B(buffer index) OperandName" << std::endl; + << "B(buffer index) (variable) OperandName" << std::endl; for (uint32_t i = 0; i < tensors->Length(); ++i) { // TODO refactor to some better structure @@ -137,7 +138,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader) if (tensor->shape()) dims = tflread::as_index_vector(tensor->shape()); - os << "T(" << reader.subgraph_index() << ":" << i << ") " << tflread::tensor_type(tensor) + os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::tflite::tensor_type(tensor) << " "; os << "(" << dims << ") "; if (tensor->shape_signature()) @@ -146,7 +147,11 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader) os << "(" << dims_sig << ") "; } os << "B(" << tensor->buffer() << ") "; - os << tflread::tensor_name(tensor) << std::endl; + if (tensor->is_variable()) + { + os << "(variable) "; + } + os << mio::tflite::tensor_name(tensor) << std::endl; if (auto q_params = tensor->quantization()) { @@ -298,7 +303,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader) if (input >= 0) { auto tensor = tensors->Get(input); - os << tflread::tensor_name(tensor); + os << mio::tflite::tensor_name(tensor); } os << std::endl; } @@ -308,7 +313,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader) if (output >= 0) { auto 
tensor = tensors->Get(output); - os << tflread::tensor_name(tensor); + os << mio::tflite::tensor_name(tensor); } os << std::endl; } @@ -321,14 +326,14 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader) for (const auto input : reader.inputs()) { auto tensor = tensors->Get(input); - std::string name = tflread::tensor_name(tensor); + std::string name = mio::tflite::tensor_name(tensor); os << "I T(" << reader.subgraph_index() << ":" << input << ") " << name << std::endl; } for (const auto output : reader.outputs()) { auto tensor = tensors->Get(output); - std::string name = tflread::tensor_name(tensor); + std::string name = mio::tflite::tensor_name(tensor); os << "O T(" << reader.subgraph_index() << ":" << output << ") " << name << std::endl; } @@ -360,7 +365,7 @@ void dump_model(std::ostream &os, const tflite::Model *model) tflite::BuiltinOperator op_code = opcode->builtin_code(); tflite::BuiltinOperator dp_code = tflite::BuiltinOperator(opcode->deprecated_builtin_code()); - auto op_name = tflread::opcode_name(opcode); + auto op_name = mio::tflite::opcode_name(opcode); auto op_version = opcode->version(); os << "[" << opcode_index << "] " << op_name << " (code: " << op_code @@ -405,9 +410,8 @@ void dump_model(std::ostream &os, const tflite::Model *model) for (uint32_t i = 0; i < signaturedefs->Length(); ++i) { auto sign_i = signaturedefs->Get(i); - os << "S(" << i << ") method_name(" << sign_i->method_name()->c_str() << "), key(" - << sign_i->key()->c_str() << "), sub_graph(" << sign_i->subgraph_index() << ")" - << std::endl; + os << "S(" << i << ") signature_key(" << sign_i->signature_key()->c_str() << "), sub_graph(" + << sign_i->subgraph_index() << ")" << std::endl; auto inputs_i = sign_i->inputs(); for (uint32_t t = 0; t < inputs_i->Length(); ++t) diff --git a/compiler/tfldump/src/Load.cpp b/compiler/tfldump/src/Load.cpp index fe04a5dd6..d2f6e06f1 100644 --- a/compiler/tfldump/src/Load.cpp +++ b/compiler/tfldump/src/Load.cpp @@ -76,7 +76,7 @@ public: 
{ if (_value != -1) { - // Close on descturction + // Close on destructor close(_value); } } diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp index 90cba7173..47edcb086 100644 --- a/compiler/tfldump/src/OpPrinter.cpp +++ b/compiler/tfldump/src/OpPrinter.cpp @@ -602,6 +602,23 @@ public: } }; +class SVDFPrinter : public OpPrinter +{ +public: + void options(const tflite::Operator *op, std::ostream &os) const override + { + if (auto *params = op->builtin_options_as_SVDFOptions()) + { + os << " "; + os << "rank(" << params->rank() << ") "; + os << "activation(" << EnumNameActivationFunctionType(params->fused_activation_function()) + << ") "; + os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") "; + os << std::endl; + } + } +}; + class TransposeConvPrinter : public OpPrinter { public: @@ -776,6 +793,7 @@ OpPrinterRegistry::OpPrinterRegistry() _op_map[tflite::BuiltinOperator_STRIDED_SLICE] = make_unique<StridedSlicePrinter>(); _op_map[tflite::BuiltinOperator_SUB] = make_unique<SubPrinter>(); _op_map[tflite::BuiltinOperator_SUM] = make_unique<ReducerPrinter>(); + _op_map[tflite::BuiltinOperator_SVDF] = make_unique<SVDFPrinter>(); _op_map[tflite::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>(); // There is no Option for TOPK_V2 _op_map[tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] = diff --git a/compiler/tfldump/src/Read.cpp b/compiler/tfldump/src/Read.cpp index 8b3a96e83..454e3a8a1 100644 --- a/compiler/tfldump/src/Read.cpp +++ b/compiler/tfldump/src/Read.cpp @@ -16,76 +16,14 @@ #include "Read.h" +#include <mio_tflite280/Helper.h> + #include <sstream> #include <string> namespace tflread { -// This will provide v3/v3a format neutral BuiltinOperator -tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode) -{ - assert(opcode != nullptr); - int8_t dp_code = opcode->deprecated_builtin_code(); - if (dp_code < 127 && dp_code >= 0) - return 
tflite::BuiltinOperator(dp_code); - return opcode->builtin_code(); -} - -bool is_valid(const tflite::OperatorCode *opcode) -{ - tflite::BuiltinOperator code = builtin_code_neutral(opcode); - return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX); -} - -bool is_custom(const tflite::OperatorCode *opcode) -{ - tflite::BuiltinOperator code = builtin_code_neutral(opcode); - return (code == tflite::BuiltinOperator_CUSTOM); -} - -std::string opcode_name(const tflite::OperatorCode *opcode) -{ - assert(opcode); - - if (!is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid)"; - return oss.str(); - } - - if (is_custom(opcode)) - { - if (!opcode->custom_code()) - return "(invalid custom)"; - - std::string custom_op = "CUSTOM("; - custom_op += opcode->custom_code()->c_str(); - custom_op += ")"; - return custom_op; - } - - tflite::BuiltinOperator code = builtin_code_neutral(opcode); - return tflite::EnumNameBuiltinOperator(code); -} - -const char *tensor_type(const tflite::Tensor *tensor) -{ - return tflite::EnumNameTensorType(tensor->type()); -} - -const char *tensor_name(const tflite::Tensor *tensor) -{ - static const char *kEmptyTensorName = "(noname)"; - - auto name = tensor->name(); - if (name) - return name->c_str(); - - return kEmptyTensorName; -} - Reader::Reader(const tflite::Model *model) { _version = model->version(); @@ -129,7 +67,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const assert(index < _op_codes.size()); const tflite::OperatorCode *opcode = _op_codes.at(index); - return tflread::builtin_code_neutral(opcode); + return mio::tflite::builtin_code_neutral(opcode); } std::string Reader::opcode_name(const tflite::Operator *op) const @@ -138,14 +76,14 @@ std::string Reader::opcode_name(const tflite::Operator *op) const assert(index < _op_codes.size()); const tflite::OperatorCode *opcode = _op_codes.at(index); - if (!is_valid(opcode)) + if (!mio::tflite::is_valid(opcode)) { 
std::ostringstream oss; oss << "(invalid: " << index << ")"; return oss.str(); } - return tflread::opcode_name(opcode); + return mio::tflite::opcode_name(opcode); } bool Reader::select_subgraph(uint32_t sgindex) diff --git a/compiler/tfldump/src/Read.h b/compiler/tfldump/src/Read.h index 80f317d0b..1ae63877f 100644 --- a/compiler/tfldump/src/Read.h +++ b/compiler/tfldump/src/Read.h @@ -36,13 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T return ret; } -tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode); -bool is_valid(const tflite::OperatorCode *opcode); -bool is_custom(const tflite::OperatorCode *opcode); -std::string opcode_name(const tflite::OperatorCode *opcode); -const char *tensor_type(const tflite::Tensor *tensor); -const char *tensor_name(const tflite::Tensor *tensor); - /** * @brief Loads TF lite file and provides helpers to access attributes */ diff --git a/compiler/tflite2circle/CMakeLists.txt b/compiler/tflite2circle/CMakeLists.txt index 4ea01ad31..a317a6305 100644 --- a/compiler/tflite2circle/CMakeLists.txt +++ b/compiler/tflite2circle/CMakeLists.txt @@ -1,8 +1,8 @@ nnas_include(TargetRequire) unset(REQUIRED_TARGETS) -list(APPEND REQUIRED_TARGETS mio_tflite260) -list(APPEND REQUIRED_TARGETS mio_circle) +list(APPEND REQUIRED_TARGETS mio_tflite280) +list(APPEND REQUIRED_TARGETS mio_circle04) TargetRequire_Return(${REQUIRED_TARGETS}) set(DRIVER "driver/Driver.cpp") @@ -13,8 +13,9 @@ target_include_directories(tflite2circle PRIVATE src) target_link_libraries(tflite2circle arser) target_link_libraries(tflite2circle foder) target_link_libraries(tflite2circle safemain) -target_link_libraries(tflite2circle mio_tflite260) -target_link_libraries(tflite2circle mio_circle) +target_link_libraries(tflite2circle mio_tflite280) +target_link_libraries(tflite2circle mio_tflite280_helper) +target_link_libraries(tflite2circle mio_circle04) target_link_libraries(tflite2circle vconone) 
target_link_libraries(tflite2circle nncc_coverage) diff --git a/compiler/tflite2circle/requires.cmake b/compiler/tflite2circle/requires.cmake index e39f9eeaf..3db9a2f2a 100644 --- a/compiler/tflite2circle/requires.cmake +++ b/compiler/tflite2circle/requires.cmake @@ -1,6 +1,6 @@ require("arser") require("foder") -require("mio-tflite260") -require("mio-circle") +require("mio-tflite280") +require("mio-circle04") require("safemain") require("vconone") diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions.h index dc6ff086c..88a4f71df 100644 --- a/compiler/tflite2circle/src/BuildBuiltinOptions.h +++ b/compiler/tflite2circle/src/BuildBuiltinOptions.h @@ -102,6 +102,7 @@ #include "BuildBuiltinOptions/SqueezeOptions.h" #include "BuildBuiltinOptions/StridedSliceOptions.h" #include "BuildBuiltinOptions/SubOptions.h" +#include "BuildBuiltinOptions/SVDFOptions.h" #include "BuildBuiltinOptions/TileOptions.h" #include "BuildBuiltinOptions/TopKV2Options.h" #include "BuildBuiltinOptions/TransposeOptions.h" diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp index 2619b73eb..27410012d 100644 --- a/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp @@ -37,6 +37,7 @@ build_circle_FullyConnectedOptions(flatbuffers::FlatBufferBuilder &fb, const tfl else if (tflite_weight_format == tflite::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8) builtin_options_builder.add_weights_format( circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8); + builtin_options_builder.add_keep_num_dims(tflite_builtin_options->keep_num_dims()); return builtin_options_builder.Finish(); } diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp new file mode 
100644 index 000000000..e23738a69 --- /dev/null +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SVDFOptions.h" +#include "DataLookup.h" + +#include <cassert> + +namespace tflite2circle +{ + +flatbuffers::Offset<circle::SVDFOptions> +build_circle_SVDFOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op) +{ + auto *tflite_builtin_options = op->builtin_options_as_SVDFOptions(); + assert(tflite_builtin_options); + + circle::SVDFOptionsBuilder builtin_options_builder{fb}; + builtin_options_builder.add_rank(tflite_builtin_options->rank()); + builtin_options_builder.add_asymmetric_quantize_inputs( + tflite_builtin_options->asymmetric_quantize_inputs()); + builtin_options_builder.add_fused_activation_function( + get_circle_activation_function_type(tflite_builtin_options->fused_activation_function())); + + return builtin_options_builder.Finish(); +} + +} // namespace tflite2circle diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h new file mode 100644 index 000000000..2ddbd3911 --- /dev/null +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BBO_SVDF_OPTIONS_H__ +#define __BBO_SVDF_OPTIONS_H__ + +#include <mio/tflite/schema_generated.h> +#include <mio/circle/schema_generated.h> + +namespace tflite2circle +{ + +flatbuffers::Offset<circle::SVDFOptions> +build_circle_SVDFOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op); + +} // namespace tflite2circle + +#endif // __BBO_SVDF_OPTIONS_H__ diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp index 90cc415ff..d483b288f 100644 --- a/compiler/tflite2circle/src/CircleModel.cpp +++ b/compiler/tflite2circle/src/CircleModel.cpp @@ -16,11 +16,14 @@ #include <cassert> #include <iostream> +#include <map> #include <memory> #include "CircleModel.h" #include "DataLookup.h" +#include <mio_tflite280/Helper.h> + namespace tflite2circle { @@ -206,7 +209,8 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf auto tflite_inputs = it_sg->inputs(); std::vector<int32_t> input_vec{tflite_inputs->begin(), tflite_inputs->end()}; - // apply signature_def to input tensor index so that input orders are correct + // apply signature_def to input tensor index so that input orders follow like tensorflow lite + // interpreter._get_full_signature_list() method, which is ordered(sorted) in name // NOTE we do not need this when circle format supports signature_def if (_tfl_signature_def_offsets != 
nullptr) { @@ -216,10 +220,16 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf { auto inputs = it_signdef->inputs(); assert(inputs->size() == input_vec.size()); - uint32_t input_vec_idx = 0; + + std::map<std::string, uint32_t> map_name_index; for (auto it_tm : *inputs) { - input_vec[input_vec_idx++] = static_cast<int32_t>(it_tm->tensor_index()); + map_name_index[it_tm->name()->str()] = it_tm->tensor_index(); + } + uint32_t input_vec_idx = 0; + for (auto &item : map_name_index) + { + input_vec[input_vec_idx++] = item.second; } } } @@ -240,10 +250,16 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf { auto outputs = it_signdef->outputs(); assert(outputs->size() == output_vec.size()); - uint32_t output_vec_idx = 0; + + std::map<std::string, uint32_t> map_name_index; for (auto it_tm : *outputs) { - output_vec[output_vec_idx++] = static_cast<int32_t>(it_tm->tensor_index()); + map_name_index[it_tm->name()->str()] = it_tm->tensor_index(); + } + uint32_t output_vec_idx = 0; + for (auto &item : map_name_index) + { + output_vec[output_vec_idx++] = item.second; } } } @@ -318,17 +334,6 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf _circle_flatbuffer_vec_offset = _fb->CreateVector(subgprahs_vec); } -tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode) -{ - assert(opcode != nullptr); - int8_t dp_code = opcode->deprecated_builtin_code(); - // 127 is max of int8_t which is upper bound of v3 builtin_code - // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127 - if (dp_code < 127 && dp_code >= 0) - return tflite::BuiltinOperator(dp_code); - return opcode->builtin_code(); -} - template <> void Offset<OperatorCodeLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec) { std::vector<flatbuffers::Offset<circle::OperatorCode>> operator_code_vec; @@ -337,8 +342,9 @@ template <> void Offset<OperatorCodeLink>::build(const 
TFLFlatBufVec *tflite_fla { auto custom_code = _fb->CreateString(it->custom_code()); circle::OperatorCodeBuilder operator_code_builder{*_fb}; - // TODO support circle deprecated_builtin_code - auto bt_code = builtin_code_neutral(it); + auto de_code = it->deprecated_builtin_code(); + auto bt_code = it->builtin_code(); + operator_code_builder.add_deprecated_builtin_code(get_circle_builtin_code(de_code)); operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code)); operator_code_builder.add_custom_code(custom_code); operator_code_builder.add_version(it->version()); diff --git a/compiler/tflite2circle/src/DataLookup.cpp b/compiler/tflite2circle/src/DataLookup.cpp index c5ed62e31..7c3aab089 100644 --- a/compiler/tflite2circle/src/DataLookup.cpp +++ b/compiler/tflite2circle/src/DataLookup.cpp @@ -34,6 +34,22 @@ circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop) } } +int8_t get_circle_builtin_code(int8_t tfl_bop_i8) +{ + tflite::BuiltinOperator tfl_bop = static_cast<tflite::BuiltinOperator>(tfl_bop_i8); + + switch (tfl_bop) + { +#define TFL_OPERATOR(OP) \ + case tflite::BuiltinOperator_##OP: \ + return static_cast<int8_t>(circle::BuiltinOperator_##OP); +#include "TFLOperator.lst" +#undef TFL_OPERATOR + default: + throw std::runtime_error("tflite2circle: wrong op"); + } +} + circle::TensorType get_circle_tensortype(tflite::TensorType tfl_tt) { switch (tfl_tt) diff --git a/compiler/tflite2circle/src/DataLookup.h b/compiler/tflite2circle/src/DataLookup.h index 601d014dd..5aeeb6eca 100644 --- a/compiler/tflite2circle/src/DataLookup.h +++ b/compiler/tflite2circle/src/DataLookup.h @@ -30,6 +30,8 @@ namespace tflite2circle */ circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop); +int8_t get_circle_builtin_code(int8_t tfl_bop_i8); + /** * @brief Returns circle TensorType according to tflite. 
* diff --git a/compiler/tflite2circle/src/TFLBuiltinOptions.lst b/compiler/tflite2circle/src/TFLBuiltinOptions.lst index f2de7e046..d55ba464a 100644 --- a/compiler/tflite2circle/src/TFLBuiltinOptions.lst +++ b/compiler/tflite2circle/src/TFLBuiltinOptions.lst @@ -9,7 +9,7 @@ TFL_BUILTIN_OPTIONS(DepthwiseConv2DOptions) //TFL_BUILTIN_OPTIONS(ConcatEmbeddingsOptions) //TFL_BUILTIN_OPTIONS(LSHProjectionOptions) TFL_BUILTIN_OPTIONS(Pool2DOptions) -//TFL_BUILTIN_OPTIONS(SVDFOptions) +TFL_BUILTIN_OPTIONS(SVDFOptions) //TFL_BUILTIN_OPTIONS(RNNOptions) TFL_BUILTIN_OPTIONS(FullyConnectedOptions) TFL_BUILTIN_OPTIONS(SoftmaxOptions) diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt index 2241c9ec9..3841a1b78 100644 --- a/compiler/vconone/CMakeLists.txt +++ b/compiler/vconone/CMakeLists.txt @@ -1,5 +1,5 @@ if (NOT VCONONE_VERSION) - set(VCONONE_VERSION 0x0000000000130001) + set(VCONONE_VERSION 0x0000000000140001) # NOTE order is [build patch minor major] # if VCONONE_VERSION is set with -D option, it will be cached # you may have to remove cache file if you remove -D option |